001package org.dllearner.kb.sparql; 002 003import org.aksw.jena_sparql_api.core.QueryExecutionFactory; 004import org.apache.jena.query.QueryExecution; 005import org.apache.jena.rdf.model.Model; 006import org.apache.jena.sparql.core.Var; 007import org.apache.jena.sparql.util.FmtUtils; 008import org.apache.jena.vocabulary.RDF; 009import org.slf4j.Logger; 010import org.slf4j.LoggerFactory; 011 012import java.lang.invoke.MethodHandles; 013import java.util.Set; 014import java.util.TreeSet; 015import java.util.stream.Collectors; 016 017/** 018 * @author Lorenz Buehmann 019 */ 020public abstract class AbstractConciseBoundedDescriptionGenerator implements ConciseBoundedDescriptionGenerator { 021 022 protected static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); 023 024// protected final QueryExecutionFactory qef; 025 protected QueryExecutionFactory qef; 026 027 protected Set<String> allowedPropertyNamespaces = new TreeSet<>(); 028 protected Set<String> allowedObjectNamespaces = new TreeSet<>(); 029 protected Set<String> allowedClassNamespaces = new TreeSet<>(); 030 protected Set<String> ignoredProperties = new TreeSet<>(); 031 032 protected AbstractConciseBoundedDescriptionGenerator(QueryExecutionFactory qef) { 033 this.qef = qef; 034 } 035 036 public void setAllowedPropertyNamespaces(Set<String> allowedPropertyNamespaces) { 037 this.allowedPropertyNamespaces = allowedPropertyNamespaces; 038 } 039 040 public void setAllowedObjectNamespaces(Set<String> allowedObjectNamespaces) { 041 this.allowedObjectNamespaces = allowedObjectNamespaces; 042 } 043 044 public void setAllowedClassNamespaces(Set<String> allowedClassNamespaces) { 045 this.allowedClassNamespaces = allowedClassNamespaces; 046 } 047 048 public void setIgnoredProperties(Set<String> ignoredProperties) { 049 this.ignoredProperties = ignoredProperties; 050 } 051 052 protected abstract String generateQuery(String resource, int depth, boolean withTypesForLeafs); 053 054 @Override 055 public Model getConciseBoundedDescription(String resource, int depth, boolean withTypesForLeafs) { 056 log.trace("Computing CBD for {} ...", resource); 057 long start = System.currentTimeMillis(); 058 String query = generateQuery(resource, depth, withTypesForLeafs); 059 log.debug(query); 060 try(QueryExecution qe = qef.createQueryExecution(query)) { 061 Model model = qe.execConstruct(); 062 log.trace("Got {} triples in {} ms.", model.size(), (System.currentTimeMillis() - start)); 063 return model; 064 } catch (Exception e) { 065 log.error("Failed to computed CBD for resource {}", resource ); 066 throw new RuntimeException("Failed to computed CBD for resource " + resource, e); 067 } 068 } 069 070 071 private boolean USE_FILTER_IN = true; 072 private static final String FILTER_NOT_IN_CLAUSE = "%s NOT IN (%s)"; 073 private static final String FILTER_IN_CLAUSE = "%s IN (%s)"; 074 075 protected String createPredicateFilter(final Var predicateVar) { 076 String filter = ""; 077 078 if(!ignoredProperties.isEmpty()) { 079 filter += "FILTER("; 080 if(USE_FILTER_IN) { 081 filter += String.format( 082 FILTER_NOT_IN_CLAUSE, 083 predicateVar.toString(), 084 ignoredProperties.stream() 085 .map(p -> "<" + p + ">") 086 .collect(Collectors.joining(","))); 087 } else { 088 filter += ignoredProperties.stream() 089 .map(input -> predicateVar.toString() + " != <" + input + ">") 090 .collect(Collectors.joining(" && ")); 091 } 092 filter += ")\n"; 093 } 094 095 if(!allowedPropertyNamespaces.isEmpty()){ 096 filter += "FILTER(" + predicateVar + " = <" + RDF.type.getURI() + "> || "; 097 filter += allowedPropertyNamespaces.stream() 098 .map(ns -> "(STRSTARTS(STR(" + predicateVar + "),'" + ns + "'))") 099 .collect(Collectors.joining(" || ")); 100 filter += ")\n"; 101 } 102 103 return filter; 104 } 105 106 protected String createObjectFilter(Var predicateVar, Var objectVar){ 107 String filter = ""; 108 if(!allowedObjectNamespaces.isEmpty() || !allowedClassNamespaces.isEmpty()) { 109 filter += "FILTER(ISLITERAL(" + objectVar + ")"; 110 } 111 112 if(!allowedObjectNamespaces.isEmpty()){ 113 filter += " || (" + predicateVar + " != " + FmtUtils.stringForResource(RDF.type) + " && "; 114 filter += allowedObjectNamespaces.stream() 115 .map(ns -> "(STRSTARTS(STR(" + objectVar + "),'" + ns + "'))") 116 .collect(Collectors.joining(" || ")); 117 filter += ")\n"; 118 } else if(!allowedClassNamespaces.isEmpty()){ 119 filter += " || " + predicateVar + " != " + FmtUtils.stringForResource(RDF.type) + " || "; 120 } 121 122 if(!allowedClassNamespaces.isEmpty()){ 123// if(allowedObjectNamespaces.isEmpty()) { 124// filter += predicateVar + " != " + FmtUtils.stringForResource(RDF.type) + " || "; 125// } 126 filter += "(" + predicateVar + " = " + FmtUtils.stringForResource(RDF.type) + " && "; 127 filter += allowedClassNamespaces.stream() 128 .map(ns -> "(STRSTARTS(STR(" + objectVar + "),'" + ns + "'))") 129 .collect(Collectors.joining(" || ")); 130 filter += ")\n"; 131 } else if(!allowedObjectNamespaces.isEmpty()){ 132 filter += " || " + predicateVar + " = " + FmtUtils.stringForResource(RDF.type); 133 } 134 135 if(!allowedObjectNamespaces.isEmpty() || !allowedClassNamespaces.isEmpty()) { 136 filter += ")"; 137 } 138 return filter; 139 } 140}