001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.kb.sparql; 020 021import com.google.common.collect.Lists; 022import org.aksw.jena_sparql_api.core.QueryExecutionFactory; 023import org.apache.commons.lang3.NotImplementedException; 024import org.apache.jena.graph.NodeFactory; 025import org.apache.jena.graph.impl.LiteralLabel; 026import org.apache.jena.query.QueryExecution; 027import org.apache.jena.query.QueryFactory; 028import org.apache.jena.rdf.model.Literal; 029import org.apache.jena.rdf.model.Model; 030import org.apache.jena.rdf.model.ModelFactory; 031import org.apache.jena.riot.WebContent; 032import org.apache.jena.riot.system.ErrorHandler; 033import org.apache.jena.riot.system.ErrorHandlerFactory; 034import org.apache.jena.riot.web.HttpNames; 035import org.apache.jena.sparql.engine.http.QueryEngineHTTP; 036import org.apache.jena.sparql.util.FmtUtils; 037import org.dllearner.algorithms.qtl.QueryTreeUtils; 038import org.dllearner.kb.SparqlEndpointKS; 039import org.dllearner.utilities.QueryUtils; 040import org.slf4j.Logger; 041import org.slf4j.LoggerFactory; 042 043import java.io.BufferedReader; 044import java.io.ByteArrayInputStream; 045import java.io.InputStream; 046import java.io.InputStreamReader; 047import java.net.URL; 048import java.nio.charset.StandardCharsets; 049import java.util.ArrayList; 050import java.util.List; 051import java.util.Set; 052import java.util.TreeSet; 053import java.util.concurrent.atomic.AtomicBoolean; 054import java.util.concurrent.atomic.AtomicInteger; 055import java.util.stream.Collectors; 056 057/** 058 * {@inheritDoc} 059 * @author Lorenz Buehmann 060 * 061 */ 062public class TreeBasedConciseBoundedDescriptionGenerator implements ConciseBoundedDescriptionGenerator{ 063 064 private static final Logger logger = LoggerFactory.getLogger(TreeBasedConciseBoundedDescriptionGenerator.class); 065 private SparqlEndpoint endpoint; 066 067 private Set<String> allowedPropertyNamespaces = new TreeSet<>(); 068 private Set<String> allowedObjectNamespaces = new TreeSet<>(); 069 070 private QueryExecutionFactory qef; 071 072 private AtomicInteger inIndex = new AtomicInteger(0); 073 private AtomicInteger outIndex = new AtomicInteger(0); 074 private AtomicInteger predIndex = new AtomicInteger(0); 075 076 private boolean useUnionOptimization = true; 077 private boolean workaround = false; 078 079 public TreeBasedConciseBoundedDescriptionGenerator(QueryExecutionFactory qef) { 080 this.qef = qef; 081 } 082 083 public void setWorkaround(boolean workaround) { 084 this.workaround = workaround; 085 } 086 087 public void setEndpoint(SparqlEndpoint endpoint) { 088 this.endpoint = endpoint; 089 } 090 091 public Model getConciseBoundedDescription(LiteralLabel literal, CBDStructureTree structureTree) throws Exception { 092 logger.trace("Computing CBD for {} ...", literal); 093 long start = System.currentTimeMillis(); 094 String query = generateQuery(literal, structureTree); 095 System.out.println(query); 096 097 if(workaround) { 098 return constructWithReplacement(endpoint, query); 099 } 100 101 try(QueryExecution qe = qef.createQueryExecution(query)) { 102 Model model = qe.execConstruct(); 103 long end = System.currentTimeMillis(); 104 logger.trace("Got {} triples in {} ms.", model.size(), (end - start)); 105 return model; 106 } catch(Exception e) { 107 logger.error("CBD retrieval failed when using query\n{}", query); 108 throw new Exception("CBD retrieval failed when using query\n" + query, e); 109 } 110 } 111 112 /* (non-Javadoc) 113 * @see org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator#getConciseBoundedDescription(java.lang.String, int, boolean) 114 */ 115 public Model getConciseBoundedDescription(String resourceURI, CBDStructureTree structureTree) throws Exception { 116 logger.trace("Computing CBD for {} ...", resourceURI); 117 long start = System.currentTimeMillis(); 118 String query = generateQuery(resourceURI, structureTree); 119 System.out.println(QueryFactory.create(query)); 120 121 if(workaround) { 122 return constructWithReplacement(endpoint, query); 123 } 124 125 try(QueryExecution qe = qef.createQueryExecution(query)) { 126 Model model = qe.execConstruct(); 127 long end = System.currentTimeMillis(); 128 logger.trace("Got {} triples in {} ms.", model.size(), (end - start)); 129 return model; 130 } catch(Exception e) { 131 throw new Exception("CBD retrieval failed when using query\n" + query, e); 132 } 133 } 134 135 @Override 136 public Model getConciseBoundedDescription(String resource, int depth, boolean withTypesForLeafs) { 137 throw new NotImplementedException("please use getConciseBoundedDescription(String resourceURI, CBDStructureTree structureTree) instead."); 138 } 139 140 @Override 141 public void setAllowedPropertyNamespaces(Set<String> namespaces) { 142 this.allowedPropertyNamespaces.addAll(namespaces); 143 } 144 145 @Override 146 public void setAllowedObjectNamespaces(Set<String> namespaces) { 147 this.allowedObjectNamespaces.addAll(namespaces); 148 } 149 150 /** 151 * A SPARQL CONSTRUCT query is created, to get a RDF graph for the given example with a specific recursion depth. 152 * @param literal The example resource for which a CONSTRUCT query is created. 153 * @return the SPARQL query 154 */ 155 private String generateQuery(LiteralLabel literal, CBDStructureTree structureTree){ 156 reset(); 157 158 // get paths to leaf nodes 159 List<List<CBDStructureTree>> pathsToLeafs = QueryTreeUtils.getPathsToLeafs(structureTree); 160 161 StringBuilder query = new StringBuilder(); 162 String rootToken = FmtUtils.stringForNode(NodeFactory.createLiteral(literal)); 163 164 query.append("CONSTRUCT {\n"); 165 // the CONSTRUCT template 166 append(query, structureTree, rootToken, true); 167 query.append("} WHERE {\n"); 168 reset(); 169 // the query pattern 170 append(query, structureTree, rootToken, false); 171 query.append("}"); 172 173 return query.toString(); 174 } 175 176 /** 177 * A SPARQL CONSTRUCT query is created, to get a RDF graph for the given example with a specific recursion depth. 178 * @param resource The example resource for which a CONSTRUCT query is created. 179 * @return the SPARQL query 180 */ 181 private String generateQuery(String resource, CBDStructureTree structureTree){ 182 reset(); 183 184 // get paths to leaf nodes 185 List<List<CBDStructureTree>> pathsToLeafs = QueryTreeUtils.getPathsToLeafs(structureTree); 186 187 StringBuilder query = new StringBuilder(); 188 String rootToken = "<" + resource + ">"; 189 190 query.append("CONSTRUCT {\n"); 191 // the CONSTRUCT template 192 append(query, structureTree, rootToken, true); 193 query.append("} WHERE {\n"); 194 reset(); 195 // the query pattern 196 append(query, structureTree, rootToken, false); 197 query.append("}"); 198 199 return query.toString(); 200 } 201 202 private void append(StringBuilder query, CBDStructureTree tree, String rootVar, boolean isConstructTemplate) { 203 // use optimization if enabled 204 if(useUnionOptimization) { 205 appendUnionOptimized2(query, tree, rootVar, isConstructTemplate); 206 return; 207 } 208 209 tree.getChildren().forEach(child -> { 210 // check if we have to put it into an OPTIONAL clause 211 boolean optionalNeeded = !isConstructTemplate && child.isOutNode() && !tree.isRoot() && !tree.isInNode(); 212 213 // open OPTIONAL if necessary 214 if(optionalNeeded) { 215 query.append("OPTIONAL {"); 216 } 217 218 // append triple pattern 219 String var; 220 if(child.isInNode()) { 221 var = "?x_in" + inIndex.getAndIncrement(); 222 String predVar = "?p" + predIndex.getAndIncrement(); 223 query.append(String.format("%s %s %s .\n", var, predVar, rootVar)); 224 } else { 225 var = "?x_out" + outIndex.getAndIncrement(); 226 String predVar = "?p" + predIndex.getAndIncrement(); 227 query.append(String.format("%s %s %s .\n", rootVar, predVar, var)); 228 } 229 230 // recursively process the child node 231 append(query, child, var, isConstructTemplate); 232 233 // close OPTIONAL if necessary 234 if(optionalNeeded) { 235 query.append("}"); 236 } 237 }); 238 } 239 240 private void appendUnionOptimized(StringBuilder query, CBDStructureTree tree, String rootVar, boolean isConstructTemplate) { 241 List<List<CBDStructureTree>> paths = QueryTreeUtils.getPathsToLeafs(tree); 242 243 List<String> tpClusters = paths.stream().map(path -> { 244 StringBuilder currentVar = new StringBuilder(rootVar); 245 StringBuilder tps = new StringBuilder(); 246 AtomicBoolean lastOut = new AtomicBoolean(false); 247 StringBuilder appendix = new StringBuilder(); 248 path.forEach(node -> { 249 boolean optionalNeeded = !isConstructTemplate && lastOut.get() && node.isOutNode(); 250 251 // open OPTIONAL if necessary 252 if(optionalNeeded) { 253 tps.append("OPTIONAL {"); 254 appendix.append("}"); 255 } 256 257 // append triple pattern 258 String var; 259 if (node.isInNode()) { 260 var = "?x_in" + inIndex.getAndIncrement(); 261 String predVar = "?p" + predIndex.getAndIncrement(); 262 tps.append(String.format("%s %s %s .\n", var, predVar, currentVar.toString())); 263 } else { 264 var = "?x_out" + outIndex.getAndIncrement(); 265 String predVar = "?p" + predIndex.getAndIncrement(); 266 tps.append(String.format("%s %s %s .\n", currentVar.toString(), predVar, var)); 267 lastOut.set(true); 268 } 269 currentVar.setLength(0); 270 currentVar.append(var); 271 }); 272 273 // add closing braces for OPTIONAL if used 274 tps.append(appendix); 275 276 return tps.toString(); 277 }).collect(Collectors.toList()); 278 279 String queryPart = tpClusters.stream() 280 .map(s -> isConstructTemplate ? s : "{" + s + "}") 281 .collect(Collectors.joining(isConstructTemplate ? "" : " UNION ")); 282 query.append(queryPart); 283 } 284 285 private void appendUnionOptimized2(StringBuilder query, CBDStructureTree tree, String rootVar, boolean isConstructTemplate) { 286 List<List<CBDStructureTree>> paths = QueryTreeUtils.getPathsToLeafs(tree); 287 288 // get all sub-paths 289 paths = paths.stream().flatMap(path -> { 290 List<List<CBDStructureTree>> subPaths = new ArrayList<>(); 291 for (int length = 1; length <= path.size(); length++) { 292 subPaths.add(path.subList(0, length)); 293 } 294 return subPaths.stream(); 295 }).collect(Collectors.toList()); 296 297 String rdfTypeFilter = "FILTER(%s != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)\n"; 298 299 300 List<String> tpClusters = paths.stream().map(path -> { 301 StringBuilder currentVar = new StringBuilder(rootVar); 302 StringBuilder tps = new StringBuilder(); 303 AtomicBoolean lastOut = new AtomicBoolean(false); 304 305 path.forEach(node -> { 306 // append triple pattern 307 String var; 308 if (node.isInNode()) { 309 if(lastOut.get() && !isConstructTemplate) { 310 tps.append(String.format(rdfTypeFilter, "?p_" + (predIndex.get() - 1))); 311 } 312 var = "?x_in" + inIndex.getAndIncrement(); 313 String predVar = "?p" + predIndex.getAndIncrement(); 314 tps.append(String.format("%s %s %s .\n", var, predVar, currentVar.toString())); 315 316 } else { 317 var = "?x_out" + outIndex.getAndIncrement(); 318 String predVar = "?p" + predIndex.getAndIncrement(); 319 tps.append(String.format("%s %s %s .\n", currentVar.toString(), predVar, var)); 320 lastOut.set(true); 321 } 322 currentVar.setLength(0); 323 currentVar.append(var); 324 }); 325 326 return tps.toString(); 327 }).collect(Collectors.toList()); 328 329 330 String queryPart = tpClusters.stream() 331 .map(s -> isConstructTemplate ? s : "{" + s + "}") 332 .collect(Collectors.joining(isConstructTemplate ? "" : " UNION ")); 333 query.append(queryPart); 334 } 335 336 /** 337 * Reset variables indices 338 */ 339 private void reset() { 340 inIndex = new AtomicInteger(0); 341 outIndex = new AtomicInteger(0); 342 predIndex = new AtomicInteger(0); 343 } 344 345 @Override 346 public void setIgnoredProperties(Set<String> properties) { 347 348 } 349 350 public void setUseUnionOptimization(boolean useUnionOptimization) { 351 this.useUnionOptimization = useUnionOptimization; 352 } 353 354 public static void main(String[] args) throws Exception { 355 String query = "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" + 356 "PREFIX owl: <http://www.w3.org/2002/07/owl#>\n" + 357 "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" + 358 "PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n" + 359 "\n" + 360 "SELECT DISTINCT ?uri\n" + 361 "WHERE\n" + 362 " { ?x <http://dbpedia.org/ontology/director> <http://dbpedia.org/resource/William_Shatner> ;\n" + 363 " <http://dbpedia.org/ontology/starring> ?uri\n" + 364 " }"; 365 366 query = "PREFIX dbo: <http://dbpedia.org/ontology/>\n" + 367 "PREFIX : <http://dbpedia.org/resource/>\n" + 368 "\n" + 369 "SELECT DISTINCT ?uri\n" + 370 "WHERE\n" + 371 " { :The_Three_Dancers\n" + 372 " dbo:author ?person .\n" + 373 " ?person dbo:movement ?uri\n" + 374 " }"; 375 query = "PREFIX dbo: <http://dbpedia.org/ontology/>\n" + 376 "PREFIX : <http://dbpedia.org/resource/>\n" + 377 "\n" + 378 "SELECT DISTINCT ?uri\n" + 379 "WHERE\n" + 380 " { ?uri dbo:author ?person . \n" + 381 " ?person dbo:movement :Test\n ." + 382 "?in_0 dbo:starring ?uri . ?in_1 dbo:starring ?in_0 . ?in_0 dbo:book ?o_0 ." + 383 " }"; 384 CBDStructureTree cbdTree = QueryUtils.getOptimalCBDStructure(QueryFactory.create(query)); 385 cbdTree = CBDStructureTree.fromTreeString("root:[in:[out:[]],out:[in:[],out:[out:[]]]]"); 386 387 System.out.println(cbdTree.toStringVerbose()); 388 SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); 389 endpoint = SparqlEndpoint.create("http://sake.informatik.uni-leipzig.de:8890/sparql", "http://dbpedia.org"); 390 SparqlEndpointKS ks = new SparqlEndpointKS(endpoint); 391 ks.setQueryDelay(0); 392 ks.setUseCache(false); 393 ks.setRetryCount(0); 394 ks.init(); 395 396// QueryExecutionFactory qef = ks.getQueryExecutionFactory(); 397// 398// String q = "CONSTRUCT {\n" + 399// "<http://www4.wiwiss.fu-berlin.de/sider/resource/drugs/2232> ?p0 ?x_out0 .\n" + 400// "} WHERE {\n" + 401// "{<http://www4.wiwiss.fu-berlin.de/sider/resource/drugs/2232> ?p0 ?x_out0 .\n" + 402// "}}"; 403// Model model = ModelFactory.createDefaultModel(); 404// // Parser to first error or warning. 405// ErrorHandler errHandler = ErrorHandlerFactory.errorHandlerWarn; 406// model.getReader().setProperty("error-mode","lax"); 407// 408// System.out.println(model.size()); 409// 410// 411// System.exit(0); 412 TreeBasedConciseBoundedDescriptionGenerator cbdGen = new TreeBasedConciseBoundedDescriptionGenerator(ks.getQueryExecutionFactory()); 413 Model cbd = cbdGen.getConciseBoundedDescription("http://dbpedia.org/resource/Dan_Gauthier", cbdTree); 414 System.out.println(cbd.size()); 415 416 cbdGen.setUseUnionOptimization(false); 417 cbd = cbdGen.getConciseBoundedDescription("http://dbpedia.org/resource/Dan_Gauthier", cbdTree); 418 System.out.println(cbd.size()); 419// cbd.write(System.out, "NTRIPLES"); 420 } 421 422 private Model constructWithReplacement(SparqlEndpoint endpoint, String query) throws Exception{ 423 QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); 424 qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); 425 String request = qe.toString().replace("GET ", ""); 426 427 URL url = new URL(request); 428 java.net.HttpURLConnection conn = (java.net.HttpURLConnection) url.openConnection(); 429 conn.setRequestMethod("GET"); 430 conn.addRequestProperty(HttpNames.hAccept, WebContent.contentTypeRDFXML); 431 try(BufferedReader rdr = new BufferedReader(new InputStreamReader(conn.getInputStream()))) { 432 Model model = ModelFactory.createDefaultModel(); 433 String buf = null; 434 StringBuilder doc = new StringBuilder(); 435 while ((buf = rdr.readLine()) != null) { 436 // Apply regex on buf 437 if(buf.contains("&#")) { 438 buf = buf.replace("&#", ""); 439 } 440 // build output 441 doc.append(buf); 442 } 443 try(InputStream is = new ByteArrayInputStream(doc.toString().getBytes(StandardCharsets.UTF_8))) { 444 model.read(is, null); 445 } 446 return model; 447 } 448 } 449 450}