001package org.dllearner.algorithms.qtl.operations.tuples; 002 003import java.io.File; 004import java.util.*; 005import java.util.concurrent.atomic.AtomicInteger; 006import java.util.function.Function; 007import java.util.stream.Collector; 008import java.util.stream.Collectors; 009import java.util.stream.Stream; 010 011import com.google.common.base.StandardSystemProperty; 012import com.google.common.collect.Lists; 013import com.google.common.collect.Maps; 014import com.google.common.collect.Sets; 015import org.aksw.jena_sparql_api.core.QueryExecutionFactory; 016import org.apache.jena.graph.Node; 017import org.apache.jena.graph.NodeFactory; 018import org.apache.jena.query.*; 019import org.apache.jena.rdf.model.Model; 020import org.apache.jena.shared.PrefixMapping; 021import org.apache.jena.sparql.core.Var; 022import org.apache.jena.sparql.util.FmtUtils; 023import org.apache.logging.log4j.LogManager; 024import org.dllearner.algorithms.qtl.QueryTreeUtils; 025import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree; 026import org.dllearner.algorithms.qtl.datastructures.rendering.Edge; 027import org.dllearner.algorithms.qtl.datastructures.rendering.Vertex; 028import org.dllearner.algorithms.qtl.exception.QTLException; 029import org.dllearner.algorithms.qtl.impl.QueryTreeFactory; 030import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryBase; 031import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryBaseInv; 032import org.dllearner.algorithms.qtl.operations.lgg.LGGGenerator; 033import org.dllearner.algorithms.qtl.operations.lgg.LGGGeneratorSimple; 034import org.dllearner.algorithms.qtl.operations.traversal.PreOrderTreeTraversal; 035import org.dllearner.algorithms.qtl.operations.traversal.TreeTraversal; 036import org.dllearner.algorithms.qtl.util.filters.AbstractTreeFilter; 037import org.dllearner.algorithms.qtl.util.filters.MostSpecificTypesFilter; 038import org.dllearner.algorithms.qtl.util.filters.PredicateExistenceFilterDBpedia; 039import 
org.dllearner.algorithms.qtl.util.filters.SymmetricPredicatesFilter; 040import org.dllearner.algorithms.qtl.util.vocabulary.DBpedia; 041import org.dllearner.core.AbstractReasonerComponent; 042import org.dllearner.kb.SparqlEndpointKS; 043import org.dllearner.kb.sparql.*; 044import org.dllearner.reasoning.SPARQLReasoner; 045import org.dllearner.utilities.QueryUtils; 046import org.jgrapht.Graph; 047import org.jgrapht.io.ExportException; 048import org.jgrapht.io.GraphMLExporter; 049import org.jgrapht.io.IntegerComponentNameProvider; 050import static java.util.stream.Collectors.groupingBy; 051import static java.util.stream.Collectors.toList; 052 053/** 054 * Experimental algorithm to generate SPARQL queries by example each of which is a tuple of RDF nodes, i.e. 055 * either a resource or literal. 056 * 057 * For example 058 * (:a 15) and (:b 16) 059 * (:a :x "15-11-2000") and (:b :x "15-11-2000") 060 * 061 * The result should be a SPARQL query with n projection variables where n denotes the arity of the tuple (n-tuple). 
 *
 * @author Lorenz Buehmann
 */
public class QTLTuples {

	private static final org.apache.logging.log4j.Logger log = LogManager.getLogger(QTLTuples.class);

	// SPARQL backend used for all data retrieval; fixed at construction time
	private final QueryExecutionFactory qef;

	// pluggable components of the algorithm, replaceable via the setters below
	private ConciseBoundedDescriptionGenerator cbdGen;
	private QueryTreeFactory treeFactory;
	private LGGGenerator lggGenerator;

	// used for rendering
	private PrefixMapping pm;
	private String baseIRI;

	// max. tree depth used for data retrieval and query tree generation (see setMaxTreeDepth)
	private int maxTreeDepth = 1;

	// filters applied to generated trees; LinkedHashSet keeps the application order stable
	private Set<AbstractTreeFilter<RDFResourceTree>> treeFilters = new LinkedHashSet<>();

	/**
	 * Registers a tree filter that will be applied to generated query trees.
	 *
	 * @param treeFilter the filter to add
	 * @return {@code true} if the filter was not already registered
	 */
	public boolean addTreeFilter(AbstractTreeFilter<RDFResourceTree> treeFilter) {
		return treeFilters.add(treeFilter);
	}

	/**
	 * Unregisters a previously added tree filter.
	 *
	 * @param treeFilter the filter to remove
	 * @return {@code true} if the filter was registered before
	 */
	public boolean removeTreeFilter(AbstractTreeFilter<RDFResourceTree> treeFilter) {
		return treeFilters.remove(treeFilter);
	}


	/**
	 * @param qef the SPARQL query execution backend used for data retrieval
	 */
	public QTLTuples(QueryExecutionFactory qef) {
		this.qef = qef;

		// sensible defaults; all three can be overridden via setters
		cbdGen = new ConciseBoundedDescriptionGeneratorImpl(qef);
		treeFactory = new QueryTreeFactoryBase();
		lggGenerator = new LGGGeneratorSimple();
	}

	/**
	 * Run the QTL algorithm given the 2 tuples as input example.
	 *
	 * @param tuple1 the first example
	 * @param tuple2 the second example
	 */
	public void run(List<Node> tuple1, List<Node> tuple2) {
		Objects.requireNonNull(tuple1,"First tuple must not be null");
		Objects.requireNonNull(tuple2,"Second tuple must not be null");

		run(Lists.newArrayList(tuple1, tuple2));
	}

	/**
	 * Run the QTL algorithm given the list of tuples as input examples.
	 *
	 * The elements of a tuple <code>t = (e_1, ..., e_n)</code> represent arbitrary RDF terms, i.e. each <code>e_i</code> can be
	 * either an IRI, a literal, or a blank node.
116 * 117 * <p> 118 * Requirements: 119 * <ul> 120 * <li>at least 2 tuples </li> 121 * <li>a tuple must contain at least one element</li> 122 * <li>for all tuples the number of elements must be the same</li> 123 * </ul> 124 * </p> 125 * 126 * @param tuples the examples 127 */ 128 public List<Map.Entry<RDFResourceTree, List<Node>>> run(List<List<Node>> tuples) { 129 Objects.requireNonNull(tuples,"Tuples must not be null"); 130 131 // sanity checks first 132 checkInput(tuples); 133 134 log.info("input tuples {}", tuples.stream().map(Object::toString).collect(Collectors.joining("\n"))); 135 136 // handle case with tuples of length separately -> just use the LGG of the trees 137 if(tuples.get(0).size() == 1) { 138 return runSingleNodeTuples(tuples); 139 } 140 141 142 // 1. we have to retrieve data for each node 143 // in particular each resource node 144 // for literals it could be to complicated as 145 // a) there are no outgoing triples and 146 // b) the number of incoming triples could be too large as literals like numbers could be used anywhere as value 147 148 // 2. 
for each mapping of trees, build graph(s) of connected trees 149 150 151 List<Map<String, Map.Entry<RDFResourceTree, List<Node>>>> tuple2Trees = tuples.stream().map(this::connect).collect(toList()); 152// List<Map<String, Map.Entry<RDFResourceTree, List<Node>>>> tuple2Trees = tuples.stream().map(this::computeConnectedTrees).collect(toList()); 153 154 // cluster by key 155 Map<String, ArrayList<Map.Entry<RDFResourceTree, List<Node>>>> grouped = tuple2Trees.stream() 156 .flatMap(m -> m.entrySet().stream()) 157 .collect(groupingBy(Map.Entry::getKey, 158 Collector.of(ArrayList::new, (s, p) -> s.add(p.getValue()), (s1, s2) -> { 159 s1.addAll(s2); 160 return s1; 161 }))); 162 163 // compute LGG per each key 164 List<Map.Entry<RDFResourceTree, List<Node>>> solutions = grouped.entrySet().stream() 165 .filter(e -> e.getValue().size() == tuples.size()) 166 .flatMap(entry -> { 167 log.debug("computing LGG for " + entry.getKey()); 168 169 List<Map.Entry<RDFResourceTree, List<Node>>> list = entry.getValue(); 170 171 List<RDFResourceTree> trees = list.stream().map(Map.Entry::getKey).collect(toList()); 172 173 List<Node> nodes2Select = list.get(0).getValue(); 174 175 trees.forEach(t -> log.trace("tree:\n{}", t::getStringRepresentation)); 176 177 RDFResourceTree lgg = lggGenerator.getLGG(trees); 178 log.debug("lgg:\n{}", lgg::getStringRepresentation); 179 180 if(lgg.isResourceNode()) { 181 log.warn("lgg was not generalizing with root {}", lgg); 182 return Stream.empty(); 183 } 184 185// System.out.println("LGG\n" + lgg.getStringRepresentation()); 186// System.out.println(QueryTreeUtils.toSPARQLQueryString(lgg, nodes2Select, null, PrefixMapping.Standard)); 187 188 return Stream.of(Maps.immutableEntry(lgg, nodes2Select)); 189 }) 190 .collect(Collectors.toList()); 191 192 return solutions; 193 } 194 195 private List<Map.Entry<RDFResourceTree, List<Node>>> runSingleNodeTuples(List<List<Node>> tuples) { 196 // map nodes to trees 197 List<RDFResourceTree> trees = tuples.stream() 198 
.flatMap(Collection::stream) // flatten list of lists of nodes 199 .map(this::asTree) // map node to tree 200 .map(Optional::get) 201 .collect(Collectors.toList()); 202 203 // compute LGG 204 RDFResourceTree lgg = lggGenerator.getLGG(trees); 205 log.debug("lgg:\n{}", lgg::getStringRepresentation); 206 207 return Collections.singletonList(Maps.immutableEntry(lgg, Collections.emptyList())); 208 } 209 210 private void checkInput(List<List<Node>> tuples) { 211 Objects.requireNonNull(tuples,"Tuples must not be null"); 212 213 // check for at least 2 tuples 214 if(tuples.size() < 2) { 215 log.warn("Min. number of input tuples is 2."); 216 throw new IllegalArgumentException("Min. number of input tuples is 2."); 217 } 218 219 // check for all tuples having same length 220 boolean sameTupleLength = tuples.stream().mapToInt(List::size).distinct().count() == 1; 221 if(!sameTupleLength) { 222 log.warn("Not all tuples have the same length. Currently, this is required!"); 223 throw new IllegalArgumentException("Not all tuples have the same length. 
Currently, this is required!"); 224 } 225 } 226 227 private RDFResourceTree applyFilters(RDFResourceTree tree, List<Node> nodes2Keep) { 228 RDFResourceTree filteredTree = tree; 229 230 for (AbstractTreeFilter<RDFResourceTree> f : treeFilters) { 231 f.setNodes2Keep(nodes2Keep); 232 filteredTree = f.apply(filteredTree); 233 } 234 235 return filteredTree; 236 } 237 238// private Map<Node, RDFResourceTree> asTrees(Set<Node> nodes) { 239// 240// } 241 242 private boolean useLiteralData = true; 243 244 public void setUseLiteralData(boolean useLiteralData) { 245 this.useLiteralData = useLiteralData; 246 } 247 248 int cnt = 0; 249 250 private Map<String, Map.Entry<RDFResourceTree, List<Node>>> connect(List<Node> tuple) { 251 log.debug("generating connected tree for tuple {}", tuple); 252 253 // filter URI resources 254 Set<String> resources = tuple.stream() 255 .filter(Node::isURI) 256 .map(Node::getURI) 257 .collect(Collectors.toSet()); 258 259 // map to one large model 260 Model model = cbdGen.getConciseBoundedDescription(resources); 261 if(model.isEmpty()) { 262 throw new RuntimeException(new QTLException("Could not get data for tuple " + tuple)); 263 } 264 log.debug("#triples:{}", model.size()); 265 266// PseudoGraphJenaGraph g = new PseudoGraphJenaGraph(model.getGraph()); 267 268// List<RDFNode> steinerNodes = tuple.stream().map(model::asRDFNode).collect(Collectors.toList()); 269// SteinerTreeGeneric<RDFNode, Statement> steinerTreeGen = new SteinerTreeGeneric<>(g, steinerNodes, 270// new EdgeFactoryJenaModel(model, anyProp)); 271// WeightedMultigraph<RDFNode, Statement> steinerTree = steinerTreeGen.getDefaultSteinerTree(); 272 273// GraphMLExporter<Node, Triple> exporter = new GraphMLExporter<>( 274// Node::toString, n -> FmtUtils.stringForNode(n, pm), 275// new IntegerComponentNameProvider<>(), e -> FmtUtils.stringForNode(e.getPredicate(), pm)); 276// try { 277// exporter.exportGraph(g, new FileWriter(new File("/tmp/steiner_tree_" + cnt++ + ".graphml"))); 278// } 
catch (IOException | ExportException e) { 279// log.error("failed to write graph to file", e); 280// } 281 282 List<Node> nodes = new ArrayList<>(tuple); 283 284 // starting from each node n, create the tree with n as root 285 Map<String, Map.Entry<RDFResourceTree, List<Node>>> result = new TreeMap<>(); 286 tuple.stream() 287 .filter(Node::isURI) 288 .forEach(node -> { 289 RDFResourceTree tree = treeFactory.getQueryTree(node.getURI(), model, 3); 290 Set<Node> nodes2Select = new LinkedHashSet<>(nodes); 291// nodes.remove(node); 292// Set<String> keys = keys(tree, asNodes(nodes)); 293 294// System.out.println(nodes); 295// System.out.println(node); 296// System.out.println(tree.getStringRepresentation()); 297// System.out.println(QueryTreeUtils.getNodes(tree).containsAll(nodes)); 298 299 String key = Integer.toString(nodes.indexOf(node)); 300 if(QueryTreeUtils.getNodeLabels(tree).containsAll(nodes)) { 301 nodes2Select.remove(node); 302 303 Set<Node> nodes2Project = new LinkedHashSet<>(); 304 nodes2Select.forEach(n -> { 305 Node anchor = NodeFactory.createBlankNode("var" + tuple.indexOf(n)); 306 getMatchingTreeNodes(tree, n).forEach(child -> { 307 child.setAnchorVar(anchor); 308 nodes2Project.add(anchor); 309 }); 310 }); 311 log.debug("connected tree\n{}", tree::getStringRepresentation); 312 QueryTreeUtils.asGraph(tree, baseIRI, pm, new File(System.getProperty("java.io.tmpdir") + File.separator + "tree-" + FmtUtils.stringForNode(node, pm) + ".graphml")); 313 result.put(key, Maps.immutableEntry(tree, new ArrayList<>(nodes2Project))); 314 }; 315 316 317 318 }); 319 320 321 log.debug("got {} possible connected trees", result.size()); 322 return result; 323 } 324 325// private Set<String> keys(RDFResourceTree tree, List<Node> nodes) { 326// List<RDFResourceTree> children = tree.getChildren(); 327// String key = "" + nodes.indexOf(tree.getData()); 328// children.stream().map(child -> { 329// int pos = nodes.indexOf(child.getData()); 330// if( pos >= 0) { 331// key += pos + 
"" 332// } 333// }); 334// 335// } 336 337 338 private Map<String, Map.Entry<RDFResourceTree, List<Node>>> computeConnectedTrees(List<Node> tuple) { 339 340 // mapping to tree for each node in tuple 341 Map<Node, Optional<RDFResourceTree>> mapping = mapping(tuple); 342 343 Map<String, Map.Entry<RDFResourceTree, List<Node>>> key2Trees = new HashMap<>(); 344 345 mapping.forEach((node, tree) -> { 346 if(node.isURI()) { 347 final StringBuilder key = new StringBuilder(tuple.indexOf(node)); 348 List<Node> nodes2Select = new ArrayList<>(); 349 350 RDFResourceTree newTree = new RDFResourceTree(tree.get()); 351 352 final AtomicInteger modified = new AtomicInteger(0); 353 354 mapping.forEach((otherNode, otherTree) -> { 355 if(!node.equals(otherNode)) { 356 List<RDFResourceTree> matchingTreeNodes = getMatchingTreeNodes(newTree, otherNode); 357 358 if(!matchingTreeNodes.isEmpty()) { 359 modified.set(1); 360 key.append(tuple.indexOf(otherNode)); 361 } 362 363 // plugin tree of other node 364 matchingTreeNodes.forEach(treeNode -> { 365 if(treeNode.isResourceNode()) { 366 Node edge = treeNode.getEdgeToParent(); 367 RDFResourceTree parent = treeNode.getParent(); 368 // copy the tree that will be attached in the current tree 369 RDFResourceTree newChild = new RDFResourceTree(otherTree.get()); 370 // replace the data with some anchor 371 Node newData = NodeFactory.createBlankNode("var" + tuple.indexOf(otherNode)); 372// newChild.setData(newData); 373 newChild.setAnchorVar(newData); 374 375 // attach the tree as child node 376 parent.replaceChild(treeNode, newChild, edge); 377 378 parent.addChild(newChild, edge); 379 nodes2Select.add(newData); 380// System.out.println("TEST\n" + newTree.getStringRepresentation()); 381 } else { 382 Node edge = treeNode.getEdgeToParent(); 383 RDFResourceTree parent = treeNode.getParent(); 384 parent.removeChild(treeNode, edge); 385 Node newData = NodeFactory.createBlankNode("var" + tuple.indexOf(otherNode)); 386// treeNode.setData(newData); 387 
treeNode.setAnchorVar(newData); 388 parent.addChild(treeNode, edge); 389 nodes2Select.add(newData); 390 } 391 392 }); 393 394 395 } 396 }); 397 if(modified.get() == 1) { 398 log.debug("connected tree({}):\n{}", () -> key, newTree::getStringRepresentation); 399 400// QueryTreeUtils.asGraph(newTree, baseIRI, pm, new File("/tmp/tree-" + pm.shortForm(node.getURI()) + ".graphml")); 401 key2Trees.put(key.toString(), Maps.immutableEntry(newTree, nodes2Select)); 402 } 403 } 404 }); 405 406 return key2Trees; 407 } 408 409 /** 410 * Find nodes matching the data in the given tree. 411 */ 412 private List<RDFResourceTree> getMatchingTreeNodes(RDFResourceTree tree, Node node) { 413 List<RDFResourceTree> treeNodes = new ArrayList<>(); 414 415 TreeTraversal<RDFResourceTree> treeTraversal = new PreOrderTreeTraversal<>(tree); 416 treeTraversal.forEachRemaining(treeNode -> { 417 if(treeNode.getData().matches(node)) { 418 treeNodes.add(treeNode); 419 } 420 }); 421 422 return treeNodes; 423 } 424 425 private Optional<RDFResourceTree> asTree(Node node) { 426 if (node.isURI()) { 427 if(useIncomingTriples) { 428 TreeBasedConciseBoundedDescriptionGenerator treeCBDGen = new TreeBasedConciseBoundedDescriptionGenerator(qef); 429 try { 430 Model cbd = treeCBDGen.getConciseBoundedDescription(node.getURI(), CBDStructureTree.fromTreeString("root:[in:[out:[]],out:[]]")); 431 return Optional.of(treeFactory.getQueryTree(node.toString(), cbd, 2)); 432 } catch (Exception e) { 433 log.error("Failed to compute CBD for " + node, e); 434 } 435 return Optional.empty(); 436 } else { 437 String iri = node.getURI(); 438 Model cbd = cbdGen.getConciseBoundedDescription(iri, maxTreeDepth); 439 RDFResourceTree tree = treeFactory.getQueryTree(node.getURI(), cbd, maxTreeDepth); 440 log.debug("tree({}):\n{}", node::toString, tree::getStringRepresentation); 441 return Optional.of(tree); 442 } 443 } else { 444 if(useIncomingTriples) { 445 TreeBasedConciseBoundedDescriptionGenerator treeCBDGen = new 
TreeBasedConciseBoundedDescriptionGenerator(qef); 446 try { 447 Model cbd = treeCBDGen.getConciseBoundedDescription(node.getLiteral(), CBDStructureTree.fromTreeString("root:[in:[out:[]]]")); 448 return Optional.of(treeFactory.getQueryTree(node.toString(), cbd, maxTreeDepth)); 449 } catch (Exception e) { 450 log.error("Failed to compute CBD for " + node, e); 451 } 452 } else { 453 return Optional.of(new RDFResourceTree(node)); 454 } 455 return Optional.empty(); 456 } 457 } 458 459 private LinkedHashMap<Node, Optional<RDFResourceTree>> mapping(List<Node> tuple) { 460 return tuple.stream().collect( 461 Collectors.toMap( 462 Function.identity(), 463 this::asTree, 464 (u, v) -> { 465 throw new IllegalStateException(String.format("Duplicate key %s", u)); 466 }, 467 LinkedHashMap::new) 468 ); 469 } 470 471 private boolean useIncomingTriples = false; 472 473 public boolean isUseIncomingTriples() { 474 return useIncomingTriples; 475 } 476 477 public void setUseIncomingTriples(boolean useIncomingTriples) { 478 this.useIncomingTriples = useIncomingTriples; 479 } 480 481 /** 482 * @param cbdGen the generator used to create the CBD for each resource in an input tuple 483 */ 484 public void setCBDGenerator(ConciseBoundedDescriptionGenerator cbdGen) { 485 this.cbdGen = cbdGen; 486 } 487 488 /** 489 * @param treeFactory the factory used to create a tree from a resource and its set of triples (CBD) 490 */ 491 public void setTreeFactory(QueryTreeFactory treeFactory) { 492 this.treeFactory = treeFactory; 493 } 494 495 /** 496 * @param lggGenerator the LGG generator used during the QTL algorithm 497 */ 498 public void setLggGenerator(LGGGenerator lggGenerator) { 499 this.lggGenerator = lggGenerator; 500 } 501 502 /** 503 * @param maxTreeDepth max. tree depth used for data retrieval and query tree generation of the input examples. 
	 */
	public void setMaxTreeDepth(int maxTreeDepth) {
		this.maxTreeDepth = maxTreeDepth;
	}

	/**
	 * @param pm the prefix mapping used for rendering trees and queries
	 */
	public void setPrefixMapping(PrefixMapping pm) {
		this.pm = pm;
	}

	/**
	 * @param baseIRI the base IRI used for rendering trees and queries
	 */
	public void setBaseIRI(String baseIRI) {
		this.baseIRI = baseIRI;
	}

	/**
	 * Experimentation entry point: executes a SPARQL query against a DBpedia endpoint, uses the
	 * result rows as input tuples for QTL and prints the learned queries.
	 * Requires a reachable endpoint (a local one is hard-coded below).
	 */
	public static void main(String[] args) throws Exception {
		System.setProperty("logFilename", "log4j2.properties");
//		org.apache.log4j.Logger.getRootLogger().getLoggerRepository().resetConfiguration();
//		org.apache.log4j.Logger.getRootLogger().setLevel(Level.DEBUG);
//		org.apache.log4j.Logger.getLogger(QTLTuples.class).setLevel(Level.DEBUG);

		// NOTE: queryStr is reassigned several times below; only the LAST assignment is effective,
		// the earlier ones are kept as alternative example queries
		String queryStr = "select * where { " +
				"?company a <http://dbpedia.org/ontology/Organisation> . " +
				"?company <http://dbpedia.org/ontology/foundationPlace> <http://dbpedia.org/resource/California> . " +
				"?product <http://dbpedia.org/ontology/developer> ?company . " +
				"?product a <http://dbpedia.org/ontology/Software> . }";

		queryStr = "PREFIX foaf: <http://xmlns.com/foaf/0.1/> " +
				"PREFIX dbp: <http://dbpedia.org/property/> " +
				"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " +
				"PREFIX dbont: <http://dbpedia.org/ontology/> " +
				"SELECT ?name ?date WHERE { " +
				"?name dbont:artist <http://dbpedia.org/resource/The_Beatles> . " +
				"?name rdf:type dbont:Album . " +
				"?name dbont:releaseDate ?date}" +
				" ORDER BY ?date";

		queryStr = "PREFIX dbpedia: <http://dbpedia.org/ontology/> \n" +
				"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" +
				"SELECT distinct ?b ?c WHERE { \n" +
				"?b rdf:type dbpedia:Film . \n" +
				"?c rdf:type dbpedia:Artist . \n" +
				"?b dbpedia:director <http://dbpedia.org/resource/Clint_Eastwood> ; \n" +
				"dbpedia:starring ?c . }";

		queryStr = "SELECT DISTINCT *\n" +
				"WHERE\n" +
				" { ?company a <http://dbpedia.org/ontology/Organisation> ;\n" +
				" <http://dbpedia.org/ontology/foundationPlace> <http://dbpedia.org/resource/California> .\n" +
				" ?product <http://dbpedia.org/ontology/developer> ?company ;\n" +
				" a <http://dbpedia.org/ontology/Software>\n" +
				" }";

		queryStr = "PREFIX : <http://dbpedia.org/resource/>\n" +
				"PREFIX dbo: <http://dbpedia.org/ontology/>\n" +
				"PREFIX owl: <http://www.w3.org/2002/07/owl#>\n" +
				"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
				"PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n" +
				"PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n" +
				"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" +
				"PREFIX dbpedia: <http://dbpedia.org/>\n" +
				"PREFIX dbpedia2: <http://dbpedia.org/property/>\n" +
				"PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n" +
				"PREFIX dc: <http://purl.org/dc/elements/1.1/>\n" +
				"\n" +
				"SELECT DISTINCT ?name ?birth ?death ?person\n" +
				"WHERE\n" +
				" { ?person dbpedia2:birthPlace :France ;\n" +
				" dbo:birthDate ?birth ;\n" +
				" foaf:name ?name ;\n" +
				" dbo:deathDate ?death\n" +
				" }\n" +
				"ORDER BY ?name";
		queryStr = "PREFIX : <http://dbpedia.org/resource/>\n" +
				"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
				"PREFIX db-ont: <http://dbpedia.org/ontology/>\n" +
				"PREFIX dbpedia: <http://dbpedia.org/>\n" +
				"PREFIX dbpedia2: <http://dbpedia.org/property/>\n" +
				"SELECT DISTINCT *\n" +
				"WHERE\n" +
				" { ?select rdf:type db-ont:Film ;\n" +
				" dbpedia2:title ?ft .\n" +
				" ?id db-ont:imdbId ?imdb_id\n" +
				" }";

		queryStr = "SELECT DISTINCT *\n" +
				"WHERE\n" +
				" { <http://dbpedia.org/resource/United_States>\n" +
				" a <http://dbpedia.org/ontology/Country> ;\n" +
				" ?p ?o\n" +
				" }";

		queryStr = "PREFIX : <http://dbpedia.org/resource/>\n" +
				"PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>\n" +
				"PREFIX owl: <http://www.w3.org/2002/07/owl#>\n" +
				"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
				"PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n" +
				"PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n" +
				"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" +
				"PREFIX dbpedia: <http://dbpedia.org/>\n" +
				"PREFIX dbpedia2: <http://dbpedia.org/property/>\n" +
				"PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n" +
				"PREFIX dc: <http://purl.org/dc/elements/1.1/>\n" +
				"\n" +
				"SELECT DISTINCT ?homepage\n" +
				"WHERE\n" +
				" { ?person rdf:type dbpedia-owl:Place ;\n" +
				" foaf:homepage ?homepage\n" +
				" }";

		queryStr = "PREFIX property: <http://dbpedia.org/property/>\n" +
				"PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n" +
				"PREFIX db: <http://dbpedia.org/ontology/>\n" +
				"\n" +
				"SELECT DISTINCT *\n" +
				"WHERE\n" +
				" { ?musician a db:MusicalArtist ;\n" +
				" db:activeYearsStartYear ?activeyearsstartyear ;\n" +
				" db:associatedBand ?associatedband ;\n" +
				" db:birthPlace ?birthplace ;\n" +
				" db:genre ?genre ;\n" +
				" db:recordLabel ?recordlable\n" +
				" }";

		// only the first `limit` result rows are used as input tuples
		int limit = 10;

		Query query = QueryFactory.create(queryStr);
		query.setOffset(0);
		query.setLimit(limit);

		System.out.println("Input query:\n" + query);

		String baseIRI = DBpedia.BASE_IRI;
		PrefixMapping pm = DBpedia.PM;

		// NOTE: the public DBpedia endpoint is immediately overridden by a local repository
		SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
		endpoint = SparqlEndpoint.create("http://localhost:7200/repositories/dbpedia?infer=false", Collections.emptyList());
		SparqlEndpointKS ks = new SparqlEndpointKS(endpoint);
		ks.init();
		AbstractReasonerComponent reasoner = new SPARQLReasoner(ks);
		reasoner.init();
		reasoner.prepareSubsumptionHierarchy();

		// each result row of the query becomes one input tuple (one node per projection variable)
		List<List<Node>> tuples = new ArrayList<>();
		QueryExecutionFactory qef = ks.getQueryExecutionFactory();
		try(QueryExecution qe = qef.createQueryExecution(query)) {
			List<Var> projectVars = query.getProjectVars();
			ResultSet rs = qe.execSelect();
			while(rs.hasNext()) {
				QuerySolution qs = rs.next();
				List<Node> tuple = new ArrayList<>();
				projectVars.forEach(var -> tuple.add(qs.get(var.getName()).asNode()));
				tuples.add(tuple);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

//		tuples = Lists.newArrayList(
//				Lists.newArrayList(
//						NodeFactory.createURI("http://dbpedia.org/resource/Brad_Pitt"),
//						NodeFactory.createLiteral("1963-12-18", XSDDatatype.XSDdate)),
//				Lists.newArrayList(
//						NodeFactory.createURI("http://dbpedia.org/resource/Tom_Hanks"),
//						NodeFactory.createLiteral("1956-07-09", XSDDatatype.XSDdate))
//		);

		Set<String> ignoredProperties = DBpedia.BLACKLIST_PROPERTIES;

		// restrict CBD retrieval to DBpedia ontology/property namespaces
		ConciseBoundedDescriptionGenerator cbdGen = new ConciseBoundedDescriptionGeneratorImpl(ks.getQueryExecutionFactory());
		cbdGen.setIgnoredProperties(ignoredProperties);
		cbdGen.setAllowedPropertyNamespaces(Sets.newHashSet("http://dbpedia.org/ontology/", "http://dbpedia.org/property/"));
		cbdGen.setAllowedClassNamespaces(Sets.newHashSet("http://dbpedia.org/ontology/"));

		QueryTreeFactory tf = new QueryTreeFactoryBaseInv();
		tf.setMaxDepth(2);

		int depth = 1;

		QTLTuples qtl = new QTLTuples(qef);
		qtl.setMaxTreeDepth(depth);
		qtl.setBaseIRI(baseIRI);
		qtl.setPrefixMapping(pm);
		qtl.setCBDGenerator(cbdGen);
		qtl.setTreeFactory(tf);


		// filters applied manually to each solution below (not registered via addTreeFilter)
		List<AbstractTreeFilter<RDFResourceTree>> filters = Lists.newArrayList(
				new PredicateExistenceFilterDBpedia(ks)
				,new MostSpecificTypesFilter(reasoner)
//				,new PredicateExistenceFilter() {
//					@Override
//					public boolean isMeaningless(Node predicate) {
//						return predicate.getURI().startsWith("http://dbpedia.org/property/") ||
//								predicate.getURI().startsWith("http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#")||
//								predicate.getURI().startsWith("http://www.wikidata.org/entity/") ||
//								predicate.getURI().startsWith(RDFS.getURI());
//					}
//				}
		);

		List<Map.Entry<RDFResourceTree, List<Node>>> solutions = qtl.run(tuples);

		// print each solution before and after filtering, plus the resulting SPARQL query
		solutions.forEach(sol -> {
			RDFResourceTree tree = sol.getKey();
			List<Node> nodes2Select = sol.getValue();
			QueryTreeUtils.rebuildNodeIDs(tree);

			System.out.println("LGG\n" + tree.getStringRepresentation(
					true,
					RDFResourceTree.Rendering.INDENTED, baseIRI, pm, true));

			System.out.println("nodes to select:" + nodes2Select);
			for (AbstractTreeFilter<RDFResourceTree> filter : filters) {
				filter.setNodes2Keep(nodes2Select);
				tree = filter.apply(tree);
			}
			QueryTreeUtils.rebuildNodeIDs(tree);

			System.out.println("LGG (filtered)\n" + tree.getStringRepresentation(
					false,
					RDFResourceTree.Rendering.INDENTED, baseIRI, pm, true));
			tree = new SymmetricPredicatesFilter(Collections.singleton(NodeFactory.createURI("http://dbpedia.org/ontology/spouse"))).apply(tree);

			String learnedQuery = QueryTreeUtils.toSPARQLQueryString(tree, nodes2Select, baseIRI, pm);
			Query q = QueryFactory.create(learnedQuery);
			QueryUtils.prunePrefixes(q);
			System.out.println(q);

			// NOTE(review): g is never used — the graph export below re-creates it internally
			Graph<Vertex, Edge> g = QueryTreeUtils.toGraph(tree, baseIRI, pm);

			QueryTreeUtils.asGraph(tree, baseIRI, pm, new File(System.getProperty("java.io.tmpdir") + File.separator + "lgg.graphml"));

		});


	}
}