001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.utilities; 020 021import com.google.common.collect.HashMultimap; 022import com.google.common.collect.Multimap; 023import com.google.common.collect.Sets; 024import com.mxgraph.layout.hierarchical.mxHierarchicalLayout; 025import com.mxgraph.util.mxCellRenderer; 026import com.mxgraph.util.mxConstants; 027import com.mxgraph.util.png.mxPngEncodeParam; 028import com.mxgraph.util.png.mxPngImageEncoder; 029import com.mxgraph.view.mxStylesheet; 030import org.aksw.jena_sparql_api.core.QueryExecutionFactory; 031import org.apache.commons.collections15.ListUtils; 032import org.apache.jena.datatypes.RDFDatatype; 033import org.apache.jena.graph.Node; 034import org.apache.jena.graph.Triple; 035import org.apache.jena.query.*; 036import org.apache.jena.rdf.model.impl.Util; 037import org.apache.jena.shared.PrefixMapping; 038import org.apache.jena.sparql.core.TriplePath; 039import org.apache.jena.sparql.core.Var; 040import org.apache.jena.sparql.syntax.*; 041import org.apache.jena.sparql.util.VarUtils; 042import org.apache.jena.vocabulary.RDF; 043import org.dllearner.kb.sparql.CBDStructureTree; 044import org.jgrapht.ext.JGraphXAdapter; 045import org.jgrapht.graph.DefaultDirectedGraph; 046import org.jgrapht.graph.DefaultEdge; 047import org.slf4j.Logger; 048import org.slf4j.LoggerFactory; 049 050import java.awt.*; 051import java.awt.image.BufferedImage; 052import java.io.File; 053import java.io.FileOutputStream; 054import java.io.IOException; 055import java.util.*; 056import java.util.List; 057import java.util.Map.Entry; 058import java.util.stream.Collectors; 059import java.util.stream.Stream; 060 061/** 062 * Utilities for SPARQL queries. 063 */ 064public class QueryUtils extends ElementVisitorBase { 065 066private static final Logger logger = LoggerFactory.getLogger(QueryUtils.class); 067 068 private static final ParameterizedSparqlString superClassesQueryTemplate = new ParameterizedSparqlString( 069 "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> " 070 + "SELECT ?sup WHERE {" 071 + "?sub ((rdfs:subClassOf|owl:equivalentClass)|^owl:equivalentClass)+ ?sup .}"); 072 073 private Set<Triple> triplePattern; 074 private Set<Triple> optionalTriplePattern; 075 076 private boolean inOptionalClause = false; 077 078 private int unionCount = 0; 079 private int optionalCount = 0; 080 private int filterCount = 0; 081 082 private Map<Triple, ElementGroup> triple2Parent = new HashMap<>(); 083 084 Stack<ElementGroup> parents = new Stack<>(); 085 086 public static String addPrefix(String queryString, Map<String, String> prefix2Namespace){ 087 Query query = QueryFactory.create(queryString); 088 for (Entry<String, String> entry : prefix2Namespace.entrySet()) { 089 String prefix = entry.getKey(); 090 String namespace = entry.getValue(); 091 query.setPrefix(prefix, namespace); 092 } 093 return query.toString(); 094 } 095 096 public static String addPrefixes(String queryString, String prefix, String namespace){ 097 Query query = QueryFactory.create(queryString); 098 query.setPrefix(prefix, namespace); 099 return query.toString(); 100 } 101 102 /** 103 * Remove unused prefix declarations from preamble. 104 * 105 * @param query the query 106 */ 107 public static void prunePrefixes(Query query) { 108 PrefixMapping pm = query.getPrefixMapping(); 109 String baseURI = query.getBaseURI(); 110 query.setBaseURI((String) null); 111 112 Set<String> usedPrefixes = Sets.newHashSet(); 113 getNodes(query).forEach(node -> { 114 String ns = null; 115 if(node.isURI()) { 116 ns = node.getNameSpace(); 117 } else if(node.isLiteral()) { 118 RDFDatatype dt = node.getLiteralDatatype(); 119 if(dt != null) { 120 String uri = dt.getURI(); 121 ns = uri.substring(0, Util.splitNamespaceXML(uri)); 122 } 123 } 124 125 if(ns != null) { 126 // check if base URI has been used 127 if(ns.equals(baseURI)) { 128 query.setBaseURI(baseURI); 129 } 130 // check if one of the prefixes has been used 131 String prefix = pm.getNsURIPrefix(ns); 132 if(prefix != null) { 133 usedPrefixes.add(prefix); 134 } 135 } 136 }); 137 // override prefix map 138 Map<String, String> prefixMap = pm.getNsPrefixMap(); 139 prefixMap.entrySet().removeIf(mapping -> !usedPrefixes.contains(mapping.getKey())); 140 pm.clearNsPrefixMap(); 141 pm.setNsPrefixes(prefixMap); 142 } 143 144 /** 145 * @param query the query 146 * @return all nodes that occur in triple patterns of the query 147 */ 148 public static Set<Node> getNodes(Query query) { 149 return getTriplePatterns(query).stream() 150 .map(QueryUtils::getNodes) 151 .flatMap(Collection::stream) 152 .collect(Collectors.toSet()); 153 } 154 155 /** 156 * Convert triple pattern to a set of nodes {s, p, o} 157 * 158 * @param t the triple pattern 159 * @return set of nodes {s, p, o} 160 */ 161 public static Set<Node> getNodes(Triple t) { 162 return Sets.newHashSet(t.getSubject(), t.getPredicate(), t.getObject()); 163 } 164 165 /** 166 * Returns all variables that occur in a triple pattern of the SPARQL query. 167 * @param query the query 168 * @return 169 */ 170 public Set<Var> getVariables(Query query){ 171 Set<Var> vars = new HashSet<>(); 172 173 Set<Triple> triplePatterns = extractTriplePattern(query, false); 174 175 for (Triple tp : triplePatterns) { 176 if(tp.getSubject().isVariable()){ 177 vars.add(Var.alloc(tp.getSubject())); 178 } else if(tp.getObject().isVariable()){ 179 vars.add(Var.alloc(tp.getObject())); 180 } else if(tp.getPredicate().isVariable()){ 181 vars.add(Var.alloc(tp.getPredicate())); 182 } 183 } 184 185 return vars; 186 } 187 188 /** 189 * Returns all variables that occur as subject in a triple pattern of the SPARQL query. 190 * @param query the query 191 * @return 192 */ 193 public Set<Var> getSubjectVariables(Query query){ 194 195 Set<Triple> triplePatterns = extractTriplePattern(query, false); 196 197 Set<Var> vars = new HashSet<>(); 198 for (Triple tp : triplePatterns) { 199 if(tp.getSubject().isVariable()){ 200 vars.add(Var.alloc(tp.getSubject())); 201 } 202 } 203 204 return vars; 205 } 206 207 /** 208 * Returns all variables that occur as subject in a triple pattern of the SPARQL query. 209 * @param query the query 210 * @return 211 */ 212 public static Set<Var> getSubjectVars(Query query){ 213 final Set<Var> vars = new HashSet<>(); 214 215 ElementWalker.walk(query.getQueryPattern(), new ElementVisitorBase(){ 216 @Override 217 public void visit(ElementTriplesBlock el) { 218 Iterator<Triple> triples = el.patternElts(); 219 while (triples.hasNext()) { 220 Triple triple = triples.next(); 221 if(triple.getSubject().isVariable()) { 222 vars.add(Var.alloc(triples.next().getSubject())); 223 } 224 } 225 } 226 227 @Override 228 public void visit(ElementPathBlock el) { 229 Iterator<TriplePath> triples = el.patternElts(); 230 while (triples.hasNext()) { 231 TriplePath triple = triples.next(); 232 if(triple.getSubject().isVariable()) { 233 vars.add(Var.alloc(triples.next().getSubject())); 234 } 235 } 236 } 237 }); 238 239 return vars; 240 } 241 242 public static Set<Triple> getTriplePatterns(Query query){ 243 final Set<Triple> triplePatterns = Sets.newHashSet(); 244 245 ElementWalker.walk(query.getQueryPattern(), new ElementVisitorBase(){ 246 @Override 247 public void visit(ElementTriplesBlock el) { 248 Iterator<Triple> triples = el.patternElts(); 249 while (triples.hasNext()) { 250 Triple triple = triples.next(); 251 triplePatterns.add(triple); 252 } 253 } 254 255 @Override 256 public void visit(ElementPathBlock el) { 257 Iterator<TriplePath> triplePaths = el.patternElts(); 258 while (triplePaths.hasNext()) { 259 TriplePath tp = triplePaths.next(); 260 if(tp.isTriple()) { 261 Triple triple = tp.asTriple(); 262 triplePatterns.add(triple); 263 } 264 } 265 } 266 }); 267 return triplePatterns; 268 } 269 270 /** 271 * Given a SPARQL query and a start node, return the outgoing 272 * triple patterns. 273 * @param query the query 274 * @param source the start node 275 * @return 276 */ 277 public static Set<Triple> getOutgoingTriplePatterns(Query query, final Node source){ 278 final Set<Triple> outgoingTriples = Sets.newHashSet(); 279 280 ElementWalker.walk(query.getQueryPattern(), new ElementVisitorBase(){ 281 @Override 282 public void visit(ElementTriplesBlock el) { 283 Iterator<Triple> triples = el.patternElts(); 284 while (triples.hasNext()) { 285 Triple triple = triples.next(); 286 Node subject = triple.getSubject(); 287 if(subject.equals(source)) { 288 outgoingTriples.add(triple); 289 } 290 } 291 } 292 293 @Override 294 public void visit(ElementPathBlock el) { 295 Iterator<TriplePath> triplePaths = el.patternElts(); 296 while (triplePaths.hasNext()) { 297 TriplePath tp = triplePaths.next(); 298 if(tp.isTriple()) { 299 Triple triple = tp.asTriple(); 300 Node subject = triple.getSubject(); 301 if(subject.equals(source)) { 302 outgoingTriples.add(triple); 303 } 304 } 305 } 306 } 307 }); 308 309 return outgoingTriples; 310 } 311 312 /** 313 * Given a SPARQL query and a start node, return the maximum subject-object 314 * join depth. 315 * @param query the query 316 * @param source the start node 317 * @return 318 */ 319 public static int getSubjectObjectJoinDepth(Query query, final Node source){ 320 final Set<Triple> outgoingTriples = Sets.newHashSet(); 321 322 ElementWalker.walk(query.getQueryPattern(), new ElementVisitorBase(){ 323 @Override 324 public void visit(ElementTriplesBlock el) { 325 Iterator<Triple> triples = el.patternElts(); 326 while (triples.hasNext()) { 327 Triple triple = triples.next(); 328 Node subject = triple.getSubject(); 329 if(subject.equals(source) && triple.getObject().isVariable()) { 330 outgoingTriples.add(triple); 331 } 332 } 333 } 334 335 @Override 336 public void visit(ElementPathBlock el) { 337 Iterator<TriplePath> triplePaths = el.patternElts(); 338 while (triplePaths.hasNext()) { 339 TriplePath tp = triplePaths.next(); 340 if(tp.isTriple()) { 341 Triple triple = tp.asTriple(); 342 Node subject = triple.getSubject(); 343 if(subject.equals(source) && triple.getObject().isVariable()) { 344 outgoingTriples.add(triple); 345 } 346 } 347 } 348 } 349 }); 350 351 int maxDepth = 0; 352 for (Triple triple : outgoingTriples) { 353 maxDepth = Math.max(maxDepth, 1 + getSubjectObjectJoinDepth(query, triple.getObject())); 354 } 355 356 return maxDepth; 357 } 358 359 public static CBDStructureTree getOptimalCBDStructure(Query query) { 360 CBDStructureTree tree = new CBDStructureTree("root"); 361 362 Var var = query.getProjectVars().get(0); 363 364 getOptimalCBDStructure(query, tree, var.asNode(), null, ""); 365 366 return tree; 367 } 368 369 private static void getOptimalCBDStructure(Query query, CBDStructureTree structureTree, Node current, Node parent, String direction) { 370 QueryUtils utils = new QueryUtils(); 371 372 // traverse the outgoing paths 373 Set<Triple> tmp = utils.extractOutgoingTriplePatterns(query, current) 374 .stream() 375 .filter(tp -> !direction.equals("in") || !tp.getObject().matches(parent)) 376 .collect(Collectors.toSet()); 377 if(!tmp.isEmpty()) { 378 List<CBDStructureTree> outChildren = structureTree.getChildren().stream().filter(CBDStructureTree::isOutNode).collect(Collectors.toList()); 379 CBDStructureTree outChild; 380 if(outChildren.isEmpty()) { 381 outChild = structureTree.addOutNode(); 382 } else { 383 outChild = outChildren.get(0); 384 } 385 tmp.stream() 386 .filter(tp -> tp.getObject().isVariable()) 387 .map(Triple::getObject) 388 .forEach(node -> getOptimalCBDStructure(query, outChild, node, current, "out")); 389 } 390 // traverse the incoming paths 391 tmp = utils.extractIncomingTriplePatterns(query, current) 392 .stream() 393 .filter(tp -> !direction.equals("out") || !tp.getSubject().matches(parent)) 394 .collect(Collectors.toSet()); 395 if(!tmp.isEmpty()) { 396 CBDStructureTree inChild = structureTree.addInNode(); 397 398 tmp.stream() 399 .filter(tp -> tp.getSubject().isVariable()) 400 .map(Triple::getSubject) 401 .forEach(node -> getOptimalCBDStructure(query, inChild, node, current, "in")); 402 } 403 } 404 405 /** 406 * Returns all variables that occur as object in a triple pattern of the SPARQL query. 407 * @param query the query 408 * @return 409 */ 410 public static Set<Var> getObjectVars(Query query){ 411 return getTriplePatterns(query).stream() 412 .filter(tp -> tp.getObject().isVariable()) 413 .map(tp -> Var.alloc(tp.getObject())) 414 .collect(Collectors.toSet()); 415 } 416 417 /** 418 * Returns all variables that occur as subject in a triple pattern of the SPARQL query. 419 * @param query the query 420 * @return 421 */ 422 public Set<Var> getObjectVariables(Query query){ 423 return extractTriplePattern(query, false).stream() 424 .filter(tp -> tp.getObject().isVariable()) 425 .map(tp -> Var.alloc(tp.getObject())) 426 .collect(Collectors.toSet()); 427 } 428 429 /** 430 * Returns all triple patterns in given SPARQL query that have the given node in subject position, i.e. the outgoing 431 * triple patterns. 432 * @param query The SPARQL query. 433 * @param node the node 434 * @return 435 */ 436 public Set<Triple> extractOutgoingTriplePatterns(Query query, Node node){ 437 return extractTriplePattern(query, false).stream() 438 .filter(t -> t.subjectMatches(node)) 439 .collect(Collectors.toSet()); 440 } 441 442 public Set<Triple> extractOutgoingTriplePatternsTrans(Query query, Node node) { 443 return Stream.concat(extractOutgoingTriplePatterns(query, node).stream(), 444 extractOutgoingTriplePatterns(query, node).stream() 445 .map(tp -> extractOutgoingTriplePatternsTrans(query, tp.getObject())) 446 .flatMap(set -> set.stream())) 447 .collect(Collectors.toSet()); 448 } 449 450 /** 451 * Returns all triple patterns in given SPARQL query that have the given node in object position, i.e. the incoming 452 * triple patterns. 453 * @param query The SPARQL query. 454 * @param node the node 455 * @return 456 */ 457 public Set<Triple> extractIncomingTriplePatterns(Query query, Node node){ 458 return extractTriplePattern(query, false).stream() 459 .filter(tp -> tp.objectMatches(node)) 460 .collect(Collectors.toSet()); 461 } 462 463 public Set<Triple> extractIncomingTriplePatternsTrans(Query query, Node node) { 464 return Stream.concat(extractIncomingTriplePatterns(query, node).stream(), 465 extractIncomingTriplePatterns(query, node).stream() 466 .map(tp -> extractIncomingTriplePatternsTrans(query, tp.getSubject())) 467 .flatMap(set -> set.stream())) 468 .collect(Collectors.toSet()); 469 } 470 471 /** 472 * Returns all triple patterns in given SPARQL query that have the given node either in subject or in object position, i.e. 473 * the ingoing and outgoing triple patterns. 474 * @param query The SPARQL query. 475 * @param node the node 476 * @return 477 */ 478 public Set<Triple> extractTriplePatterns(Query query, Node node){ 479 Set<Triple> triplePatterns = new HashSet<>(); 480 triplePatterns.addAll(extractIncomingTriplePatterns(query, node)); 481 triplePatterns.addAll(extractOutgoingTriplePatterns(query, node)); 482 return triplePatterns; 483 } 484 485 /** 486 * Returns all triple patterns in given SPARQL query that contain the 487 * given predicate. 488 * @param query the SPARQL query. 489 * @param predicate the predicate 490 * @return 491 */ 492 public Set<Triple> extractTriplePatternsWithPredicate(Query query, Node predicate){ 493 // get all triple patterns 494 Set<Triple> triplePatterns = extractTriplePattern(query); 495 496 // filter by predicate 497 triplePatterns.removeIf(tp -> !tp.predicateMatches(predicate)); 498 499 return triplePatterns; 500 } 501 502 /** 503 * Returns all triple patterns in given SPARQL query that have the given node either in subject or in object position, i.e. 504 * the incoming and outgoing triple patterns. 505 * @param query The SPARQL query. 506 * @param node the node 507 * @return 508 */ 509 public Set<Triple> extractNonOptionalTriplePatterns(Query query, Node node){ 510 Set<Triple> triplePatterns = new HashSet<>(); 511 triplePatterns.addAll(extractIncomingTriplePatterns(query, node)); 512 triplePatterns.addAll(extractOutgoingTriplePatterns(query, node)); 513 triplePatterns.removeAll(optionalTriplePattern); 514 return triplePatterns; 515 } 516 517 /** 518 * Returns triple patterns for each projection variable v such that v is either in subject or object position. 519 * @param query The SPARQL query. 520 * @return 521 */ 522 public Map<Var,Set<Triple>> extractTriplePatternsForProjectionVars(Query query){ 523 Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<>(); 524 for (Var var : query.getProjectVars()) { 525 Set<Triple> triplePatterns = new HashSet<>(); 526 triplePatterns.addAll(extractIncomingTriplePatterns(query, var)); 527 triplePatterns.addAll(extractOutgoingTriplePatterns(query, var)); 528 var2TriplePatterns.put(var, triplePatterns); 529 } 530 return var2TriplePatterns; 531 } 532 533 /** 534 * Returns triple patterns for each projection variable v such that v is in subject position. 535 * @param query The SPARQL query. 536 * @return 537 */ 538 public Map<Var,Set<Triple>> extractOutgoingTriplePatternsForProjectionVars(Query query){ 539 Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<>(); 540 for (Var var : query.getProjectVars()) { 541 Set<Triple> triplePatterns = new HashSet<>(extractOutgoingTriplePatterns(query, var)); 542 var2TriplePatterns.put(var, triplePatterns); 543 } 544 return var2TriplePatterns; 545 } 546 547 /** 548 * @return the optionalTriplePattern 549 */ 550 public Set<Triple> getOptionalTriplePatterns() { 551 return optionalTriplePattern; 552 } 553 554 /** 555 * Returns triple patterns for each projection variable v such that v is in subject position. 556 * @param query The SPARQL query. 557 * @return 558 */ 559 public Map<Var,Set<Triple>> extractIncomingTriplePatternsForProjectionVars(Query query){ 560 Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<>(); 561 for (Var var : query.getProjectVars()) { 562 Set<Triple> triplePatterns = new HashSet<>(extractIncomingTriplePatterns(query, var)); 563 var2TriplePatterns.put(var, triplePatterns); 564 } 565 return var2TriplePatterns; 566 } 567 568 /** 569 * Returns triple patterns for each projection variable v such that v is in object position. 570 * @param query The SPARQL query. 571 * @return 572 */ 573 public Map<Var,Set<Triple>> extractIngoingTriplePatternsForProjectionVars(Query query){ 574 Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<>(); 575 for (Var var : query.getProjectVars()) { 576 Set<Triple> triplePatterns = new HashSet<>(extractIncomingTriplePatterns(query, var)); 577 var2TriplePatterns.put(var, triplePatterns); 578 } 579 return var2TriplePatterns; 580 } 581 582 public Set<Triple> extractTriplePattern(Query query){ 583 return extractTriplePattern(query, false); 584 } 585 586 public Set<Triple> extractTriplePattern(Query query, boolean ignoreOptionals){ 587 triplePattern = new HashSet<>(); 588 optionalTriplePattern = new HashSet<>(); 589 590 query.getQueryPattern().visit(this); 591 592 //postprocessing: triplepattern in OPTIONAL clause 593 if(!ignoreOptionals){ 594 if(query.isSelectType()){ 595 for(Triple t : optionalTriplePattern){ 596 if(!ListUtils.intersection(new ArrayList<>(VarUtils.getVars(t)), query.getProjectVars()).isEmpty()){ 597 triplePattern.add(t); 598 } 599 } 600 } 601 } 602 return triplePattern; 603 } 604 605 public boolean isOptional(Triple triple){ 606 return optionalTriplePattern.contains(triple); 607 } 608 609 public Set<Triple> extractTriplePattern(ElementGroup group){ 610 return extractTriplePattern(group, false); 611 } 612 613 public Set<Triple> extractTriplePattern(ElementGroup group, boolean ignoreOptionals){ 614 triplePattern = new HashSet<>(); 615 optionalTriplePattern = new HashSet<>(); 616 617 group.visit(this); 618 619 //postprocessing: triplepattern in OPTIONAL clause 620 if(!ignoreOptionals){ 621 triplePattern.addAll(optionalTriplePattern); 622 } 623 624 return triplePattern; 625 } 626 627 public Query removeUnboundObjectVarTriples(Query query) { 628 QueryUtils queryUtils = new QueryUtils(); 629 630 Var rootVar = query.getProjectVars().get(0); 631 632 // 1. outgoing triple paths pruning 633 Set<Triple> outgoingTriplePatterns = queryUtils.extractOutgoingTriplePatternsTrans(query, rootVar); 634 Multimap<Var, Triple> var2OutgoingTriplePatterns = HashMultimap.create(); 635 636 // mapping from variable to triple pattern 637 for (Triple tp : outgoingTriplePatterns) { 638 var2OutgoingTriplePatterns.put(Var.alloc(tp.getSubject()), tp); 639 } 640 641 // remove triple patterns with object is var node and leaf node 642 Iterator<Triple> iterator = outgoingTriplePatterns.iterator(); 643 while (iterator.hasNext()) { 644 Triple triple = iterator.next(); 645 Node object = triple.getObject(); 646 if(object.isVariable() && !var2OutgoingTriplePatterns.containsKey(Var.alloc(object))) { 647 iterator.remove(); 648 } 649 } 650 651 // 2. incoming triple paths pruning 652 Set<Triple> incomingTriplePatterns = queryUtils.extractIncomingTriplePatternsTrans(query, rootVar); 653 Multimap<Var, Triple> var2IncomingTriplePatterns = HashMultimap.create(); 654 655 // mapping from variable to triple pattern 656 for (Triple tp : incomingTriplePatterns) { 657 var2IncomingTriplePatterns.put(Var.alloc(tp.getObject()), tp); 658 } 659 660 // remove triple patterns with object is var node and leaf node 661 iterator = incomingTriplePatterns.iterator(); 662 while (iterator.hasNext()) { 663 Triple triple = iterator.next(); 664 Node s = triple.getSubject(); 665 if(s.isVariable() && !var2IncomingTriplePatterns.containsKey(Var.alloc(s))) { 666 iterator.remove(); 667 } 668 } 669 670 Query newQuery = new Query(); 671 newQuery.addProjectVars(query.getProjectVars()); 672 ElementTriplesBlock el = new ElementTriplesBlock(); 673 for (Triple triple : Sets.union(outgoingTriplePatterns, incomingTriplePatterns)) { 674 el.addTriple(triple); 675 } 676 newQuery.setQuerySelectType(); 677 newQuery.setDistinct(true); 678 newQuery.setQueryPattern(el); 679 680 return newQuery; 681 } 682 683 /** 684 * Removes triple patterns of form (s rdf:type A) if there exists a 685 * triple pattern (s rdf:type B) such that the underlying 686 * knowledge base entails (B rdfs:subClassOf A). 687 * @param qef the query execution factory 688 * @param query the query 689 */ 690 public void filterOutGeneralTypes(QueryExecutionFactory qef, Query query){ 691 // extract all rdf:type triple patterns 692 Set<Triple> typeTriplePatterns = extractTriplePatternsWithPredicate(query, RDF.type.asNode()); 693 694 // group by subject 695 Multimap<Node, Triple> subject2TriplePatterns = HashMultimap.create(); 696 for (Triple tp : typeTriplePatterns) { 697 subject2TriplePatterns.put(tp.getSubject(), tp); 698 } 699 700 // keep the most specific types for each subject 701 for (Node subject : subject2TriplePatterns.keySet()) { 702 Collection<Triple> triplePatterns = subject2TriplePatterns.get(subject); 703 Collection<Triple> triplesPatterns2Remove = new HashSet<>(); 704 705 for (Triple tp : triplePatterns) { 706 if(!triplesPatterns2Remove.contains(tp)) { 707 // get all super classes for the triple object 708 Set<Node> superClasses = getSuperClasses(qef, tp.getObject()); 709 710 // remove triple patterns that have one of the super classes as object 711 for (Triple tp2 : triplePatterns) { 712 if(tp2 != tp && superClasses.contains(tp2.getObject())) { 713 triplesPatterns2Remove.add(tp2); 714 } 715 } 716 } 717 } 718 719 // remove triple patterns 720 triplePatterns.removeAll(triplesPatterns2Remove); 721 } 722 } 723 724 public static DefaultDirectedGraph<Node, LabeledEdge> asJGraphT(Query query) { 725 QueryUtils utils = new QueryUtils(); 726 727 Set<Triple> tps = utils.extractTriplePattern(query); 728 729 DefaultDirectedGraph<Node, LabeledEdge> g = new DefaultDirectedGraph<>(LabeledEdge.class); 730 731 tps.forEach(tp -> { 732 g.addVertex(tp.getSubject()); 733 g.addVertex(tp.getObject()); 734 g.addEdge(tp.getSubject(), tp.getObject(), new LabeledEdge(tp.getSubject(), tp.getObject(), tp.getPredicate())); 735 }); 736 737 return g; 738 } 739 740 public static void exportAsGraph(Query query, File file) { 741 DefaultDirectedGraph<Node, LabeledEdge> g = asJGraphT(query); 742 System.out.println(g.edgeSet().size()); 743 744 JGraphXAdapter adapter = new JGraphXAdapter(g); 745 746 747 // positioning via jgraphx layouts 748 mxHierarchicalLayout layout = new mxHierarchicalLayout(adapter); 749 layout.execute(adapter.getDefaultParent()); 750 751 Map<String, Object> edgeStyle = new HashMap<>(); 752//edgeStyle.put(mxConstants.STYLE_EDGE, mxConstants.EDGESTYLE_ORTHOGONAL); 753 edgeStyle.put(mxConstants.STYLE_SHAPE, mxConstants.SHAPE_CONNECTOR); 754 edgeStyle.put(mxConstants.STYLE_ENDARROW, mxConstants.ARROW_CLASSIC); 755 edgeStyle.put(mxConstants.STYLE_STROKECOLOR, "#000000"); 756 edgeStyle.put(mxConstants.STYLE_FONTCOLOR, "#000000"); 757 edgeStyle.put(mxConstants.STYLE_LABEL_BACKGROUNDCOLOR, "#ffffff"); 758 759 Map<String, Object> nodeStyle = new HashMap<>(); 760 nodeStyle.put(mxConstants.STYLE_SHAPE, mxConstants.SHAPE_ELLIPSE); 761 nodeStyle.put(mxConstants.STYLE_VERTICAL_ALIGN, mxConstants.ALIGN_MIDDLE); 762 763 mxStylesheet stylesheet = new mxStylesheet(); 764 stylesheet.setDefaultEdgeStyle(edgeStyle); 765 stylesheet.setDefaultVertexStyle(nodeStyle); 766 767 adapter.setStylesheet(stylesheet); 768 769// JFrame frame = new JFrame(); 770// frame.getContentPane().add(new mxGraphComponent(adapter)); 771// frame.pack(); 772// frame.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE); 773// frame.setVisible(true); 774 775 776 777 BufferedImage image = mxCellRenderer.createBufferedImage(adapter, null, 1, Color.WHITE, true, null); 778 mxPngEncodeParam param = mxPngEncodeParam.getDefaultEncodeParam(image); 779 780 781 try { 782 FileOutputStream outputStream = new FileOutputStream(file); 783 mxPngImageEncoder encoder = new mxPngImageEncoder(outputStream, param); 784 if (image != null) { 785 encoder.encode(image); 786 } 787 outputStream.close(); 788// ImageIO.write(image, "PNG", file); 789 } catch (IOException e) { 790 e.printStackTrace(); 791 } 792 793 } 794 795 private Set<Node> getSuperClasses(QueryExecutionFactory qef, Node cls){ 796 Set<Node> superClasses = new HashSet<>(); 797 798 superClassesQueryTemplate.setIri("sub", cls.getURI()); 799 800 String query = superClassesQueryTemplate.toString(); 801 802 try { 803 QueryExecution qe = qef.createQueryExecution(query); 804 ResultSet rs = qe.execSelect(); 805 while(rs.hasNext()){ 806 QuerySolution qs = rs.next(); 807 superClasses.add(qs.getResource("sup").asNode()); 808 } 809 qe.close(); 810 } catch (Exception e) { 811 logger.error("ERROR. Getting super classes of " + cls + " failed.", e); 812 } 813 814 return superClasses; 815 } 816 817 @Override 818 public void visit(ElementGroup el) { 819 parents.push(el); 820 for (Element e : el.getElements()) { 821 e.visit(this); 822 } 823 parents.pop(); 824 } 825 826 @Override 827 public void visit(ElementOptional el) { 828 optionalCount++; 829 inOptionalClause = true; 830 el.getOptionalElement().visit(this); 831 inOptionalClause = false; 832 } 833 834 @Override 835 public void visit(ElementTriplesBlock el) { 836 for (Iterator<Triple> iterator = el.patternElts(); iterator.hasNext();) { 837 Triple t = iterator.next(); 838 if(inOptionalClause){ 839 optionalTriplePattern.add(t); 840 } else { 841 triplePattern.add(t); 842 } 843 if(!parents.isEmpty()){ 844 triple2Parent.put(t, parents.peek()); 845 } 846 } 847 } 848 849 @Override 850 public void visit(ElementPathBlock el) { 851 for (Iterator<TriplePath> iterator = el.patternElts(); iterator.hasNext();) { 852 TriplePath tp = iterator.next(); 853 if(inOptionalClause){ 854 if(tp.isTriple()){ 855 optionalTriplePattern.add(tp.asTriple()); 856 if(!parents.isEmpty()){ 857 triple2Parent.put(tp.asTriple(), parents.peek()); 858 } 859 } 860 } else { 861 if(tp.isTriple()){ 862 triplePattern.add(tp.asTriple()); 863 if(!parents.isEmpty()){ 864 triple2Parent.put(tp.asTriple(), parents.peek()); 865 } 866 } 867 } 868 869 } 870 } 871 872 @Override 873 public void visit(ElementUnion el) { 874 unionCount++; 875 for (Element e : el.getElements()) { 876 e.visit(this); 877 } 878 } 879 880 @Override 881 public void visit(ElementFilter el) { 882 filterCount++; 883 } 884 885 public int getUnionCount() { 886 return unionCount; 887 } 888 889 public int getOptionalCount() { 890 return optionalCount; 891 } 892 893 public int getFilterCount() { 894 return filterCount; 895 } 896 897 /** 898 * Returns the ElementGroup object containing the triple pattern. 899 * @param triple the triple patterm 900 * @return 901 */ 902 public ElementGroup getElementGroup(Triple triple){ 903 return triple2Parent.get(triple); 904 } 905 906 public static class LabeledEdge extends DefaultEdge { 907 private final Node s; 908 private final Node t; 909 private final Node edge; 910 911 public LabeledEdge(Node s, Node t, Node edge) { 912 this.s = s; 913 this.t = t; 914 this.edge = edge; 915 } 916 917 public Node getEdge() { 918 return edge; 919 } 920 921 @Override 922 public String toString() { 923 return edge.toString(); 924 } 925 } 926 927 public static void main(String[] args) throws Exception { 928 Query q = QueryFactory.create( 929 "PREFIX dbp: <http://dbpedia.org/resource/>\n" + 930 "PREFIX dbo: <http://dbpedia.org/ontology/>\n" + 931 "SELECT ?thumbnail\n" + 932 "WHERE\n" + 933 " { dbp:total dbo:thumbnail ?thumbnail }"); 934 QueryUtils queryUtils = new QueryUtils(); 935 System.out.println(queryUtils.extractOutgoingTriplePatterns(q, q.getProjectVars().get(0))); 936 System.out.println(queryUtils.extractIncomingTriplePatterns(q, q.getProjectVars().get(0))); 937 938 q = QueryFactory.create("SELECT DISTINCT ?x0\n" + 939 "WHERE\n" + 940 " { ?x0 <http://dbpedia.org/ontology/activeYearsEndYear> ?date5 ;\n" + 941 " <http://dbpedia.org/ontology/activeYearsStartYear> ?date4 ;\n" + 942 " <http://dbpedia.org/ontology/birthDate> ?date0 ;\n" + 943 " <http://dbpedia.org/ontology/birthPlace> <http://dbpedia.org/resource/Austria> ;\n" + 944 " <http://dbpedia.org/ontology/birthPlace> <http://dbpedia.org/resource/Austria-Hungary> ;\n" + 945 " <http://dbpedia.org/ontology/birthPlace> <http://dbpedia.org/resource/Vienna> ;\n" + 946 " <http://dbpedia.org/ontology/birthYear> ?date3 ;\n" + 947 " <http://dbpedia.org/ontology/deathDate> ?date2 ;\n" + 948 " <http://dbpedia.org/ontology/deathPlace> <http://dbpedia.org/resource/Berlin> ;\n" + 949 " <http://dbpedia.org/ontology/deathPlace> <http://dbpedia.org/resource/Germany> ;\n" + 950 " <http://dbpedia.org/ontology/deathYear> ?date1 ;\n" + 951 " <http://dbpedia.org/ontology/occupation> <http://dbpedia.org/resource/Hilde_K%C3%B6rber__1> ;\n" + 952 " <http://dbpedia.org/ontology/viafId> \"32259546\" ;\n" + 953 " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Person> .\n" + 954 " FILTER ( ( str(?date0) = \"1906-07-03+02:00\" ) || ( str(?date0) = \"1906-07-03\" ) )\n" + 955 " FILTER ( ( str(?date1) = \"1969+02:00\" ) || ( str(?date1) = \"1969-01-01\" ) )\n" + 956 " FILTER ( ( str(?date2) = \"1969-05-31+02:00\" ) || ( str(?date2) = \"1969-05-31\" ) )\n" + 957 " FILTER ( ( str(?date3) = \"1906+02:00\" ) || ( str(?date3) = \"1906-01-01\" ) )\n" + 958 " FILTER ( ( str(?date4) = \"1930+02:00\" ) || ( str(?date4) = \"1930-01-01\" ) )\n" + 959 " FILTER ( ( str(?date5) = \"1964+02:00\" ) || ( str(?date5) = \"1964-01-01\" ) )\n" + 960 " }"); 961 962 System.out.println(queryUtils.removeUnboundObjectVarTriples(q)); 963 964 String query = "SELECT DISTINCT ?s WHERE {" 965 + "?s a <http://dbpedia.org/ontology/BeautyQueen> ." 966 + "?s <http://dbpedia.org/ontology/birthPlace> ?o0 ." 967 + "?o0 <http://dbpedia.org/ontology/isPartOf> ?o1 ." 968 + "?o1 <http://dbpedia.org/ontology/timeZone> <http://dbpedia.org/resource/Eastern_Time_Zone> .}"; 969 970 System.out.println(QueryUtils.getSubjectObjectJoinDepth(QueryFactory.create(query), Var.alloc("s"))); 971 972 System.out.println(queryUtils.extractOutgoingTriplePatternsTrans(QueryFactory.create(query), Var.alloc("s"))); 973 } 974 975 976}