001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.qtl; 020 021import java.io.*; 022import java.util.*; 023import java.util.function.Consumer; 024import java.util.stream.Collectors; 025import java.util.stream.Stream; 026 027import com.google.common.collect.HashMultimap; 028import com.google.common.collect.Multimap; 029import org.apache.jena.datatypes.RDFDatatype; 030import org.apache.jena.graph.Node; 031import org.apache.jena.graph.NodeFactory; 032import org.apache.jena.query.Query; 033import org.apache.jena.query.QueryFactory; 034import org.apache.jena.query.Syntax; 035import org.apache.jena.rdf.model.*; 036import org.apache.jena.reasoner.Reasoner; 037import org.apache.jena.reasoner.ReasonerRegistry; 038import org.apache.jena.shared.PrefixMapping; 039import org.apache.jena.sparql.core.Var; 040import org.apache.jena.sparql.expr.*; 041import org.apache.jena.sparql.serializer.SerializationContext; 042import org.apache.jena.sparql.syntax.ElementFilter; 043import org.apache.jena.sparql.syntax.ElementGroup; 044import org.apache.jena.sparql.util.FmtUtils; 045import org.apache.jena.sparql.util.NodeUtils; 046import org.apache.jena.vocabulary.OWL; 047import org.apache.jena.vocabulary.RDF; 048import org.apache.jena.vocabulary.RDFS; 049import org.dllearner.algorithms.qtl.datastructures.NodeInv; 050import org.dllearner.algorithms.qtl.datastructures.QueryTree; 051import org.dllearner.algorithms.qtl.datastructures.impl.GenericTree; 052import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeConversionStrategy; 053import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeSubsumptionStrategy; 054import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree; 055import org.dllearner.algorithms.qtl.datastructures.rendering.Edge; 056import org.dllearner.algorithms.qtl.datastructures.rendering.Vertex; 057import org.dllearner.algorithms.qtl.operations.traversal.LevelOrderTreeTraversal; 058import org.dllearner.algorithms.qtl.operations.traversal.PreOrderTreeTraversal; 059import org.dllearner.algorithms.qtl.operations.traversal.TreeTraversal; 060import org.dllearner.algorithms.qtl.util.Entailment; 061import org.dllearner.algorithms.qtl.util.VarGenerator; 062import org.dllearner.core.AbstractReasonerComponent; 063import org.dllearner.reasoning.SPARQLReasoner; 064import org.dllearner.utilities.OwlApiJenaUtils; 065import org.jgrapht.Graph; 066import org.jgrapht.graph.DefaultDirectedGraph; 067import org.jgrapht.io.*; 068import org.semanticweb.owlapi.model.*; 069import org.slf4j.Logger; 070import org.slf4j.LoggerFactory; 071import uk.ac.manchester.cs.owl.owlapi.OWLClassImpl; 072import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; 073import uk.ac.manchester.cs.owl.owlapi.OWLDataPropertyImpl; 074import uk.ac.manchester.cs.owl.owlapi.OWLObjectPropertyImpl; 075 076/** 077 * @author Lorenz Buehmann 078 * 079 */ 080public class QueryTreeUtils { 081 082 private static final VarGenerator varGen = new VarGenerator("x"); 083 private static final String TRIPLE_PATTERN_TEMPLATE = "%s %s %s ."; 084 private static final OWLDataFactory df = new OWLDataFactoryImpl(); 085 086 public static String EMPTY_QUERY_TREE_QUERY = "SELECT ?s WHERE {?s ?p ?o.}"; 087 088 private static Reasoner reasoner = ReasonerRegistry.getRDFSSimpleReasoner(); 089 090 /** 091 * Rebuilds the node IDs starting from the root node. 092 * 093 * @param tree the tree 094 */ 095 public static void rebuildNodeIDs(RDFResourceTree tree) { 096 TreeTraversal<RDFResourceTree> it = new PreOrderTreeTraversal<>(tree); 097 098 int id = 0; 099 while(it.hasNext()) { 100 it.next().setId(id++); 101 } 102 } 103 104 /** 105 * Returns the path from the given node to the root of the given tree, i.e. 106 * a list of nodes starting from the given node. 107 * @param tree the query tree 108 * @param node the node 109 */ 110 public static List<RDFResourceTree> getPathToRoot(RDFResourceTree tree, RDFResourceTree node) { 111 if(node.isRoot()) { 112 return Collections.singletonList(node); 113 } 114 List<RDFResourceTree> path = new ArrayList<>(); 115 116 // add node itself 117 path.add(node); 118 119 // add parent node 120 RDFResourceTree parent = node.getParent(); 121 path.add(parent); 122 123 // traversal up to root node 124 while(!parent.isRoot()) { 125 parent = parent.getParent(); 126 path.add(parent); 127 } 128 129 return path; 130 } 131 132 /** 133 * Print the path from the given node to the root of the given tree, i.e. 134 * a list of nodes starting from the given node. 135 * @param tree the query tree 136 * @param node the node 137 */ 138 public static String printPathToRoot(RDFResourceTree tree, RDFResourceTree node) { 139 List<RDFResourceTree> path = getPathToRoot(tree, node); 140 141 StringBuilder sb = new StringBuilder(); 142 Iterator<RDFResourceTree> iterator = path.iterator(); 143 144 RDFResourceTree child = iterator.next(); 145 sb.append(child).append("(").append(child.getID()).append(")"); 146 while (iterator.hasNext()) { 147 RDFResourceTree parent = iterator.next(); 148 sb.append(" <").append(parent.getEdgeToChild(child)).append("> "); 149 sb.append(parent).append("(").append(parent.getID()).append(")"); 150 child = parent; 151 } 152 return sb.toString(); 153 } 154 155 /** 156 * Returns the number of nodes in the given query tree, i.e. the size of 157 * the children closure. 158 * @param tree the query tree 159 * @return the number of nodes 160 */ 161 public static int getNrOfNodes(RDFResourceTree tree) { 162 return getNodes(tree).size(); 163 } 164 165 166 /** 167 * Returns the set of edges that occur in the given query tree, i.e. the 168 * closure of the edges. 169 * @param tree the query tree 170 * @return the set of edges in the query tree 171 */ 172 public static Set<Node> getEdges(RDFResourceTree tree) { 173 Set<Node> edges = new HashSet<>(); 174 175 for(Iterator<RDFResourceTree> it = new LevelOrderTreeTraversal(tree); it.hasNext();) { 176 edges.addAll(it.next().getEdges()); 177 } 178 179 return edges; 180 } 181 182 /** 183 * Returns the number of edges that occur in the given query tree, which 184 * is obviously `n-1` where n is the number of nodes. 185 * @param tree the query tree 186 * @return the number of edges in the query tree 187 */ 188 public static int getNrOfEdges(RDFResourceTree tree) { 189 return getNrOfNodes(tree) - 1; 190 } 191 192 /** 193 * Returns the complexity of the given query tree. 194 * <div> 195 * Given a query tree T = (V,E) comprising a set V of vertices or nodes 196 * together with a set E of edges or links. Moreover we have that 197 * V = U ⪠L ⪠VAR , where U denotes the nodes that are URIs, L denotes 198 * the nodes that are literals and VAR contains the nodes that are variables. 199 * We define the complexity c(T) of query tree T as follows: 200 * </div> 201 * <code>c(T) = 1 + log(|U| * α + |L| * β + |VAR| * γ) </code> 202 * <div> 203 * with <code>α, β, γ</code> being the weight of the particular node types. 204 * </div> 205 * @param tree the query tree 206 * @return the complexity value 207 */ 208 public static double getComplexity(RDFResourceTree tree) { 209 210 double varNodeWeight = 0.8; 211 double resourceNodeWeight = 1.0; 212 double literalNodeWeight = 1.0; 213 214 double complexity = 0; 215 216 List<RDFResourceTree> nodes = getNodes(tree); 217 for (RDFResourceTree node : nodes) { 218 if(node.isVarNode()) { 219 complexity += varNodeWeight; 220 } else if(node.isResourceNode()) { 221 complexity += resourceNodeWeight; 222 } else if(node.isLiteralNode()) { 223 complexity += literalNodeWeight; 224 } 225 } 226 227 return 1 + Math.log(complexity); 228 } 229 230 /** 231 * Determines if tree1 is subsumed by tree2, i.e. whether tree2 is more general than 232 * tree1. 233 * @param tree1 234 * @param tree2 235 * @return 236 */ 237 public static <N> boolean isSubsumedBy(QueryTree<N> tree1, QueryTree<N> tree2) { 238 // 1.compare the root nodes 239 // if both nodes denote the same resource or literal 240 if(tree1.isVarNode() && !tree2.isVarNode() && tree1.getUserObject().equals(tree2.getUserObject())){ 241 return true; 242 } 243 244 // if node2 is more specific than node1 245 if(tree1.isVarNode() && !tree2.isVarNode()) { 246 return false; 247 } 248 249 // 2. compare the children 250 Object edge; 251 for(QueryTree<N> child2 : tree2.getChildren()){ 252 boolean isSubsumed = false; 253 edge = tree2.getEdge(child2); 254 for(QueryTree<N> child1 : tree1.getChildren(edge)){ 255 if(child1.isSubsumedBy(child2)){ 256 isSubsumed = true; 257 break; 258 } 259 } 260 if(!isSubsumed){ 261 return false; 262 } 263 } 264 return true; 265 } 266 267 /** 268 * Returns all nodes in the given query tree. 269 * 270 * @param tree the query tree 271 * @return the nodes 272 */ 273 public static List<RDFResourceTree> getNodes(RDFResourceTree tree) { 274 List<RDFResourceTree> nodes = tree.getChildren().stream() 275 .flatMap(child -> getNodes(child).stream()) 276 .collect(Collectors.toList()); 277 nodes.add(tree); 278 279 return nodes; 280 } 281 282 /** 283 * Returns all nodes labels in the given query tree. 284 * 285 * @param tree the query tree 286 * @return the node labels 287 */ 288 public static Set<Node> getNodeLabels(RDFResourceTree tree) { 289 return getNodes(tree).stream() 290 .map(RDFResourceTree::getData) 291 .collect(Collectors.toSet()); 292 } 293 294 /** 295 * Returns all nodes in the given query tree. 296 * @param tree the query tree 297 * @return the leaf nodes 298 */ 299 public static List<RDFResourceTree> getLeafs(RDFResourceTree tree) { 300 return getNodes(tree).stream().filter(GenericTree::isLeaf).collect(Collectors.toList()); 301 } 302 303 /** 304 * Returns the depth of the query tree 305 * @param tree the query tree 306 * @return the depth 307 */ 308 public static <T, V extends GenericTree<T, V>> int getDepth(GenericTree<T, V> tree) { 309 int maxDepth = 0; 310 311 for(GenericTree<T, V> child : tree.getChildren()) { 312 int depth; 313 if(child.isLeaf()) { 314 depth = 1; 315 } else { 316 depth = 1 + getDepth(child); 317 } 318 maxDepth = Math.max(maxDepth, depth); 319 } 320 321 return maxDepth; 322 } 323 324 private static final Logger log = LoggerFactory.getLogger(QueryTreeUtils.class); 325 326 /** 327 * Determines if tree1 is subsumed by tree2, i.e. whether tree2 is more general than 328 * tree1. 329 * @param tree1 the first query tree 330 * @param tree2 the second query tree 331 * @return whether <code>tree1</code> is subsumed by <code>tree2</code> 332 */ 333 public static boolean isSubsumedBy(RDFResourceTree tree1, RDFResourceTree tree2) { 334 log.trace("{} < {} ?",tree1, tree2); 335 // 1.compare the root nodes 336 // (T_1 != ?) and (T_2 != ?) --> T_1 = T_2 337 if(tree1.isResourceNode() && tree2.isResourceNode()) { 338 return tree1.getData().equals(tree2.getData()); 339 } else if(tree1.isLiteralNode() && tree2.isLiteralNode()) { 340 if(tree1.isLiteralValueNode()) { // T_1 is literal value v1 341 if(tree2.isLiteralValueNode()) { // T_2 is literal value v2 342 return tree1.getData().equals(tree2.getData()); // v1 = v2 ? 343 } else { // T_2 wraps literal -> check whether v1 is of same datatype as 344 RDFDatatype d1 = tree1.getDatatype(); 345 RDFDatatype d2 = tree2.getDatatype(); 346 // if there is a datatype, it must match for both trees 347 if(d1 != null) { 348 return d1.equals(d2); 349 } 350 return d2 == null; 351 } 352 } else { 353 if(tree2.isLiteralValueNode()) { 354 return false; 355 } else { 356 RDFDatatype d1 = tree1.getDatatype(); 357 RDFDatatype d2 = tree2.getDatatype(); 358 // if there is a datatype, it must match for both trees 359 if(d1 != null) { 360 return d1.equals(d2); 361 } 362 return d2 == null; 363 } 364 } 365 366 } 367 368 // TODO workaround for tuples - rething as usually blank nodes are indeed generic 369 if(tree2.getData().isBlank() && !tree2.hasChildren()) { 370 return false; 371 } 372 373 // (T_1 = ?) and (T_2 != ?) --> FALSE 374 if(tree1.isVarNode() && !tree2.isVarNode()) { 375 return false; 376 } 377 378 // 2. compare the children 379 for(Node edge2 : tree2.getEdges()){ // for each edge in T_2 380 List<RDFResourceTree> children1 = tree1.getChildren(edge2); 381 if(children1 != null) { 382 for(RDFResourceTree child2 : tree2.getChildren(edge2)) { // and each child in T_2 383 boolean isSubsumed = false; 384 for(RDFResourceTree child1 : children1){ // there has to be at least one child in T_1 that is subsumed 385 if(QueryTreeUtils.isSubsumedBy(child1, child2)){ 386 isSubsumed = true; 387 break; 388 } 389 } 390 if(!isSubsumed){ 391 return false; 392 } 393 } 394 } else { 395 return false; 396 } 397 } 398 return true; 399 } 400 401 public static boolean isSubsumedBy(RDFResourceTree tree1, RDFResourceTree tree2, LiteralNodeSubsumptionStrategy strategy) { 402 return isSubsumedBy(tree1, tree2); 403 } 404 405 /** 406 * Determines if tree1 is subsumed by tree2, i.e. whether tree2 is more general than 407 * tree1. 408 * @param tree1 409 * @param tree2 410 * @param entailment 411 * @return 412 */ 413 public static boolean isSubsumedBy(RDFResourceTree tree1, RDFResourceTree tree2, Entailment entailment) { 414 Resource root = ResourceFactory.createResource("http://example.org/root"); 415 416 Model m1 = toModel(tree1, root); 417 Model m2 = toModel(tree2, root); 418 419 Model m1closure = ModelFactory.createDefaultModel(); 420 m1closure.add(ModelFactory.createInfModel(reasoner, m1)); 421 422 Model m2closure = ModelFactory.createDefaultModel(); 423 m2closure.add(ModelFactory.createInfModel(reasoner, m2)); 424 425 boolean sameClosure = m1closure.isIsomorphicWith(m2closure); 426 if(sameClosure) { 427 return true; 428 } 429 430 // check if each statement of m1 is contained in m2 431 StmtIterator iterator = m2closure.listStatements(); 432 while (iterator.hasNext()) { 433 Statement st = iterator.next(); 434 if (!st.getSubject().isAnon() && !st.getObject().isAnon() 435 && !(st.getPredicate().equals(RDFS.subClassOf) && st.getSubject().equals(st.getObject())) 436 && !m1closure.contains(st)) { 437 return false; 438 } 439 } 440 return true; 441 } 442 443 /** 444 * Determines if tree1 is subsumed by tree2, i.e. whether tree2 is more general than 445 * tree1. 446 * @param tree1 447 * @param tree2 448 * @return 449 */ 450 public static boolean isSubsumedBy(RDFResourceTree tree1, RDFResourceTree tree2, SPARQLReasoner reasoner) { 451 // 1.compare the root nodes 452 453 // (T_1 != ?) and (T_2 != ?) --> T_1 = T_2 454 if(!tree1.isVarNode() && !tree2.isVarNode()) { 455 if(tree1.isResourceNode() && tree2.isResourceNode()) { 456 457 } 458 return tree1.getData().equals(tree2.getData()); 459 } 460 461 // (T_1 = ?) and (T_2 != ?) --> FALSE 462 if(tree1.isVarNode() && !tree2.isVarNode()) { 463 return false; 464 } 465 466 // 2. compare the children 467 for(Node edge2 : tree2.getEdges()){ 468 List<RDFResourceTree> children1 = tree1.getChildren(edge2); 469 if(children1 != null) { 470 for(RDFResourceTree child2 : tree2.getChildren(edge2)) { 471 boolean isSubsumed = false; 472 473 for(RDFResourceTree child1 : children1){ 474 if(QueryTreeUtils.isSubsumedBy(child1, child2, reasoner, edge2.equals(RDF.type.asNode()))){ 475 isSubsumed = true; 476 break; 477 } 478 } 479 if(!isSubsumed){ 480 return false; 481 } 482 } 483 } else { 484 return false; 485 } 486 } 487 return true; 488 } 489 490 /** 491 * Determines if tree1 is subsumed by tree2, i.e. whether tree2 is more general than 492 * tree1. 493 * @param tree1 494 * @param tree2 495 * @return 496 */ 497 public static boolean isSubsumedBy(RDFResourceTree tree1, RDFResourceTree tree2, AbstractReasonerComponent reasoner, boolean typeNode) { 498 // 1.compare the root nodes 499 // (T_1 != ?) and (T_2 != ?) --> T_1 = T_2 500 if(!tree1.isVarNode() && !tree2.isVarNode()) { 501 if(tree1.getData().equals(tree2.getData())) { 502 return true; 503 } else if(typeNode && tree1.isResourceNode() && tree2.isResourceNode()) { 504 return reasoner.isSuperClassOf( 505 new OWLClassImpl(IRI.create(tree2.getData().getURI())), 506 new OWLClassImpl(IRI.create(tree1.getData().getURI()))); 507 } 508 return false; 509 } 510 511 // (T_1 = ?) and (T_2 != ?) --> FALSE 512 if(tree1.isVarNode() && !tree2.isVarNode()) { 513 return false; 514 } 515 516 if(typeNode) { 517// return isSubsumedBy(tree1, tree2, Entailment.RDFS); 518 } 519 520 // 2. compare the children 521 for(Node edge2 : tree2.getEdges()){ 522 for(RDFResourceTree child2 : tree2.getChildren(edge2)) { 523 boolean isSubsumed = false; 524 List<RDFResourceTree> children = tree1.getChildren(edge2); 525 if(children != null) { 526 for(RDFResourceTree child1 : children){ 527 if(QueryTreeUtils.isSubsumedBy(child1, child2, reasoner, edge2.equals(RDF.type.asNode()))){ 528 isSubsumed = true; 529 break; 530 } 531 } 532 } 533 if(!isSubsumed){ 534 return false; 535 } 536 } 537 } 538 return true; 539 } 540 541 /** 542 * Determines if the trees are equivalent from a subsumptional point of view. 543 * @param trees 544 * @return 545 */ 546 @SafeVarargs 547 public static <N> boolean sameTrees(QueryTree<N>... trees) { 548 for(int i = 0; i < trees.length; i++) { 549 QueryTree<N> tree1 = trees[i]; 550 for(int j = i; j < trees.length; j++) { 551 QueryTree<N> tree2 = trees[j]; 552 if(!sameTrees(tree1, tree2)) { 553 return false; 554 } 555 } 556 } 557 558 return true; 559 } 560 561 /** 562 * Determines if both trees are equivalent from a subsumptional point of 563 * view. 564 * 565 * @param tree1 566 * @param tree2 567 * @return 568 */ 569 public static <N> boolean sameTrees(QueryTree<N> tree1, QueryTree<N> tree2) { 570 return isSubsumedBy(tree1, tree2) && isSubsumedBy(tree2, tree1); 571 } 572 573 public static <N> boolean sameTrees(RDFResourceTree tree1, RDFResourceTree tree2) { 574 return 575 tree1.getData().equals(tree2.getData()) && // root(t1) == root(t2) 576 tree1.getNumberOfChildren() == tree2.getNumberOfChildren() && // #children(t1) == #children(t2) 577 isSubsumedBy(tree1, tree2) && isSubsumedBy(tree2, tree1); // t1 <= t2 && t2 <= t1 578 } 579 580 public static Model toModel(RDFResourceTree tree) { 581 Model model = ModelFactory.createDefaultModel(); 582 buildModel(model, tree, model.asRDFNode(NodeFactory.createBlankNode()).asResource()); 583 return model; 584 } 585 586 public static Model toModel(RDFResourceTree tree, Resource subject) { 587 Model model = ModelFactory.createDefaultModel(); 588 buildModel(model, tree, subject); 589 return model; 590 } 591 592 private static void buildModel(Model model, RDFResourceTree tree, Resource subject) { 593 int i = 0; 594 for (Node edge : tree.getEdges()) { 595 Property p = model.getProperty(edge.getURI()); 596 for (RDFResourceTree child : tree.getChildren(edge)) { 597 RDFNode object = child.isVarNode() ? model.asRDFNode(NodeFactory.createBlankNode()) : model.asRDFNode(child.getData()); 598 model.add(subject, p, object); 599// if (child.isVarNode()) { 600 buildModel(model, child, object.asResource()); 601// } 602 } 603 } 604 } 605 606 public static OWLClassExpression toOWLClassExpression(RDFResourceTree tree) { 607 return toOWLClassExpression(tree, LiteralNodeConversionStrategy.DATATYPE); 608 } 609 610 public static OWLClassExpression toOWLClassExpression(RDFResourceTree tree, LiteralNodeConversionStrategy literalConversion) { 611 return buildOWLClassExpression(tree, literalConversion); 612 } 613 614 private static OWLClassExpression buildOWLClassExpression(RDFResourceTree tree, LiteralNodeConversionStrategy literalConversion) { 615 Set<OWLClassExpression> classExpressions = new HashSet<>(); 616 for(Node edge : tree.getEdges()) { 617 for (RDFResourceTree child : tree.getChildren(edge)) { 618 if(edge.equals(RDF.type.asNode()) || edge.equals(RDFS.subClassOf.asNode()) || edge.equals(OWL.equivalentClass.asNode())) { 619 if(child.isVarNode()) { 620 classExpressions.add(buildOWLClassExpression(child, literalConversion)); 621 } else { 622 classExpressions.add(df.getOWLClass(IRI.create(child.getData().getURI()))); 623 } 624 } else { 625 // create r some C 626 if(child.isLiteralNode()) { 627 OWLDataProperty dp = df.getOWLDataProperty(IRI.create(edge.getURI())); 628 if(!child.isLiteralValueNode()) { 629 OWLDataRange dr; 630 if(child.getDatatype() == null) { 631 dr = df.getTopDatatype(); 632 } else { 633 dr = df.getOWLDatatype(IRI.create(child.getDatatype().getURI())); 634 } 635 classExpressions.add(df.getOWLDataSomeValuesFrom(dp, dr)); 636 } else { 637 OWLLiteral value = OwlApiJenaUtils.getOWLLiteral(child.getData().getLiteral()); 638 classExpressions.add(df.getOWLDataHasValue(dp, value)); 639 } 640 641 } else { 642 OWLObjectPropertyExpression pe = df.getOWLObjectProperty(IRI.create(edge.getURI())); 643 if(edge instanceof NodeInv) { 644 pe = pe.getInverseProperty(); 645 } 646 OWLClassExpression filler = null; 647 if(child.isVarNode()) { 648 filler = buildOWLClassExpression(child, literalConversion); 649 classExpressions.add(df.getOWLObjectSomeValuesFrom( 650 pe, 651 filler)); 652 } else if (child.isResourceNode()) { 653 classExpressions.add(df.getOWLObjectHasValue( 654 pe, 655 df.getOWLNamedIndividual(IRI.create(child.getData().getURI())))); 656 } 657 } 658 } 659 } 660 } 661 classExpressions.remove(df.getOWLThing()); 662 if(classExpressions.isEmpty()) { 663 return df.getOWLThing(); 664 } else if(classExpressions.size() == 1){ 665 return classExpressions.iterator().next(); 666 } else { 667 return df.getOWLObjectIntersectionOf(classExpressions); 668 } 669 } 670 671 /** 672 * Returns a SPARQL query representing the query tree. Note, for empty trees 673 * it just returns 674 * <p><code>SELECT ?s WHERE {?s ?p ?o.}</code></p> 675 * @param tree 676 * @return 677 */ 678 public static Query toSPARQLQuery(RDFResourceTree tree) { 679 return QueryFactory.create(toSPARQLQueryString(tree)); 680 } 681 682 public static String toSPARQLQueryString(RDFResourceTree tree, List<Node> nodes2Select, String baseIRI, PrefixMapping pm) { 683 return toSPARQLQueryString(tree, baseIRI, pm, LiteralNodeConversionStrategy.DATATYPE, nodes2Select); 684 } 685 686 public static String toSPARQLQueryString(RDFResourceTree tree) { 687 return toSPARQLQueryString(tree, PrefixMapping.Standard); 688 } 689 690 public static String toSPARQLQueryString(RDFResourceTree tree, PrefixMapping pm) { 691 return toSPARQLQueryString(tree, null, pm, LiteralNodeConversionStrategy.DATATYPE, Collections.emptyList()); 692 } 693 694 public static String toSPARQLQueryString(RDFResourceTree tree, String baseIRI, PrefixMapping pm) { 695 return toSPARQLQueryString(tree, baseIRI, pm, LiteralNodeConversionStrategy.DATATYPE, Collections.emptyList()); 696 } 697 698 public static String toSPARQLQueryString(RDFResourceTree tree, String baseIRI, PrefixMapping pm, 699 LiteralNodeConversionStrategy literalConversion, List<Node> nodes2Select) { 700 if(!tree.hasChildren()){ 701 return EMPTY_QUERY_TREE_QUERY; 702 } 703 704 varGen.reset(); 705 706 SerializationContext context = new SerializationContext(pm); 707 context.setBaseIRI(baseIRI); 708 709 StringBuilder sb = new StringBuilder(); 710 711 // Add BASE declaration 712 if (baseIRI != null) { 713 sb.append("BASE "); 714 sb.append(FmtUtils.stringForURI(baseIRI, null, null)); 715 sb.append('\n'); 716 } 717 718 // Then pre-pend prefixes 719 for (String prefix : pm.getNsPrefixMap().keySet()) { 720 sb.append("PREFIX "); 721 sb.append(prefix); 722 sb.append(": "); 723 sb.append(FmtUtils.stringForURI(pm.getNsPrefixURI(prefix), null, null)); 724 sb.append('\n'); 725 } 726 727 List<ExprNode> filters = new ArrayList<>(); 728 729 // target var 730 String targetVar = "?s"; 731 732 // header 733 if(!nodes2Select.isEmpty()) { 734 sb.append(String.format("SELECT %s %s WHERE {%n", targetVar, nodes2Select.stream().map(node -> "?var" + nodes2Select.indexOf(node)).collect(Collectors.joining(" ")))); 735 } else { 736 sb.append(String.format("SELECT DISTINCT %s WHERE {\n", targetVar)); 737 } 738 739 // triple patterns 740 buildSPARQLQueryString(tree, targetVar, sb, filters, context, nodes2Select); 741 742 sb.append("}"); 743 744 Query query = QueryFactory.create(sb.toString(), Syntax.syntaxSPARQL_11); 745 746 // filters 747 if(!filters.isEmpty()) { 748 Iterator<ExprNode> it = filters.iterator(); 749 ExprNode filter = it.next(); 750 while(it.hasNext()) { 751 filter = new E_LogicalAnd(filter, it.next()); 752 } 753 ((ElementGroup)query.getQueryPattern()).addElementFilter(new ElementFilter(filter)); 754 } 755 756 query.setPrefixMapping(pm); 757 758 return query.toString(); 759 } 760 761 private static int buildGraph(Integer parentId, Graph<Vertex, Edge> graph, RDFResourceTree tree, SerializationContext context){ 762 Vertex parent = new Vertex(parentId, FmtUtils.stringForNode(tree.getData(), context)); 763 graph.addVertex(parent); 764 765 int childId = parentId; 766 767 for (Node edgeNode : tree.getEdges()) { 768 String edgeLabel = FmtUtils.stringForNode(edgeNode, context); 769 for (RDFResourceTree child : tree.getChildren(edgeNode)) { 770 childId++; 771 String childLabel = FmtUtils.stringForNode(child.getData(), context); 772 773 Vertex childVertex = new Vertex(childId, childLabel); 774 graph.addVertex(childVertex); 775 776 Edge edge = new Edge(Long.parseLong(parentId + "0" + childId), edgeLabel); 777 graph.addEdge(parent, childVertex, edge); 778 779 childId = buildGraph(childId, graph, child, context); 780 } 781 } 782 783 return childId; 784 } 785 786 /** 787 * Convert the query tree to a directed labelled graph. 788 * @param tree the query tree 789 * @param baseIRI the base IRI used for rendering of the nodes 790 * @param pm the prefixes used for rendering of the nodes 791 * @return the directed labelled graph 792 */ 793 public static Graph<Vertex, Edge> toGraph(RDFResourceTree tree, String baseIRI, PrefixMapping pm) { 794 SerializationContext context = new SerializationContext(pm); 795 context.setBaseIRI(baseIRI); 796 797 final Graph<Vertex, Edge> graph = new DefaultDirectedGraph<>(Edge.class); 798 799// TreeTraversal it = new PreOrderTreeTraversal(tree); 800// while(it.hasNext()) { 801// RDFResourceTree node = it.next(); 802// node.get 803// } 804 805 buildGraph(0, graph, tree, context); 806 807 return graph; 808 } 809 810 /** 811 * Export the query tree as GraphML file. 812 * 813 * @param tree the query tree 814 * @param baseIRI (optional) base IRI 815 * @param pm (optional) prefix mapping 816 * @param outputFile the output file 817 */ 818 public static void asGraph(RDFResourceTree tree, String baseIRI, PrefixMapping pm, File outputFile) { 819 Objects.requireNonNull(tree); 820 Objects.requireNonNull(outputFile); 821 822 SerializationContext context = new SerializationContext(pm); 823 context.setBaseIRI(baseIRI); 824 825 final Graph<Vertex, Edge> graph = new DefaultDirectedGraph<>(Edge.class); 826 buildGraph(0, graph, tree, context); 827 828 ComponentNameProvider<Vertex> vertexIDProvider = vertex -> String.valueOf(vertex.getId()); 829 ComponentNameProvider<Vertex> vertexNameProvider = Vertex::getLabel; 830 831 ComponentNameProvider<Edge> edgeIDProvider = edge -> String.valueOf(edge.getId()); 832 ComponentNameProvider<Edge> edgeLabelProvider = Edge::getLabel; 833 834 GraphMLExporter<Vertex, Edge> exporter = new GraphMLExporter<>( 835 vertexIDProvider, vertexNameProvider, 836 edgeIDProvider, edgeLabelProvider); 837 838 Map<String, Attribute> rootNodeAttributes = new HashMap<>(); 839 rootNodeAttributes.put("rootNode", new DefaultAttribute<>(true, AttributeType.BOOLEAN)); 840 841 ComponentAttributeProvider<Vertex> vertexAttributeProvider = vertex -> { 842 if(vertex.getId() == tree.getID()) { 843 return rootNodeAttributes; 844 } 845 return null; 846 }; 847 exporter.setVertexAttributeProvider(vertexAttributeProvider); 848 849 exporter.registerAttribute("rootNode", GraphMLExporter.AttributeCategory.NODE, AttributeType.BOOLEAN, "false"); 850 851 try (Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8"))) { 852 exporter.exportGraph(graph, writer); 853 } catch (IOException | ExportException e) { 854 log.error("failed to write graph to file " + outputFile, e); 855 } 856 } 857 858 private static void buildSPARQLQueryString(RDFResourceTree tree, 859 String subjectStr, StringBuilder sb, Collection<ExprNode> filters, 860 SerializationContext context, List<Node> nodes2Select) { 861 if (!tree.isLeaf()) { 862 // process rdf:type edges first 863 List<Node> edges = new ArrayList<>(tree.getEdges()); 864 edges.sort((e1, e2) -> { 865 if(e1.matches(RDF.type.asNode())){ 866 return -2; 867 } else if(e2.matches(RDF.type.asNode())){ 868 return 2; 869 } else { 870 return NodeUtils.compareRDFTerms(e1, e2); 871 }}); 872 873 for (Node edge : edges) { 874 // process predicate 875 String predicateStr = FmtUtils.stringForNode(edge, context); 876 877 // process children 878 // the concrete values first 879 List<RDFResourceTree> children = tree.getChildren(edge); 880 if(!(edge instanceof NodeInv)) { 881 List<Node> concreteChildren = 882 children.stream() 883 .filter(t -> t.isLiteralValueNode() || t.isResourceNode()) 884 .filter(t -> !t.hasAnchor()) 885 .map(GenericTree::getData) 886 .filter(node -> !(node.isVariable() || node.isBlank())) 887 .collect(Collectors.toList()); 888 if(!concreteChildren.isEmpty()) { 889 String objStr = concreteChildren.stream() 890 .map(node -> FmtUtils.stringForNode(node, context)) 891 .collect(Collectors.joining(",")); 892 String tpStr = (edge instanceof NodeInv) 893 ? String.format(TRIPLE_PATTERN_TEMPLATE, objStr, predicateStr, subjectStr) 894 : String.format(TRIPLE_PATTERN_TEMPLATE, subjectStr, predicateStr, objStr); 895 896 sb.append(tpStr).append("\n"); 897 } 898 children.stream() 899 .filter(t -> t.hasAnchor() && (t.isLiteralValueNode() || t.isResourceNode())) 900 .forEach(child -> { 901 Var obj = Var.alloc("var" + nodes2Select.indexOf(child.getAnchorVar())); 902 String objStr = FmtUtils.stringForNode(obj, context); 903 String tpStr = (edge instanceof NodeInv) 904 ? String.format(TRIPLE_PATTERN_TEMPLATE, objStr, predicateStr, subjectStr) 905 : String.format(TRIPLE_PATTERN_TEMPLATE, subjectStr, predicateStr, objStr); 906 907 sb.append(tpStr).append("\n"); 908 filters.add(new E_Equals(new ExprVar(obj), NodeValue.makeNode(child.getData()))); 909 }); 910 } 911 Stream<RDFResourceTree> childStream = children.stream(); 912 if(!(edge instanceof NodeInv)) { 913 childStream = childStream 914 .filter(t -> !(t.isLiteralValueNode() || t.isResourceNode())); 915// .filter(child -> (child.getData().isVariable() || child.getData().isBlank())); 916 } 917 // the var nodes next 918 childStream 919 .forEach(child -> { 920 // pre-process object 921 Node object = child.getData(); 922 if(child.hasAnchor()) { 923 object = Var.alloc("var" + nodes2Select.indexOf(child.getAnchorVar())); 924 } else if(nodes2Select.contains(object)) { 925 object = Var.alloc("var" + nodes2Select.indexOf(object)); 926 } else if(child.isVarNode()) { 927 // set a fresh var in the SPARQL query 928 object = varGen.newVar(); 929 } else if(child.isLiteralNode() && !child.isLiteralValueNode()) { 930 // set a fresh var in the SPARQL query 931 object = varGen.newVar(); 932 933 // literal node describing a set of literals is rendered depending on the conversion strategy 934 if(child.getDatatype() != null) { 935 ExprNode filter = new E_Equals( 936 new E_Datatype(new ExprVar(object)), 937 NodeValue.makeNode(NodeFactory.createURI(child.getDatatype().getURI()))); 938 // filters.add(filter); 939 } 940 941 } 942 943 // process object 944 String objectStr = FmtUtils.stringForNode(object, context); 945 946 // append triple pattern 947 String tp = (edge instanceof NodeInv) 948 ? String.format(TRIPLE_PATTERN_TEMPLATE, objectStr, predicateStr, subjectStr) 949 : String.format(TRIPLE_PATTERN_TEMPLATE, subjectStr, predicateStr, objectStr); 950 sb.append(tp).append("\n"); 951 952 /* 953 * only if child is var node recursively process children if 954 * exist because for URIs it doesn't make sense to add the 955 * triple pattern and for literals there can't exist a child 956 * in the tree 957 */ 958 if (child.isVarNode() || child.getData().isBlank()) { 959 buildSPARQLQueryString(child, objectStr, sb, filters, context, nodes2Select); 960 } 961 }); 962 } 963 } 964 } 965 966 public static RDFResourceTree materializePropertyDomains(RDFResourceTree tree, AbstractReasonerComponent reasoner) { 967 RDFResourceTree newTree = new RDFResourceTree(tree.getData()); 968 969 Consumer<OWLClass> addTypeChild = (cls) -> newTree.addChild(new RDFResourceTree(OwlApiJenaUtils.asNode(cls)), RDF.type.asNode()); 970 971 tree.getEdges().forEach(edge -> { 972 List<RDFResourceTree> children = tree.getChildren(edge); 973 974 // add existing children 975 children.forEach(child -> { 976 RDFResourceTree newChild = materializePropertyDomains(child, reasoner); 977 newTree.addChild(newChild, edge); 978 }); 979 980 // add the rdf:type statements for the property domain(s) 981 OWLClassExpression dom = reasoner.getDomain(OwlApiJenaUtils.asOWLEntity(edge, EntityType.OBJECT_PROPERTY)); 982 if(!dom.isAnonymous() && !dom.isOWLThing()) { 983 addTypeChild.accept(dom.asOWLClass()); 984 } else { 985 if(dom.getClassExpressionType() == ClassExpressionType.OBJECT_INTERSECTION_OF) { 986 dom.getNestedClassExpressions().stream() 987 .filter(ce -> !ce.isAnonymous()) 988 .map(OWLClassExpression::asOWLClass) 989 .forEach(addTypeChild); 990 } 991 } 992 }); 993 994 return newTree; 995 } 996 997 /** 998 * Adds all rdf:type statements to each node based on the domain and range of the edges as well as the subClassOf 999 * relations between all existing types. 1000 * 1001 * @param tree the query tree 1002 * @param reasoner the reasoner 1003 * @return a new rdf:type materialized tree 1004 */ 1005 public static RDFResourceTree materializeTypes(RDFResourceTree tree, AbstractReasonerComponent reasoner) { 1006 RDFResourceTree newTree = new RDFResourceTree(tree.getData()); 1007 1008 Consumer<OWLClass> addTypeChild = (cls) -> newTree.addChild(new RDFResourceTree(OwlApiJenaUtils.asNode(cls)), RDF.type.asNode()); 1009 1010 Set<OWLClassExpression> types = new HashSet<>(); 1011 1012 // process the outgoing non-rdf:type edges 1013 tree.getEdges().stream().filter(edge -> !edge.equals(RDF.type.asNode())).forEach(edge -> { 1014 List<RDFResourceTree> children = tree.getChildren(edge); 1015 1016 // add existing children 1017 children.forEach(child -> { 1018 RDFResourceTree newChild = materializeTypes(child, reasoner); 1019 newTree.addChild(newChild, edge); 1020 }); 1021 1022 // collect rdf:type information, based on the edge 1023 // a) normal edge: the rdfs:domain information if exist 1024 // b) inverse edge: the rdfs:range information if exist 1025 if(edge instanceof NodeInv) { 1026 types.add(reasoner.getRange(OwlApiJenaUtils.asOWLEntity(edge, EntityType.OBJECT_PROPERTY))); 1027 } else { 1028 types.add(reasoner.getDomain(OwlApiJenaUtils.asOWLEntity(edge, EntityType.OBJECT_PROPERTY))); 1029 } 1030 }); 1031 1032 // collect rdf:type information, based on the incoming edge(s) 1033 // a) normal edge: the rdfs:range information if exist 1034 // b) inverse edge: the rdfs:domain information if exist 1035 if(!tree.isRoot()) { 1036 Node inEdge = tree.getEdgeToParent(); 1037 if(inEdge instanceof NodeInv) { 1038 types.add(reasoner.getDomain(OwlApiJenaUtils.asOWLEntity(inEdge, EntityType.OBJECT_PROPERTY))); 1039 } else { 1040 types.add(reasoner.getRange(OwlApiJenaUtils.asOWLEntity(inEdge, EntityType.OBJECT_PROPERTY))); 1041 } 1042 } 1043 1044 // collect the existing rdf:type nodes 1045 List<RDFResourceTree> children = tree.getChildren(RDF.type.asNode()); 1046 if(children != null) { 1047 children.forEach(child -> types.add(OwlApiJenaUtils.asOWLEntity(child.getData(), EntityType.CLASS))); 1048 } 1049 1050 // we don't keep owl:Thing todo make this configurable 1051 types.remove(df.getOWLThing()); 1052 1053 // process the collected (complex) types, i.e. add an rdf:type edge for each named class 1054 types.forEach(type -> { 1055 if(!type.isAnonymous()) { 1056 addTypeChild.accept(type.asOWLClass()); 1057 } else { 1058 if(type.getClassExpressionType() == ClassExpressionType.OBJECT_INTERSECTION_OF) { 1059 type.getNestedClassExpressions().stream() 1060 .filter(ce -> !ce.isAnonymous()) 1061 .map(OWLClassExpression::asOWLClass) 1062 .forEach(addTypeChild); 1063 } 1064 } 1065 }); 1066 1067 return newTree; 1068 } 1069 1070 /** 1071 * Remove trivial statements according to the given entailment semantics: 1072 * <h3>RDFS</h3> 1073 * <ul> 1074 * <li>remove trivial statements like <code>?s a ?x</code> 1075 * <li>remove type statements if this is given by domain and range 1076 * of used statements.</li> 1077 * 1078 * </ul> 1079 * @param tree the tree 1080 * @param entailment the entailment regime 1081 */ 1082 public static void prune(RDFResourceTree tree, AbstractReasonerComponent reasoner, Entailment entailment) { 1083 1084 // remove trivial statements 1085 for(Node edge : new TreeSet<>(tree.getEdges())) { 1086 if(edge.equals(RDF.type.asNode())) { // check outgoing rdf:type edges 1087 List<RDFResourceTree> children = new ArrayList<>(tree.getChildren(edge)); 1088 children.stream().filter(child -> !isNonTrivial(child, entailment)).forEach(child -> tree.removeChild(child, edge)); 1089 } else {// recursively apply pruning on all subtrees 1090 List<RDFResourceTree> children = tree.getChildren(edge); 1091 1092 for (RDFResourceTree child : children) { 1093 prune(child, reasoner, entailment); 1094 } 1095 } 1096 } 1097 1098 if(entailment == Entailment.RDFS) { 1099// // 1. rdfs:domain: 1100// // remove rdf:type edges if this is implicitly given by the other outgoing edges 1101// // 2. rdfs:range: 1102// // remove rdf:type edges if this is implicitly given by the incoming edge 1103// if (!tree.isLeaf()) { 1104// SortedSet<Node> edges = tree.getEdges(NodeType.RESOURCE); 1105// 1106// List<RDFResourceTree> typeChildren = tree.getChildren(RDF.type.asNode()); 1107// 1108// if(typeChildren != null && !typeChildren.isEmpty()) { 1109// // get domains 1110// Set<Node> domains = new HashSet<Node>(); 1111// for (Node edge : edges) { 1112// OWLClassExpression domain = reasoner.getDomain(new OWLObjectPropertyImpl(IRI.create(edge.getURI()))); 1113// if(!domain.isAnonymous()) { 1114// domains.add(NodeFactory.createURI(domain.asOWLClass().toStringID())); 1115// } 1116// } 1117// 1118// // get range of incoming edge 1119// Set<Node> ranges = new HashSet<Node>(); 1120// 1121// if(!tree.isRoot()) { 1122// // get the incoming edge from parent node 1123// Node incomingEdge = tree.getParent().getEdgeToChild(tree); 1124// 1125// OWLClassExpression rangeExpression = reasoner.getRange(new OWLObjectPropertyImpl(IRI.create(incomingEdge.getURI()))); 1126// if(rangeExpression.isAnonymous()) { 1127// // TODO we have to handle complex class expressions, e.g. split intersections 1128// } else { 1129// ranges.add(NodeFactory.createURI(rangeExpression.asOWLClass().toStringID())); 1130// } 1131// } 1132// 1133// // remove rdf:type children if implicitly given by domain or range 1134// for (RDFResourceTree child : new ArrayList<>(typeChildren)) { 1135// if(domains.contains(child.getData()) || ranges.contains(child.getData())) { 1136// tree.removeChild(child, RDF.type.asNode()); 1137// } 1138// } 1139// } 1140// } 1141// 1142// // apply recursively on children 1143// SortedSet<Node> edges = tree.getEdges(); 1144// for (Node edge : edges) { 1145// if(!edge.equals(RDF.type.asNode())) { 1146// for (RDFResourceTree child : tree.getChildren(edge)) { 1147// prune(child, reasoner, entailment); 1148// } 1149// } 1150// } 1151 } 1152 1153 // we have to run the subsumption check one more time to prune the tree 1154 for (Node edge : tree.getEdges()) { 1155 Set<RDFResourceTree> children2Remove = new HashSet<>(); 1156 List<RDFResourceTree> children = tree.getChildren(edge); 1157 for(int i = 0; i < children.size(); i++) { 1158 RDFResourceTree child1 = children.get(i); 1159 if(!children2Remove.contains(child1)) { 1160 for(int j = i + 1; j < children.size(); j++) { 1161 RDFResourceTree child2 = children.get(j); 1162// System.out.println(QueryTreeUtils.getPathToRoot(tree, child1)); 1163// System.out.println(QueryTreeUtils.getPathToRoot(tree, child2)); 1164 if(!children2Remove.contains(child2)) { 1165 if (QueryTreeUtils.isSubsumedBy(child1, child2)) { 1166 children2Remove.add(child2); 1167 } else if (QueryTreeUtils.isSubsumedBy(child2, child1)) { 1168 children2Remove.add(child1); 1169 } 1170 } 1171 } 1172 } 1173 1174 } 1175 1176 for (RDFResourceTree child : children2Remove) { 1177 tree.removeChild(child, edge); 1178 } 1179 } 1180 1181// if(entailment == Entailment.RDFS) { 1182// if(reasoner != null) { 1183// List<RDFResourceTree> typeChildren = tree.getChildren(RDF.type.asNode()); 1184// 1185// // compute implicit types 1186// Set<OWLClassExpression> implicitTypes = new HashSet<OWLClassExpression>(); 1187// for(Node edge : tree.getEdges()) { 1188// if(!edge.equals(RDF.type.asNode())) { 1189// // get domain for property 1190// implicitTypes.add(reasoner.getDomain(new OWLObjectPropertyImpl(IRI.create(edge.getURI())))); 1191// } 1192// } 1193// if(typeChildren != null) { 1194// // remove type children which are already covered implicitly 1195// for (RDFResourceTree child : new ArrayList<RDFResourceTree>(tree.getChildren(RDF.type.asNode()))) { 1196// if(child.isResourceNode() && implicitTypes.contains(new OWLClassImpl(IRI.create(child.getData().getURI())))) { 1197// tree.removeChild(child, RDF.type.asNode()); 1198// System.out.println("removing " + child.getData().getURI()); 1199// } 1200// } 1201// } 1202// 1203// } 1204// } 1205 } 1206 1207 /** 1208 * Recursively removes edges that lead to a leaf node which is a variable. 1209 * @param tree the tree 1210 */ 1211 public static boolean removeVarLeafs(RDFResourceTree tree) { 1212 SortedSet<Node> edges = new TreeSet<>(tree.getEdges()); 1213 1214 boolean modified = false; 1215 for (Node edge : edges) { 1216 List<RDFResourceTree> children = new ArrayList<>(tree.getChildren(edge)); 1217// 1218 for (RDFResourceTree child : children) { 1219 if(child.isLeaf() && child.isVarNode()) { 1220 tree.removeChild(child, edge); 1221 modified = true; 1222 } else { 1223 modified = removeVarLeafs(child); 1224 if(modified && child.isLeaf() && child.isVarNode()) { 1225 tree.removeChild(child, edge); 1226 modified = true; 1227 } 1228 } 1229 } 1230 1231 1232 } 1233 return modified; 1234 } 1235 1236 /** 1237 * Prune the rdf:type nodes such that only the most specific types remain w.r.t. the given reasoner. 1238 * 1239 * @param tree the tree 1240 */ 1241 public static boolean keepMostSpecificTypes(RDFResourceTree tree, AbstractReasonerComponent reasoner) { 1242 boolean modified = false; 1243 1244 // process child nodes first 1245 for (Node edge : tree.getEdges()) { 1246 for (RDFResourceTree child : tree.getChildren(edge)) { 1247 modified |= keepMostSpecificTypes(child, reasoner); 1248 } 1249 } 1250 1251 // prune the rdf:type nodes 1252 List<RDFResourceTree> typeChildren = tree.getChildren(RDF.type.asNode()); 1253 if (typeChildren != null) { 1254 // collapse rdfs:subClassOf paths 1255 new ArrayList<>(typeChildren).stream().filter(RDFResourceTree::isVarNode).forEach(child -> { 1256 // check if there are rdfs:subClassOf edges TODO we need a complete "collapse" method here 1257 List<RDFResourceTree> subClassOfChildren = child.getChildren(RDFS.subClassOf.asNode()); 1258 if(subClassOfChildren != null) { 1259 new ArrayList<>(subClassOfChildren).forEach(childChild -> { 1260 if(childChild.isResourceNode()) { 1261 tree.addChild(childChild, RDF.type.asNode()); 1262 child.removeChild(childChild, RDFS.subClassOf.asNode()); 1263 } 1264 }); 1265 tree.removeChild(child, RDF.type.asNode()); 1266 } else { 1267 if(!child.hasChildren()) { 1268 tree.removeChild(child, RDF.type.asNode()); 1269 } 1270 } 1271 }); 1272 // refresh the rdf:type children after subClassOf collapsing 1273 typeChildren = tree.getChildren(RDF.type.asNode()); 1274 1275 if(typeChildren != null) { 1276 List<RDFResourceTree> children2Remove = new ArrayList<>(); 1277 1278 for (int i = 0; i < typeChildren.size(); i++) { 1279 RDFResourceTree child1 = typeChildren.get(i); 1280 OWLClass cls1 = df.getOWLClass(IRI.create(child1.getData().getURI())); 1281 1282 for (int j = i+1; j < typeChildren.size(); j++) { 1283 RDFResourceTree child2 = typeChildren.get(j); 1284 OWLClass cls2 = df.getOWLClass(IRI.create(child2.getData().getURI())); 1285 1286 if(reasoner.isSuperClassOf(cls1, cls2)) { // T2 subClassOf T1 -> remove T1 1287 children2Remove.add(child1); 1288 } else if(reasoner.isSuperClassOf(cls2, cls1)) { // T1 subClassOf T2 -> remove T2 1289 children2Remove.add(child2); 1290 } 1291 } 1292 } 1293 children2Remove.forEach(c -> tree.removeChild(c, RDF.type.asNode())); 1294 } 1295 } 1296 1297 return modified; 1298 } 1299 1300 public static boolean isNonTrivial(RDFResourceTree tree, Entailment entailment) { 1301 if(tree.isResourceNode() || tree.isLiteralNode()){ 1302 return true; 1303 } else { 1304 for (Node edge : tree.getEdges()) { 1305 for (RDFResourceTree child : tree.getChildren(edge)) { 1306 if(!edge.equals(RDFS.subClassOf.asNode())){ 1307 return true; 1308 } else if(child.isResourceNode()){ 1309 return true; 1310 } else if(isNonTrivial(child, entailment)){ 1311 return true; 1312 } 1313 } 1314 } 1315 1316 } 1317 return false; 1318 } 1319 1320 /** 1321 * @param tree1 1322 * @param tree2 1323 * @param entailment 1324 * @param reasoner 1325 * @return 1326 */ 1327 public static boolean isSubsumedBy(RDFResourceTree tree1, RDFResourceTree tree2, Entailment entailment, 1328 AbstractReasonerComponent reasoner) { 1329 1330 if(entailment == Entailment.SIMPLE) { 1331 return isSubsumedBy(tree1, tree2); 1332 } 1333 1334 // 1.compare the root nodes 1335 1336 // (T_1 != ?) and (T_2 != ?) --> T_1 = T_2 1337 if (!tree1.isVarNode() && !tree2.isVarNode()) { 1338 if (tree1.isResourceNode() && tree2.isResourceNode()) { 1339 return tree1.getData().equals(tree2.getData()); 1340 } else if(tree1.isLiteralNode() && tree2.isLiteralNode()) { 1341 if(tree1.isLiteralValueNode()) { 1342 if(tree2.isLiteralValueNode()) { 1343 return tree1.getData().equals(tree2.getData()); 1344 } else { 1345 RDFDatatype d1 = tree1.getData().getLiteralDatatype(); 1346 return tree2.getDatatype().equals(d1); 1347 } 1348 } else { 1349 if(tree2.isLiteralValueNode()) { 1350 return false; 1351 } else { 1352 RDFDatatype d1 = tree1.getDatatype(); 1353 return tree2.getDatatype().equals(d1); 1354 } 1355 } 1356 1357 } 1358 } 1359 1360 // (T_1 = ?) and (T_2 != ?) --> FALSE 1361 if (tree1.isVarNode() && !tree2.isVarNode()) { 1362 return false; 1363 } 1364 1365 // 2. compare the children 1366 for (Node edge2 : tree2.getEdges()) { 1367 1368 // get sub properties 1369 OWLObjectProperty prop2 = OwlApiJenaUtils.asOWLEntity(edge2, EntityType.OBJECT_PROPERTY); 1370 SortedSet<OWLObjectProperty> subProperties = reasoner.getSubProperties(prop2); 1371 subProperties.add(prop2); 1372 1373 // for each subtree T2_sub in T2 1374 for (RDFResourceTree child2 : tree2.getChildren(edge2)) { 1375 boolean childSubsumed = false; 1376 1377 // for each sub edge 1378 for(OWLObjectProperty subProp : subProperties) { 1379 // check if there is a child in T_1 that is subsumed by 1380 Node edge1 = OwlApiJenaUtils.asNode(subProp); 1381 List<RDFResourceTree> children1 = tree1.getChildren(edge1); 1382 1383 if(children1 != null) { 1384 for (RDFResourceTree child1 : children1) { 1385 if (QueryTreeUtils.isSubsumedBy(child1, child2, entailment, reasoner)) { 1386 childSubsumed = true; 1387 break; 1388 } 1389 } 1390 } 1391 if(childSubsumed) { 1392 break; 1393 } 1394 } 1395 1396 // we found no subtree in T1 that is subsumed by t2_sub 1397 if(!childSubsumed) { 1398 return false; 1399 } 1400 } 1401 } 1402 1403 // 2. compare the children 1404// for (Node edge2 : tree2.getEdges()) { 1405// 1406// // get sub properties 1407// OWLObjectProperty prop2 = OwlApiJenaUtils.asOWLEntity(edge2, EntityType.OBJECT_PROPERTY); 1408// SortedSet<OWLObjectProperty> subProperties = reasoner.getSubProperties(prop2); 1409// subProperties.add(prop2); 1410// 1411// boolean edgeSubsumed = false; 1412// 1413// Iterator<OWLObjectProperty> iterator = subProperties.iterator(); 1414// while (!edgeSubsumed && iterator.hasNext()) { 1415// OWLObjectProperty subProp = iterator.next(); 1416// 1417// Node edge1 = OwlApiJenaUtils.asNode(subProp); 1418// 1419// List<RDFResourceTree> children1 = tree1.getChildren(edge1); 1420// 1421// if (children1 != null) { 1422// 1423// boolean childrenSubsumed = true; 1424// for (RDFResourceTree child2 : tree2.getChildren(edge2)) { 1425// boolean childSubsumed = false; 1426// 1427// for (RDFResourceTree child1 : children1) { 1428// if (QueryTreeUtils.isSubsumedBy(child1, child2, entailment, reasoner)) { 1429// childSubsumed = true; 1430// break; 1431// } 1432// } 1433// if(!childSubsumed) { 1434// childrenSubsumed = false; 1435// } 1436// } 1437// 1438// if(childrenSubsumed) { 1439// edgeSubsumed = true; 1440// } 1441// } 1442// } 1443// 1444// if(!edgeSubsumed) { 1445// System.err.println("edge not subsumed"); 1446// return false; 1447// } 1448// } 1449 return true; 1450 } 1451 1452 /* 1453 * For each edge in tree 1 we compute the related edges in tree 2. 1454 */ 1455 private static Multimap<Node, Node> getRelatedEdges(RDFResourceTree tree1, RDFResourceTree tree2, AbstractReasonerComponent reasoner) { 1456 Multimap<Node, Node> relatedEdges = HashMultimap.create(); 1457 1458 for(Node edge1 : tree1.getEdges()) { 1459 // trivial 1460 if(tree2.getEdges().contains(edge1)) { 1461 relatedEdges.put(edge1, edge1); 1462 } 1463 // check if it's not a built-in properties 1464 if (!edge1.getNameSpace().equals(RDF.getURI()) 1465 && !edge1.getNameSpace().equals(RDFS.getURI()) 1466 && !edge1.getNameSpace().equals(OWL.getURI())) { 1467 1468 // get related edges by subsumption 1469 OWLProperty prop; 1470 if(tree1.isObjectPropertyEdge(edge1)) { 1471 prop = new OWLObjectPropertyImpl(IRI.create(edge1.getURI())); 1472 } else { 1473 prop = new OWLDataPropertyImpl(IRI.create(edge1.getURI())); 1474 } 1475 1476 for (OWLProperty p : reasoner.getSuperProperties(prop)) { 1477 Node edge = NodeFactory.createURI(p.toStringID()); 1478 if(tree2.getEdges().contains(edge)) { 1479 relatedEdges.put(edge1, edge); 1480 } 1481 } 1482 for (OWLProperty p : reasoner.getSubProperties(prop)) { 1483 Node edge = NodeFactory.createURI(p.toStringID()); 1484 if(tree2.getEdges().contains(edge)) { 1485 relatedEdges.put(edge1, edge); 1486 } 1487 } 1488 } 1489 } 1490 return relatedEdges; 1491 } 1492 1493 /** 1494 * Returns all paths to leaf nodes. 1495 * 1496 * @param tree the tree 1497 * @param <T> 1498 * @param <V> 1499 * @return all paths to leaf nodes 1500 */ 1501 public static <T, V extends GenericTree<T, V>> List<List<V>> getPathsToLeafs(GenericTree<T, V> tree) { 1502 List<List<V>> paths = new ArrayList<>(); 1503 getPathsToLeafs(paths, new ArrayList<>(), tree); 1504 return paths; 1505 } 1506 1507 private static <T, V extends GenericTree<T, V>> void getPathsToLeafs(List<List<V>> paths, List<V> path, GenericTree<T, V> tree) { 1508 List<V> children = tree.getChildren(); 1509 for (V child : children) { 1510 List<V> newPath = new ArrayList<>(path); 1511 newPath.add(child); 1512 if(child.isLeaf()) { 1513 paths.add(newPath); 1514 } else { 1515 getPathsToLeafs(paths, newPath, child); 1516 } 1517 } 1518 } 1519}