001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms; 020 021import org.apache.jena.ontology.OntClass; 022import org.apache.jena.ontology.OntModel; 023import org.apache.jena.query.*; 024import org.dllearner.core.*; 025import org.dllearner.core.annotations.Unused; 026import org.dllearner.core.config.ConfigOption; 027import org.dllearner.core.owl.ClassHierarchy; 028import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; 029import org.dllearner.kb.SparqlEndpointKS; 030import org.dllearner.kb.sparql.SparqlEndpoint; 031import org.dllearner.learningproblems.AxiomScore; 032import org.dllearner.learningproblems.Heuristics; 033import org.semanticweb.owlapi.model.*; 034import uk.ac.manchester.cs.owl.owlapi.OWLClassImpl; 035 036import java.util.*; 037import java.util.Map.Entry; 038import java.util.stream.Collectors; 039 040/** 041 * Learns disjoint classes using SPARQL queries. 042 * 043 * @author Lorenz Bühmann 044 * @author Jens Lehmann 045 * 046 */ 047@ComponentAnn(name = "disjoint classes learner", shortName = "cldisjoint", version = 0.1) 048public class DisjointClassesLearner extends AbstractAxiomLearningAlgorithm<OWLDisjointClassesAxiom, OWLIndividual, OWLClass> 049 implements ClassExpressionLearningAlgorithm { 050 051 protected static final ParameterizedSparqlString CLASS_OVERLAP_QUERY = new ParameterizedSparqlString( 052 "SELECT ?cls_other (COUNT(?s) AS ?overlap) WHERE {" 053 + "?s a ?cls, ?cls_other . " 054 + "?cls_other a <http://www.w3.org/2002/07/owl#Class> . FILTER(?cls != ?cls_other)}" 055 + " GROUP BY ?cls_other"); 056 057 protected static final ParameterizedSparqlString GIVEN_CLASS_OVERLAP_QUERY = new ParameterizedSparqlString( 058 "SELECT (COUNT(?s) AS ?overlap) WHERE {?s a ?cls, ?cls_other . }"); 059 060 private static final ParameterizedSparqlString SAMPLE_QUERY = new ParameterizedSparqlString( 061 "CONSTRUCT{?s a ?entity . ?s a ?cls1 .} WHERE {?s a ?entity . OPTIONAL {?s a ?cls1 .} }"); 062 063 private List<EvaluatedDescription<? extends Score>> currentlyBestEvaluatedDescriptions; 064 private SortedSet<OWLClassExpression> subClasses; 065 066 @Unused 067 private boolean useWordNetDistance = false; 068 @ConfigOption(description = "only keep most general classes in suggestions", defaultValue = "true") 069 private boolean suggestMostGeneralClasses = true; 070 @ConfigOption(description = "include instance count / popularity when computing scores", defaultValue = "true") 071 private boolean useClassPopularity = true; 072 073 private Set<OWLClass> allClasses; 074 075 private boolean strictOWLMode = true; 076 077 public DisjointClassesLearner(SparqlEndpointKS ks) { 078 this.ks = ks; 079 080 super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s WHERE {?s a ?cls . FILTER NOT EXISTS {?s a ?cls_dis .}}"); 081 super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s WHERE {?s a ?cls ; a ?cls_dis .}"); 082 super.existingAxiomsTemplate = new ParameterizedSparqlString("SELECT ?cls_dis WHERE {?cls owl:disjointWith ?cls_dis .}"); 083 084 axiomType = AxiomType.DISJOINT_CLASSES; 085 } 086 087 /* (non-Javadoc) 088 * @see org.dllearner.core.AbstractAxiomLearningAlgorithm#setEntityToDescribe(org.semanticweb.owlapi.model.OWLEntity) 089 */ 090 @Override 091 public void setEntityToDescribe(OWLClass entityToDescribe) { 092 super.setEntityToDescribe(entityToDescribe); 093 094 posExamplesQueryTemplate.setIri("cls", entityToDescribe.toStringID()); 095 negExamplesQueryTemplate.setIri("cls", entityToDescribe.toStringID()); 096 existingAxiomsTemplate.setIri("cls", entityToDescribe.toStringID()); 097 098 CLASS_OVERLAP_QUERY.setIri("cls", entityToDescribe.toStringID()); 099 GIVEN_CLASS_OVERLAP_QUERY.setIri("cls", entityToDescribe.toStringID()); 100 } 101 102 /* 103 * (non-Javadoc) 104 * 105 * @see org.dllearner.core.AbstractAxiomLearningAlgorithm#learnAxioms() 106 */ 107 @Override 108 protected void learnAxioms() { 109 run(); 110// runBatched(); 111 } 112 113 private void run() { 114 // get the candidates 115 SortedSet<OWLClass> candidates = getCandidates(); 116 117 // check for each candidate if an overlap exists 118 int i = 1; 119 for (OWLClass cls : candidates) { 120 logger.debug("Analyzing candidate class " + cls.toStringID()); 121 progressMonitor.learningProgressChanged(axiomType, i++, candidates.size()); 122 123 // get the popularity of the candidate 124 int candidatePopularity = reasoner.getPopularity(cls); 125 126 if(candidatePopularity == 0){// skip empty classes 127 logger.warn("Cannot compute disjointness statements for empty candidate class " + cls); 128 continue; 129 } 130 131 // get the number of overlapping instances, i.e. instances asserted to both classes 132 GIVEN_CLASS_OVERLAP_QUERY.setIri("cls_other", cls.toStringID()); 133 ResultSet rs = executeSelectQuery(GIVEN_CLASS_OVERLAP_QUERY.toString()); 134 int overlap = rs.next().getLiteral("overlap").getInt(); 135 136 // compute the score 137 double score = computeScore(candidatePopularity, popularity, overlap); 138 139 int nrOfPosExamples = overlap; 140 141 int nrOfNegExamples = popularity - nrOfPosExamples; 142 143 currentlyBestAxioms.add( 144 new EvaluatedAxiom<>( 145 df.getOWLDisjointClassesAxiom(entityToDescribe, cls), 146 new AxiomScore(score, score, nrOfPosExamples, nrOfNegExamples, useSampling))); 147 } 148 } 149 150 /** 151 * In this method we try to compute the overlap with each class in one single SPARQL query. 152 * This method might be much slower as the query is much more complex. 153 */ 154 protected void runBatched() { 155 156 ResultSet rs = executeSelectQuery(CLASS_OVERLAP_QUERY.toString()); 157 ResultSetRewindable rsrw = ResultSetFactory.copyResults(rs); 158 int size = rsrw.size(); 159 rs = rsrw; 160 while (rs.hasNext()) { 161 QuerySolution qs = rsrw.next(); 162 OWLClass candidate = df.getOWLClass(IRI.create(qs.getResource("cls_other").getURI())); 163 logger.debug("Analyzing candidate class " + candidate.toStringID()); 164 progressMonitor.learningProgressChanged(axiomType, rs.getRowNumber(), size); 165 166 // get the popularity of the candidate 167 int candidatePopularity = reasoner.getPopularity(candidate); 168 169 // get the number of overlapping triples, i.e. triples with the same subject and object 170 int overlap = qs.getLiteral("overlap").getInt(); 171 172 // compute the score 173 double score = 1 - computeScore(candidatePopularity, popularity, overlap); 174 175 int nrOfPosExamples = overlap; 176 177 int nrOfNegExamples = popularity - nrOfPosExamples; 178 179 currentlyBestAxioms.add( 180 new EvaluatedAxiom<>( 181 df.getOWLDisjointClassesAxiom(entityToDescribe, candidate), 182 new AxiomScore(score, score, nrOfPosExamples, nrOfNegExamples, useSampling))); 183 } 184 } 185 186 /** 187 * Returns the candidate classes for comparison. 188 * @return the candidate classes 189 */ 190 private SortedSet<OWLClass> getCandidates(){ 191 SortedSet<OWLClass> candidates; 192 193 if (strictOWLMode) { 194 candidates = reasoner.getOWLClasses(); 195 } else { 196 candidates = reasoner.getClasses(); 197 } 198 candidates.remove(entityToDescribe); 199 200 // we do not have to consider subclasses 201 SortedSet<OWLClassExpression> subClasses = reasoner.getSubClasses(entityToDescribe, false); 202 candidates.removeAll(subClasses); 203 204 return candidates; 205 } 206 207 private double computeScore(int candidatePopularity, int popularity, int overlap){ 208 // compute the estimated precision 209 double precision = Heuristics.getConfidenceInterval95WaldAverage(candidatePopularity, overlap); 210 211 // compute the estimated recall 212 double recall = Heuristics.getConfidenceInterval95WaldAverage(popularity, overlap); 213 214 // compute the final score 215 double score = Heuristics.getFScore(recall, precision, 1.0); 216 217 return score; 218 } 219 220 public boolean isUseWordNetDistance() { 221 return useWordNetDistance; 222 } 223 224 public void setUseWordNetDistance(boolean useWordNetDistance) { 225 this.useWordNetDistance = useWordNetDistance; 226 } 227 228 public boolean isSuggestMostGeneralClasses() { 229 return suggestMostGeneralClasses; 230 } 231 232 public void setSuggestMostGeneralClasses(boolean suggestMostGeneralClasses) { 233 this.suggestMostGeneralClasses = suggestMostGeneralClasses; 234 } 235 236 public boolean isUseClassPopularity() { 237 return useClassPopularity; 238 } 239 240 public void setUseClassPopularity(boolean useClassPopularity) { 241 this.useClassPopularity = useClassPopularity; 242 } 243 244 /* (non-Javadoc) 245 * @see org.dllearner.core.AbstractAxiomLearningAlgorithm#getSampleQuery() 246 */ 247 @Override 248 protected ParameterizedSparqlString getSampleQuery() { 249 return SAMPLE_QUERY; 250 } 251 252 /* 253 * (non-Javadoc) 254 * 255 * @see 256 * org.dllearner.core.AbstractAxiomLearningAlgorithm#getExistingAxioms() 257 */ 258 @Override 259 protected void getExistingAxioms() { 260 ResultSet rs = executeSelectQuery(existingAxiomsTemplate.toString()); 261 QuerySolution qs; 262 while (rs.hasNext()) { 263 qs = rs.next(); 264 if(qs.get("cls_dis").isResource()){ 265 OWLClass disjointClass = df.getOWLClass(IRI.create(qs.getResource("cls_dis").getURI())); 266 existingAxioms.add(df.getOWLDisjointClassesAxiom(entityToDescribe, disjointClass)); 267 } else { 268 logger.warn("We do not support complex disjoint classes."); 269 } 270 271 } 272 } 273 274 @Override 275 public List<OWLClassExpression> getCurrentlyBestDescriptions(int nrOfDescriptions) { 276 List<OWLClassExpression> bestDescriptions = new ArrayList<>(); 277 for (EvaluatedDescription<? extends Score> evDesc : getCurrentlyBestEvaluatedDescriptions(nrOfDescriptions)) { 278 bestDescriptions.add(evDesc.getDescription()); 279 } 280 return bestDescriptions; 281 } 282 283 @Override 284 public List<? extends EvaluatedDescription<? extends Score>> getCurrentlyBestEvaluatedDescriptions(int nrOfDescriptions) { 285 int max = Math.min(currentlyBestEvaluatedDescriptions.size(), nrOfDescriptions); 286 return currentlyBestEvaluatedDescriptions.subList(0, max); 287 } 288 289 private List<EvaluatedDescription<? extends Score>> buildEvaluatedClassDescriptions(Map<OWLClass, Integer> class2Count, 290 Set<OWLClass> allClasses, int total) { 291 List<EvaluatedDescription<? extends Score>> evalDescs = new ArrayList<>(); 292 293 //Remove temporarily entityToDescribe but keep track of their count 294 // Integer all = class2Count.get(entityToDescribe); 295 class2Count.remove(entityToDescribe); 296 297 //get complete disjoint classes 298 Set<OWLClass> completeDisjointclasses = new TreeSet<>(allClasses); 299 completeDisjointclasses.removeAll(class2Count.keySet()); 300 301 // we remove the asserted subclasses here 302 completeDisjointclasses.removeAll(subClasses); 303 for (OWLClassExpression subClass : subClasses) { 304 class2Count.remove(subClass); 305 } 306 307 //drop all classes which have a super class in this set 308 if (suggestMostGeneralClasses) { 309 keepMostGeneralClasses(completeDisjointclasses); 310 } 311 312 EvaluatedDescription<? extends Score> evalDesc; 313 //firstly, create disjoint classexpressions which do not occur and give score of 1 314 for (OWLClass cls : completeDisjointclasses) { 315 if (useClassPopularity) { 316 int overlap = 0; 317 int pop; 318 if (ks.isRemote()) { 319 pop = reasoner.getPopularity(cls); 320 } else { 321 pop = ((LocalModelBasedSparqlEndpointKS) ks).getModel().getOntClass(cls.toStringID()) 322 .listInstances().toSet().size(); 323 } 324 //we skip classes with no instances 325 if (pop == 0) 326 continue; 327 328 // compute the estimated precision 329 double precision = Heuristics.getConfidenceInterval95WaldAverage(pop, overlap); 330 331 // compute the estimated recall 332 double recall = Heuristics.getConfidenceInterval95WaldAverage(popularity, overlap); 333 334 // compute the final score 335 double score = 1 - Heuristics.getFScore(recall, precision); 336 337 evalDesc = new EvaluatedDescription(cls, new AxiomScore(score)); 338 } else { 339 evalDesc = new EvaluatedDescription(cls, new AxiomScore(1)); 340 } 341 342 evalDescs.add(evalDesc); 343 } 344 345 //secondly, create disjoint class expressions with score 1 - (#occurence/#all) 346 OWLClass cls; 347 for (Entry<OWLClass, Integer> entry : sortByValues(class2Count)) { 348 cls = entry.getKey(); 349 // drop classes from OWL and RDF namespace 350 if (cls.getIRI().isReservedVocabulary()) 351 continue; 352 if (useClassPopularity) { 353 int overlap = entry.getValue(); 354 int pop; 355 if (ks.isRemote()) { 356 pop = reasoner.getPopularity(cls); 357 } else { 358 pop = ((LocalModelBasedSparqlEndpointKS) ks).getModel().getOntClass(cls.toStringID()) 359 .listInstances().toSet().size(); 360 } 361 // we skip classes with no instances 362 if (pop == 0) 363 continue; 364 365 // compute the estimated precision 366 double precision = Heuristics.getConfidenceInterval95WaldAverage(pop, overlap); 367 368 // compute the estimated recall 369 double recall = Heuristics.getConfidenceInterval95WaldAverage(popularity, overlap); 370 371 // compute the final score 372 double score = 1 - Heuristics.getFScore(recall, precision); 373 374 evalDesc = new EvaluatedDescription(cls, new AxiomScore(score)); 375 } else { 376 evalDesc = new EvaluatedDescription(cls, new AxiomScore(1)); 377 } 378 evalDescs.add(evalDesc); 379 } 380 381 class2Count.put(entityToDescribe, total); 382 return evalDescs; 383 } 384 385 private void keepMostGeneralClasses(Set<OWLClass> classes) { 386 if (ks.isRemote()) { 387 if (reasoner.isPrepared()) { 388 ClassHierarchy h = reasoner.getClassHierarchy(); 389 for (OWLClass nc : new HashSet<>(classes)) { 390 classes.removeAll(h.getSubClasses(nc)); 391 } 392 } 393 } else { 394 OntModel model = ((LocalModelBasedSparqlEndpointKS) ks).getModel(); 395 396 // Set<OWLClass> topClasses = new HashSet<OWLClass>(); 397 // for(OntClass cls : model.listOWLClasses().toSet()){ 398 // Set<OntClass> superClasses = cls.listSuperClasses().toSet(); 399 // if(superClasses.isEmpty() || 400 // (superClasses.size() == 1 && superClasses.contains(model.getOntClass(org.apache.jena.vocabulary.OWL.Thing.getURI())))){ 401 // topClasses.add(df.getOWLClass(IRI.create(cls.getURI())); 402 // } 403 // 404 // } 405 // classes.retainAll(topClasses); 406 for (OWLClass nc : new HashSet<>(classes)) {//System.out.print(nc + "::"); 407 for (OntClass cls : model.getOntClass(nc.toStringID()).listSubClasses().toSet()) {//System.out.print(cls + "|"); 408 classes.remove(df.getOWLClass(IRI.create(cls.getURI()))); 409 } 410 // System.out.println(); 411 } 412 413 } 414 } 415 416 private void computeAllDisjointClassAxiomsOptimized() { 417 //get number of instances of A 418 int instanceCountA = reasoner.getPopularity(entityToDescribe); 419 420 //firstly, we compute the disjointness to all sibling classes 421 Set<EvaluatedDescription> disjointessOfSiblings = computeDisjointessOfSiblings(entityToDescribe); 422 System.out.println(disjointessOfSiblings); 423 424 //we go the hierarchy up 425 SortedSet<OWLClassExpression> superClasses = reasoner.getSuperClasses(entityToDescribe); 426 for (OWLClassExpression sup : superClasses) { 427 Set<EvaluatedDescription> disjointessOfSuperClass = computeDisjointessOfSiblings(sup.asOWLClass()); 428 System.out.println(disjointessOfSuperClass); 429 } 430 } 431 432 private Set<EvaluatedDescription> computeDisjointessOfSiblings(OWLClass cls) { 433 Set<EvaluatedDescription> evaluatedDescriptions = new HashSet<>(); 434 435 //get number of instances of A 436 int instanceCountA = reasoner.getPopularity(cls); 437 438 if (instanceCountA > 0) { 439 //we compute the disjointness to all sibling classes 440 Set<OWLClass> siblingClasses = reasoner.getSiblingClasses(cls); 441 442 for (OWLClass sib : siblingClasses) { 443 //get number of instances of B 444 int instanceCountB = reasoner.getPopularity(sib); 445 446 if (instanceCountB > 0) { 447 //get number of instances of (A and B) 448 int instanceCountAB = reasoner.getPopularityOf(df.getOWLObjectIntersectionOf(cls, sib)); 449 450 // compute the estimated precision 451 double precision = Heuristics.getConfidenceInterval95WaldAverage(instanceCountB, instanceCountAB); 452 453 // compute the estimated recall 454 double recall = Heuristics.getConfidenceInterval95WaldAverage(instanceCountA, instanceCountAB); 455 456 // compute the final score 457 double score = 1 - Heuristics.getFScore(recall, precision); 458 459 EvaluatedDescription evalDesc = new EvaluatedDescription(sib, new AxiomScore(score)); 460 evaluatedDescriptions.add(evalDesc); 461 } 462 } 463 } 464 465 return evaluatedDescriptions; 466 } 467 468 public EvaluatedAxiom<OWLDisjointClassesAxiom> computeDisjointess(OWLClass clsA, OWLClass clsB) { 469 logger.debug("Computing disjointness between " + clsA + " and " + clsB + "..."); 470 471 //if clsA = clsB 472 if (clsA.equals(clsB)) { 473 return new EvaluatedAxiom<>(df.getOWLDisjointClassesAxiom(clsA, clsB), 474 new AxiomScore(0d, 1d)); 475 } 476 477 //if the classes are connected via subsumption we assume that they are not disjoint 478 if (reasoner.isSuperClassOf(clsA, clsB) || reasoner.isSuperClassOf(clsB, clsA)) { 479 return new EvaluatedAxiom<>(df.getOWLDisjointClassesAxiom(clsA, clsB), 480 new AxiomScore(0d, 1d)); 481 } 482 483 double scoreValue = 0; 484 485 //get number of instances of A 486 int instanceCountA = reasoner.getPopularity(clsA); 487 488 //get number of instances of B 489 int instanceCountB = reasoner.getPopularity(clsB); 490 491 if (instanceCountA > 0 && instanceCountB > 0) { 492 //get number of instances of (A and B) 493 int instanceCountAB = reasoner.getPopularityOf(df.getOWLObjectIntersectionOf(clsA, clsB)); 494 495 // compute the estimated precision 496 double precision = Heuristics.getConfidenceInterval95WaldAverage(instanceCountB, instanceCountAB); 497 498 // compute the estimated recall 499 double recall = Heuristics.getConfidenceInterval95WaldAverage(instanceCountA, instanceCountAB); 500 501 // compute the final score 502 scoreValue = 1 - Heuristics.getFScore(recall, precision); 503 504 } 505 506 AxiomScore score = new AxiomScore(scoreValue); 507 508 return new EvaluatedAxiom<>(df.getOWLDisjointClassesAxiom(clsA, clsB), score); 509 } 510 511 public Set<EvaluatedAxiom<OWLDisjointClassesAxiom>> computeSchemaDisjointness() { 512 Set<EvaluatedAxiom<OWLDisjointClassesAxiom>> axioms = new HashSet<>(); 513 514 Set<OWLClass> classes = reasoner.getOWLClasses("http://dbpedia.org/ontology/"); 515 computeDisjointness(classes); 516 517 //start from the top level classes, i.e. the classes whose direct super class is owl:Thing 518 SortedSet<OWLClassExpression> topLevelClasses = reasoner.getMostGeneralClasses(); 519 axioms.addAll(computeDisjointness(asOWLClasses(topLevelClasses))); 520 521 for (OWLClassExpression cls : topLevelClasses) { 522 523 } 524 525 return axioms; 526 } 527 528 public Set<EvaluatedAxiom<OWLDisjointClassesAxiom>> computeDisjointness(Set<OWLClass> classes) { 529 Set<EvaluatedAxiom<OWLDisjointClassesAxiom>> axioms = new HashSet<>(); 530 531 for (OWLClass clsA : classes) { 532 for (OWLClass clsB : classes) { 533 axioms.add(computeDisjointess(clsA, clsB)); 534 } 535 } 536 537 return axioms; 538 } 539 540 public static Set<OWLClass> asOWLClasses(Set<OWLClassExpression> descriptions) { 541 Set<OWLClass> classes = descriptions.stream() 542 .filter(description -> !description.isAnonymous()) 543 .map(OWLClassExpression::asOWLClass) 544 .collect(Collectors.toCollection(TreeSet::new)); 545 return classes; 546 } 547 548 public static void main(String[] args) throws Exception { 549 SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.create("http://sake.informatik.uni-leipzig.de:8890/sparql", "http://dbpedia.org")); 550 ks.init(); 551 552 DisjointClassesLearner la = new DisjointClassesLearner(ks); 553 la.setEntityToDescribe(new OWLClassImpl(IRI.create("http://dbpedia.org/ontology/Actor"))); 554 la.setUseSampling(false); 555 la.init(); 556 557 la.start(); 558 559 la.getCurrentlyBestAxioms(10); 560 } 561}