001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.celoe; 020 021import com.google.common.collect.Sets; 022import com.jamonapi.Monitor; 023import com.jamonapi.MonitorFactory; 024import org.apache.log4j.FileAppender; 025import org.apache.log4j.Level; 026import org.apache.log4j.Logger; 027import org.apache.log4j.PatternLayout; 028import org.dllearner.core.*; 029import org.dllearner.core.config.ConfigOption; 030import org.dllearner.core.owl.ClassHierarchy; 031import org.dllearner.core.owl.DatatypePropertyHierarchy; 032import org.dllearner.core.owl.ObjectPropertyHierarchy; 033import org.dllearner.kb.OWLFile; 034import org.dllearner.learningproblems.ClassAsInstanceLearningProblem; 035import org.dllearner.learningproblems.ClassLearningProblem; 036import org.dllearner.learningproblems.PosNegLP; 037import org.dllearner.learningproblems.PosOnlyLP; 038import org.dllearner.reasoning.ClosedWorldReasoner; 039import org.dllearner.reasoning.SPARQLReasoner; 040import org.dllearner.refinementoperators.*; 041import org.dllearner.utilities.Files; 042import org.dllearner.utilities.Helper; 043import org.dllearner.utilities.OWLAPIUtils; 044import org.dllearner.utilities.TreeUtils; 045import org.dllearner.utilities.datastructures.SynchronizedSearchTree; 046import org.dllearner.utilities.owl.*; 047import org.semanticweb.owlapi.model.*; 048import org.springframework.beans.factory.annotation.Autowired; 049import uk.ac.manchester.cs.owl.owlapi.OWLClassImpl; 050 051import java.io.File; 052import java.util.*; 053import java.util.concurrent.ExecutionException; 054import java.util.concurrent.ExecutorService; 055import java.util.concurrent.Executors; 056import java.util.concurrent.Future; 057 058/** 059 * The PCELOE is an experimental, parallel implementation of the CELOE algorithm. 060 * 061 * @author Lorenz Buehmann 062 * 063 */ 064@ComponentAnn(name="PCELOE", shortName="pceloe", version=1.0, description="PCELOE is an experimental, parallel implementation of the CELOE algorithm.") 065public class PCELOE extends AbstractCELA { 066 067 private static Logger logger = Logger.getLogger(PCELOE.class); 068 069 private boolean isRunning = false; 070 private boolean stop = false; 071 072 @ConfigOption(description = "the refinement operator instance to use") 073 private LengthLimitedRefinementOperator operator; 074 075 private SynchronizedSearchTree<OENode> searchTree; 076 @ConfigOption(defaultValue="celoe_heuristic") 077 private AbstractHeuristic heuristic; 078 // the class with which we start the refinement process 079 @ConfigOption(defaultValue="owl:Thing", description="You can specify a start class for the algorithm. To do this, you have to use Manchester OWL syntax without using prefixes.") 080 private OWLClassExpression startClass; 081 082 // all descriptions in the search tree plus those which were too weak (for fast redundancy check) 083// private TreeSet<OWLClassExpression> descriptions; 084 private SortedSet<OWLClassExpression> descriptions; 085 086 // if true, then each solution is evaluated exactly instead of approximately 087 // private boolean exactBestDescriptionEvaluation = false; 088 @ConfigOption(defaultValue="false", description="Use this if you are interested in only one suggestion and your learning problem has many (more than 1000) examples.") 089 private boolean singleSuggestionMode; 090 private OWLClassExpression bestDescription; 091 private double bestAccuracy = Double.MIN_VALUE; 092 093 private OWLClass classToDescribe; 094 // examples are either 1.) instances of the class to describe 2.) positive examples 095 // 3.) union of pos.+neg. examples depending on the learning problem at hand 096 private Set<OWLIndividual> examples; 097 098 // CELOE was originally created for learning classes in ontologies, but also 099 // works for other learning problem types 100 private boolean isClassLearningProblem; 101 private boolean isEquivalenceProblem; 102 103 private long nanoStartTime; 104 105 // important parameters (non-config options but internal) 106 private double noise; 107 108 private boolean filterFollowsFromKB; 109 110 // less important parameters 111 // forces that one solution cannot be subexpression of another expression; this option is useful to get diversity 112 // but it can also suppress quite useful expressions 113 private boolean forceMutualDifference = false; 114 115 // statistical variables 116 private int expressionTests = 0; 117 private int minHorizExp = 0; 118 private int maxHorizExp = 0; 119 private long totalRuntimeNs = 0; 120 121 // TODO: turn those into config options 122 123 // important: do not initialise those with empty sets 124 // null = no settings for allowance / ignorance 125 // empty set = allow / ignore nothing (it is often not desired to allow no class!) 126 Set<OWLClass> allowedConcepts = null; 127 Set<OWLClass> ignoredConcepts = null; 128 129 // important: do not initialise those with empty sets 130 // null = no settings for allowance / ignorance 131 // empty set = allow / ignore nothing (it is often not desired to allow no class!) 132 @ConfigOption(defaultValue="false", description="specifies whether to write a search tree") 133 private boolean writeSearchTree = false; 134 135 @ConfigOption(defaultValue="log/searchTree.txt", description="file to use for the search tree") 136 private String searchTreeFile = "log/searchTree.txt"; 137 138 @ConfigOption(defaultValue="false", description="specifies whether to replace the search tree in the log file after each run or append the new search tree") 139 private boolean replaceSearchTree = false; 140 141 @ConfigOption(defaultValue="10", description="Sets the maximum number of results one is interested in. (Setting this to a lower value may increase performance as the learning algorithm has to store/evaluate/beautify less descriptions).") 142 private int maxNrOfResults = 10; 143 144 @ConfigOption(defaultValue="0.0", description="the (approximated) percentage of noise within the examples") 145 private double noisePercentage = 0.0; 146 147 @ConfigOption(defaultValue="false", description="If true, then the results will not contain suggestions, which already follow logically from the knowledge base. Be careful, since this requires a potentially expensive consistency check for candidate solutions.") 148 private boolean filterDescriptionsFollowingFromKB = false; 149 150 @ConfigOption(defaultValue="false", description="If true, the algorithm tries to find a good starting point close to an existing definition/super class of the given class in the knowledge base.") 151 private boolean reuseExistingDescription = false; 152 153 @ConfigOption(defaultValue="0", description="The maximum number of candidate hypothesis the algorithm is allowed to test (0 = no limit). The algorithm will stop afterwards. (The real number of tests can be slightly higher, because this criterion usually won't be checked after each single test.)") 154 private int maxClassExpressionTests = 0; 155 156 @ConfigOption(defaultValue="0", description = "The maximum number of candidate hypothesis the algorithm is allowed after an improvement in accuracy (0 = no limit). The algorithm will stop afterwards. (The real number of tests can be slightly higher, because this criterion usually won't be checked after each single test.)") 157 private int maxClassExpressionTestsAfterImprovement = 0; 158 159 @ConfigOption(defaultValue = "10", description = "maximum execution of the algorithm in seconds") 160 private int maxExecutionTimeInSeconds = 10; 161 162 @ConfigOption(defaultValue = "0", description = "maximum execution of the algorithm in seconds") 163 private int maxExecutionTimeInSecondsAfterImprovement = 0; 164 165 @ConfigOption(defaultValue="false", description="specifies whether to terminate when noise criterion is met") 166 private boolean terminateOnNoiseReached = false; 167 168 @ConfigOption(defaultValue="7", description="maximum depth of description") 169 private double maxDepth = 7; 170 171 @ConfigOption(defaultValue="false", description="algorithm will terminate immediately when a correct definition is found") 172 private boolean stopOnFirstDefinition = false; 173 174 @ConfigOption(defaultValue = "false", description = "whether to try and refine solutions which already have accuracy value of 1") 175 private boolean expandAccuracy100Nodes = false; 176 private double currentHighestAccuracy; 177 178 @ConfigOption(defaultValue="2", description="number of threads running in parallel") 179 private int nrOfThreads = 2; 180 181 private int expressionTestCountLastImprovement; 182 private long timeLastImprovement = 0; 183 184 private Set<OENode> currentlyProcessedNodes = Collections.synchronizedSet(new HashSet<>()); 185 private volatile double highestAccuracy = 0.0; 186 187 public PCELOE() {} 188 189 public PCELOE(PCELOE celoe){ 190 setReasoner(celoe.reasoner); 191 setLearningProblem(celoe.learningProblem); 192 193 setAllowedConcepts(celoe.getAllowedConcepts()); 194 setAllowedObjectProperties(celoe.getAllowedObjectProperties()); 195 setAllowedDataProperties(celoe.getAllowedDataProperties()); 196 197 setIgnoredConcepts(celoe.ignoredConcepts); 198 setIgnoredObjectProperties(celoe.getIgnoredObjectProperties()); 199 setIgnoredDataProperties(celoe.getIgnoredDataProperties()); 200 201 setExpandAccuracy100Nodes(celoe.expandAccuracy100Nodes); 202 setFilterDescriptionsFollowingFromKB(celoe.filterDescriptionsFollowingFromKB); 203 setHeuristic(celoe.heuristic); 204 205 setMaxClassExpressionTests(celoe.maxClassExpressionTests); 206 setMaxClassExpressionTestsAfterImprovement(celoe.maxClassExpressionTestsAfterImprovement); 207 setMaxDepth(celoe.maxDepth); 208 setMaxExecutionTimeInSeconds(celoe.maxExecutionTimeInSeconds); 209 setMaxExecutionTimeInSecondsAfterImprovement(celoe.maxExecutionTimeInSecondsAfterImprovement); 210 setMaxNrOfResults(celoe.maxNrOfResults); 211 setNoisePercentage(celoe.noisePercentage); 212 213 RhoDRDown op = new RhoDRDown((RhoDRDown)celoe.operator); 214 try { 215 op.init(); 216 } catch (ComponentInitException e) { 217 e.printStackTrace(); 218 } 219 setOperator(op); 220 221 setReuseExistingDescription(celoe.reuseExistingDescription); 222 setSingleSuggestionMode(celoe.singleSuggestionMode); 223 setStartClass(celoe.startClass); 224 setStopOnFirstDefinition(celoe.stopOnFirstDefinition); 225 setTerminateOnNoiseReached(celoe.terminateOnNoiseReached); 226 setUseMinimizer(celoe.isUseMinimizer()); 227 228 setWriteSearchTree(celoe.writeSearchTree); 229 setReplaceSearchTree(celoe.replaceSearchTree); 230 } 231 232 public PCELOE(AbstractClassExpressionLearningProblem problem, AbstractReasonerComponent reasoner) { 233 super(problem, reasoner); 234 } 235 236 @Override 237 public void init() throws ComponentInitException { 238 baseURI = reasoner.getBaseURI(); 239 prefixes = reasoner.getPrefixes(); 240 241 if(maxExecutionTimeInSeconds != 0 && maxExecutionTimeInSecondsAfterImprovement != 0) { 242 maxExecutionTimeInSeconds = Math.min(maxExecutionTimeInSeconds, maxExecutionTimeInSecondsAfterImprovement); 243 } 244 245 // TODO add comment 246 ClassHierarchy classHierarchy = initClassHierarchy(); 247 ObjectPropertyHierarchy objectPropertyHierarchy = initObjectPropertyHierarchy(); 248 DatatypePropertyHierarchy datatypePropertyHierarchy = initDataPropertyHierarchy(); 249 250 // if no one injected a heuristic, we use a default one 251 if(heuristic == null) { 252 heuristic = new OEHeuristicRuntime(); 253 heuristic.init(); 254 } 255 256 minimizer = new OWLClassExpressionMinimizer(dataFactory, reasoner); 257 258 if (writeSearchTree) { 259 File f = new File(searchTreeFile); 260 if (f.getParentFile() != null) { 261 f.getParentFile().mkdirs(); 262 } 263 Files.clearFile(f); 264 } 265 266 // start at owl:Thing by default 267 if (startClass == null) { 268 startClass = computeStartClass(); 269 } else { 270 try { 271 this.startClass = OWLAPIUtils.classExpressionPropertyExpander(this.startClass, reasoner, dataFactory); 272 } catch (Exception e) { 273 logger.warn("Error parsing start class.", e); 274 logger.warn("Using owl:Thing instead."); 275 this.startClass = dataFactory.getOWLThing(); 276 } 277 } 278 279 bestEvaluatedDescriptions = new EvaluatedDescriptionSet(maxNrOfResults); 280 281 isClassLearningProblem = (learningProblem instanceof ClassLearningProblem); 282 283 // we put important parameters in class variables 284 noise = noisePercentage/100d; 285 286 // (filterFollowsFromKB is automatically set to false if the problem 287 // is not a class learning problem 288 filterFollowsFromKB = filterDescriptionsFollowingFromKB && isClassLearningProblem; 289 290 // actions specific to ontology engineering 291 if(isClassLearningProblem) { 292 ClassLearningProblem problem = (ClassLearningProblem) learningProblem; 293 classToDescribe = problem.getClassToDescribe(); 294 isEquivalenceProblem = problem.isEquivalenceProblem(); 295 296 examples = reasoner.getIndividuals(classToDescribe); 297 } else if(learningProblem instanceof PosOnlyLP) { 298 examples = ((PosOnlyLP)learningProblem).getPositiveExamples(); 299 } else if(learningProblem instanceof PosNegLP) { 300 examples = Sets.union(((PosNegLP)learningProblem).getPositiveExamples(),((PosNegLP)learningProblem).getNegativeExamples()); 301 } 302 303 // create a refinement operator and pass all configuration 304 // variables to it 305 if (operator == null) { 306 // we use a default operator and inject the class hierarchy for now 307 operator = new RhoDRDown(); 308 if (operator instanceof CustomStartRefinementOperator) { 309 ((CustomStartRefinementOperator) operator).setStartClass(startClass); 310 } 311 if (operator instanceof ReasoningBasedRefinementOperator) { 312 ((ReasoningBasedRefinementOperator) operator).setReasoner(reasoner); 313 } 314 operator.init(); 315 } 316 if (operator instanceof CustomHierarchyRefinementOperator) { 317 ((CustomHierarchyRefinementOperator) operator).setClassHierarchy(classHierarchy); 318 ((CustomHierarchyRefinementOperator) operator).setObjectPropertyHierarchy(objectPropertyHierarchy); 319 ((CustomHierarchyRefinementOperator) operator).setDataPropertyHierarchy(datatypePropertyHierarchy); 320 } 321 322 reasoner.setSynchronized(); 323 operator = new SynchronizedRefinementOperator(operator); 324 325 initialized = true; 326 } 327 328 @Override 329 public void start() { 330 logger.info("starting PCELOE with " + nrOfThreads + " threads"); 331// System.out.println(configurator.getMaxExecutionTimeInSeconds()); 332 333 stop = false; 334 isRunning = true; 335 reset(); 336 nanoStartTime = System.nanoTime(); 337 338 addNode(startClass, null); 339 340 int nrOfWorkers = nrOfThreads; 341 if(nrOfWorkers == 0){ 342 nrOfWorkers = Runtime.getRuntime().availableProcessors(); 343 } 344 ExecutorService service = Executors.newFixedThreadPool(nrOfWorkers); 345 346 List<Runnable> tasks = new ArrayList<>(); 347 348 boolean shareRefinementOperator = false; 349 350 for(int i = 0; i < nrOfWorkers; i++){ 351 PCELOEWorker worker; 352 if(shareRefinementOperator) { 353 ClosedWorldReasoner reasonerCopy = new ClosedWorldReasoner(); 354 reasonerCopy.setSources(reasoner.getSources()); 355 try { 356 reasonerCopy.init(); 357 } catch (ComponentInitException e1) { 358 // TODO Auto-generated catch block 359 e1.printStackTrace(); 360 } 361 362 if(operator instanceof SynchronizedRefinementOperator) { 363 operator = ((SynchronizedRefinementOperator) operator).getDelegate(); 364 } 365 RhoDRDown op = new RhoDRDown((RhoDRDown) operator); 366 op.setReasoner(reasonerCopy); 367 try { 368 op.init(); 369 } catch (ComponentInitException e) { 370 // TODO Auto-generated catch block 371 e.printStackTrace(); 372 } 373 worker = new PCELOEWorker(op); 374 } else { 375 worker = new PCELOEWorker(); 376 377 } 378 tasks.add(worker); 379 } 380 nanoStartTime = System.nanoTime(); 381 382 //needed to block until all threads have been finished, because otherwise the main thread outputs the result to early 383 List<Future> futures = new ArrayList<>(); 384 for(Runnable task : tasks){ 385 futures.add(service.submit(task)); 386 } 387 for(Future future : futures){ 388 try { 389 future.get(); 390 } catch (InterruptedException | ExecutionException e) { 391 e.printStackTrace(); 392 } 393 } 394 395 if (stop) { 396 logger.info("Algorithm stopped ("+expressionTests+" descriptions tested). " + searchTree.size() + " nodes in the search tree.\n"); 397 } else { 398 logger.info("Algorithm terminated successfully (time: " + Helper.prettyPrintNanoSeconds(System.nanoTime()-nanoStartTime) + ", "+expressionTests+" descriptions tested, " + searchTree.size() + " nodes in the search tree).\n"); 399 logger.info(reasoner.toString()); 400 } 401 402 if(singleSuggestionMode) { 403 bestEvaluatedDescriptions.add(bestDescription, bestAccuracy, learningProblem); 404 } 405 406 // print solution(s) 407 logger.info("solutions:\n" + getSolutionString()); 408 409// System.out.println(startNode.toTreeString(baseURI)); 410 411 isRunning = false; 412 service.shutdown(); 413// System.out.println("isRunning: " + isRunning); 414 } 415 416 /* 417 * Compute the start class in the search space from which the refinement will start. 418 * We use the intersection of super classes for definitions (since it needs to 419 * capture all instances), but owl:Thing for learning subclasses (since it is 420 * superfluous to add super classes in this case) 421 */ 422 private OWLClassExpression computeStartClass() { 423 OWLClassExpression startClass = dataFactory.getOWLThing(); 424 425 if(isClassLearningProblem) { 426 if(isEquivalenceProblem) { 427 Set<OWLClassExpression> existingDefinitions = reasoner.getAssertedDefinitions(classToDescribe); 428 if(reuseExistingDescription && (existingDefinitions.size() > 0)) { 429 // the existing definition is reused, which in the simplest case means to 430 // use it as a start class or, if it is already too specific, generalise it 431 432 // pick the longest existing definition as candidate 433 OWLClassExpression existingDefinition = null; 434 int highestLength = 0; 435 for(OWLClassExpression exDef : existingDefinitions) { 436 if(OWLClassExpressionUtils.getLength(exDef) > highestLength) { 437 existingDefinition = exDef; 438 highestLength = OWLClassExpressionUtils.getLength(exDef); 439 } 440 } 441 442 LinkedList<OWLClassExpression> startClassCandidates = new LinkedList<>(); 443 startClassCandidates.add(existingDefinition); 444 // hack for RhoDRDown 445 if(operator instanceof RhoDRDown) { 446 ((RhoDRDown)operator).setDropDisjuncts(true); 447 } 448 LengthLimitedRefinementOperator upwardOperator = new OperatorInverter(operator); 449 450 // use upward refinement until we find an appropriate start class 451 boolean startClassFound = false; 452 OWLClassExpression candidate; 453 do { 454 candidate = startClassCandidates.pollFirst(); 455 if(((ClassLearningProblem)learningProblem).getRecall(candidate)<1.0) { 456 // add upward refinements to list 457 Set<OWLClassExpression> refinements = upwardOperator.refine(candidate, OWLClassExpressionUtils.getLength(candidate)); 458// System.out.println("ref: " + refinements); 459 LinkedList<OWLClassExpression> refinementList = new LinkedList<>(refinements); 460// Collections.reverse(refinementList); 461// System.out.println("list: " + refinementList); 462 startClassCandidates.addAll(refinementList); 463// System.out.println("candidates: " + startClassCandidates); 464 } else { 465 startClassFound = true; 466 } 467 } while(!startClassFound); 468 startClass = candidate; 469 470 if(startClass.equals(existingDefinition)) { 471 logger.info("Reusing existing class expression " + OWLAPIRenderers.toManchesterOWLSyntax(startClass) + " as start class for learning algorithm."); 472 } else { 473 logger.info("Generalised existing class expression " + OWLAPIRenderers.toManchesterOWLSyntax(existingDefinition) + " to " + OWLAPIRenderers.toManchesterOWLSyntax(startClass) + ", which is used as start class for the learning algorithm."); 474 } 475 476 if(operator instanceof RhoDRDown) { 477 ((RhoDRDown)operator).setDropDisjuncts(false); 478 } 479 480 } else { 481 Set<OWLClassExpression> superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe, true); 482 if(superClasses.size() > 1) { 483 startClass = dataFactory.getOWLObjectIntersectionOf(superClasses); 484 } else if(superClasses.size() == 1){ 485 startClass = (OWLClassExpression) superClasses.toArray()[0]; 486 } else { 487 startClass = dataFactory.getOWLThing(); 488 logger.warn(classToDescribe + " is equivalent to owl:Thing. Usually, it is not " + 489 "sensible to learn a class expression in this case."); 490 } 491 } 492 } 493 } 494 return startClass; 495 } 496 497 private OENode getNextNodeToExpand() { 498 // we expand the best node of those, which have not achieved 100% accuracy 499 // already and have a horizontal expansion equal to their length 500 // (rationale: further extension is likely to add irrelevant syntactical constructs) 501 synchronized (searchTree) { 502 Iterator<OENode> it = searchTree.descendingIterator(); 503 504 while(it.hasNext()) { 505 OENode node = it.next(); 506 if (currentlyProcessedNodes.contains(node)) continue; 507 508 if (isExpandAccuracy100Nodes() && node.getHorizontalExpansion() < OWLClassExpressionUtils.getLength(node.getDescription())) { 509 currentlyProcessedNodes.add(node); 510 return node; 511 } else { 512 if(node.getAccuracy() < 1.0 || node.getHorizontalExpansion() < OWLClassExpressionUtils.getLength(node.getDescription())) { 513 currentlyProcessedNodes.add(node); 514 return node; 515 } 516 } 517 } 518 } 519 520 // this should practically never be called, since for any reasonable learning 521 // task, we will always have at least one node with less than 100% accuracy 522 return null;//nodes.last(); 523 } 524 525 // expand node horizontically 526 private TreeSet<OWLClassExpression> refineNode(LengthLimitedRefinementOperator operator, OENode node) { 527 MonitorFactory.getTimeMonitor("refineNode").start(); 528 // we have to remove and add the node since its heuristic evaluation changes through the expansion 529 // (you *must not* include any criteria in the heuristic which are modified outside of this method, 530 // otherwise you may see rarely occurring but critical false ordering in the nodes set) 531 searchTree.updatePrepare(node); 532 int horizExp = node.getHorizontalExpansion(); 533 TreeSet<OWLClassExpression> refinements = (TreeSet<OWLClassExpression>) operator.refine(node.getDescription(), horizExp+1); 534// System.out.println("refinements: " + refinements); 535 node.incHorizontalExpansion(); 536 node.setRefinementCount(refinements.size()); 537// System.out.println("refined node: " + node); 538 searchTree.updateDone(node); 539 MonitorFactory.getTimeMonitor("refineNode").stop(); 540 return refinements; 541 } 542 543 /** 544 * Add node to search tree if it is not too weak. 545 * @return TRUE if node was added and FALSE otherwise 546 */ 547 private boolean addNode(OWLClassExpression description, OENode parentNode) { 548 MonitorFactory.getTimeMonitor("addNode").start(); 549 550 // redundancy check (return if redundant) 551 boolean nonRedundant = descriptions.add(description); 552 if(!nonRedundant) { 553 return false; 554 } 555 556 // check whether the class expression is allowed 557 if(!isDescriptionAllowed(description, parentNode)) { 558 return false; 559 } 560 561 // quality of class expression (return if too weak) 562 Monitor mon = MonitorFactory.start("lp"); 563 double accuracy = learningProblem.getAccuracyOrTooWeak(description, noise); 564 mon.stop(); 565 566 // issue a warning if accuracy is not between 0 and 1 or -1 (too weak) 567 if(accuracy > 1.0 || (accuracy < 0.0 && accuracy != -1)) { 568 throw new RuntimeException("Invalid accuracy value " + accuracy + " for class expression " + description + 569 ". This could be caused by a bug in the heuristic measure and should be reported to the DL-Learner bug tracker."); 570 } 571 572 expressionTests++; 573 574 // return FALSE if 'too weak' 575 if(accuracy == -1) { 576 return false; 577 } 578 579 OENode node = new OENode(description, accuracy); 580 581 // link to parent (unless start node) 582 searchTree.addNode(parentNode, node); 583 584 // in some cases (e.g. mutation) fully evaluating even a single class expression is too expensive 585 // due to the high number of examples -- so we just stick to the approximate accuracy 586 if(singleSuggestionMode) { 587 if(accuracy > bestAccuracy) { 588 bestAccuracy = accuracy; 589 bestDescription = description; 590 logger.info("more accurate (" + dfPercent.format(bestAccuracy) + ") class expression found: " + descriptionToString(bestDescription)); // + getTemporaryString(bestDescription)); 591 } 592 return true; 593 } 594 595 // maybe add to best descriptions (method keeps set size fixed); 596 // we need to make sure that this does not get called more often than 597 // necessary since rewriting is expensive 598 boolean isCandidate = !bestEvaluatedDescriptions.isFull(); 599 if(!isCandidate) { 600 EvaluatedDescription worst = bestEvaluatedDescriptions.getWorst(); 601 double accThreshold = worst.getAccuracy(); 602 isCandidate = 603 (accuracy > accThreshold || 604 (accuracy >= accThreshold && OWLClassExpressionUtils.getLength(description) < worst.getDescriptionLength())); 605 } 606 607 if(isCandidate) { 608 OWLClassExpression niceDescription = node.getExpression();//rewrite(node.getExpression()); 609 610 if(niceDescription.equals(classToDescribe)) { 611 return false; 612 } 613 614 if(!isDescriptionAllowed(niceDescription, node)) { 615 return false; 616 } 617 618 // another test: none of the other suggested descriptions should be 619 // a subdescription of this one unless accuracy is different 620 // => comment: on the one hand, this appears to be too strict, because once A is a solution then everything containing 621 // A is not a candidate; on the other hand this suppresses many meaningless extensions of A 622 boolean shorterDescriptionExists = false; 623 if(forceMutualDifference) { 624 for(EvaluatedDescription<? extends Score> ed : bestEvaluatedDescriptions.getSet()) { 625 if(Math.abs(ed.getAccuracy()-accuracy) <= 0.00001 && ConceptTransformation.isSubdescription(niceDescription, ed.getDescription())) { 626// System.out.println("shorter: " + ed.getDescription()); 627 shorterDescriptionExists = true; 628 break; 629 } 630 } 631 } 632 633// System.out.println("shorter description? " + shorterDescriptionExists + " nice: " + niceDescription); 634 635 if(!shorterDescriptionExists) { 636 if(!filterFollowsFromKB || !((ClassLearningProblem)learningProblem).followsFromKB(niceDescription)) { 637// System.out.println(node + "->" + niceDescription); 638 bestEvaluatedDescriptions.add(niceDescription, accuracy, learningProblem); 639// System.out.println("acc: " + accuracy); 640// System.out.println(bestEvaluatedDescriptions); 641 } 642 } 643 644// bestEvaluatedDescriptions.add(node.getDescription(), accuracy, learningProblem); 645 646// System.out.println(bestEvaluatedDescriptions.getSet().size()); 647 } 648 649 return true; 650 } 651 652 // checks whether the class expression is allowed 653 private boolean isDescriptionAllowed(OWLClassExpression description, OENode parentNode) { 654 if(isClassLearningProblem) { 655 if(isEquivalenceProblem) { 656 // the class to learn must not appear on the outermost property level 657 if(occursOnFirstLevel(description, classToDescribe)) { 658 return false; 659 } 660 if(occursOnSecondLevel(description, classToDescribe)) { 661 return false; 662 } 663 } else { 664 // none of the superclasses of the class to learn must appear on the 665 // outermost property level 666 TreeSet<OWLClassExpression> toTest = new TreeSet<>(); 667 toTest.add(classToDescribe); 668 while(!toTest.isEmpty()) { 669 OWLClassExpression d = toTest.pollFirst(); 670 if(occursOnFirstLevel(description, d)) { 671 return false; 672 } 673 toTest.addAll(reasoner.getClassHierarchy().getSuperClasses(d)); 674 } 675 } 676 } else if (learningProblem instanceof ClassAsInstanceLearningProblem) { 677 return true; 678 } 679 680 // perform forall sanity tests 681 if (parentNode != null && 682 (ConceptTransformation.getForallOccurences(description) > ConceptTransformation.getForallOccurences(parentNode.getDescription()))) { 683 // we have an additional \forall construct, so we now fetch the contexts 684 // in which it occurs 685 SortedSet<PropertyContext> contexts = ConceptTransformation.getForallContexts(description); 686 SortedSet<PropertyContext> parentContexts = ConceptTransformation.getForallContexts(parentNode.getDescription()); 687 contexts.removeAll(parentContexts); 688// System.out.println("parent description: " + parentNode.getDescription()); 689// System.out.println("description: " + description); 690// System.out.println("contexts: " + contexts); 691 // we now have to perform sanity checks: if \forall is used, then there 692 // should be at least on class instance which has a filler at the given context 693 for(PropertyContext context : contexts) { 694 // transform [r,s] to \exists r.\exists s.\top 695 OWLClassExpression existentialContext = context.toExistentialContext(); 696 boolean fillerFound = false; 697 if(reasoner instanceof SPARQLReasoner) { 698 SortedSet<OWLIndividual> individuals = reasoner.getIndividuals(existentialContext); 699 fillerFound = !Sets.intersection(individuals, examples).isEmpty(); 700 } else { 701 for(OWLIndividual instance : examples) { 702 if(reasoner.hasType(existentialContext, instance)) { 703 fillerFound = true; 704 break; 705 } 706 } 707 } 708 709 // if we do not find a filler, this means that putting \forall at 710 // that position is not meaningful 711 if(!fillerFound) { 712 return false; 713 } 714 } 715 } 716 717 // we do not want to have negations of sibling classes on the outermost level 718 // (they are expressed more naturally by saying that the siblings are disjoint, 719 // so it is reasonable not to include them in solutions) 720// Set<OWLClassExpression> siblingClasses = reasoner.getClassHierarchy().getSiblingClasses(classToDescribe); 721// for now, we just disable negation 722 723 return true; 724 } 725 726 // determine whether a named class occurs on the outermost level, i.e. property depth 0 727 // (it can still be at higher depth, e.g. if intersections are nested in unions) 728 private boolean occursOnFirstLevel(OWLClassExpression description, OWLClassExpression cls) { 729 if(cls.isOWLThing()) { 730 return false; 731 } 732 return (description instanceof OWLNaryBooleanClassExpression && 733 ((OWLNaryBooleanClassExpression)description).getOperands().contains(cls)); 734// return description.containsConjunct(cls) || 735// (description instanceof OWLObjectUnionOf && ((OWLObjectUnionOf) description).getOperands().contains(cls)); 736 } 737 738 // determine whether a named class occurs on the outermost level, i.e. property depth 0 739 // (it can still be at higher depth, e.g. if intersections are nested in unions) 740 private boolean occursOnSecondLevel(OWLClassExpression description, OWLClassExpression cls) { 741// SortedSet<OWLClassExpression> superClasses = reasoner.getSuperClasses(cls); 742// if(description instanceof OWLObjectIntersectionOf) { 743// List<OWLClassExpression> operands = ((OWLObjectIntersectionOf) description).getOperandsAsList(); 744// 745// for (OWLClassExpression op : operands) { 746// if(superClasses.contains(op) || 747// (op instanceof OWLObjectUnionOf && !Sets.intersection(((OWLObjectUnionOf)op).getOperands(),superClasses).isEmpty())) { 748// for (OWLClassExpression op2 : operands) { 749// if((op2 instanceof OWLObjectUnionOf && ((OWLObjectUnionOf)op2).getOperands().contains(cls))) { 750// return true; 751// } 752// } 753// } 754// } 755// 756// for (OWLClassExpression op1 : operands) { 757// for (OWLClassExpression op2 : operands) { 758// if(!op1.isAnonymous() && op2 instanceof OWLObjectUnionOf) { 759// for (OWLClassExpression op3 : ((OWLObjectUnionOf)op2).getOperands()) { 760// if(!op3.isAnonymous()) {// A AND B with Disj(A,B) 761// if(reasoner.isDisjoint(op1.asOWLClass(), op3.asOWLClass())) { 762// return true; 763// } 764// } else {// A AND NOT A 765// if(op3 instanceof OWLObjectComplementOf && ((OWLObjectComplementOf)op3).getOperand().equals(op1)) { 766// return true; 767// } 768// } 769// } 770// } 771// } 772// } 773// } 774 775 return false; 776 } 777 778 private boolean terminationCriteriaSatisfied() { 779 return 780 stop || 781 (maxClassExpressionTestsAfterImprovement != 0 && (expressionTests - expressionTestCountLastImprovement >= maxClassExpressionTestsAfterImprovement)) || 782 (maxClassExpressionTests != 0 && (expressionTests >= maxClassExpressionTests)) || 783 (maxExecutionTimeInSecondsAfterImprovement != 0 && ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSecondsAfterImprovement* 1000000000L))) || 784 (maxExecutionTimeInSeconds != 0 && ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds* 1000000000L))) || 785 (terminateOnNoiseReached && (100*getCurrentlyBestAccuracy()>=100-noisePercentage)) || 786 (stopOnFirstDefinition && (getCurrentlyBestAccuracy() >= 1)); 787 } 788 789 private void reset() { 790 // set all values back to their default values (used for running 791 // the algorithm more than once) 792// nodes = new TreeSet<OENode>(heuristic); 793 searchTree = new SynchronizedSearchTree(heuristic); 794 //Sets.synchronizedNavigableSet(new TreeSet<OENode>(Collections.reverseOrder(heuristic))); 795 descriptions = Collections.synchronizedSortedSet(new TreeSet<>()); 796 bestEvaluatedDescriptions.getSet().clear(); 797 expressionTests = 0; 798 highestAccuracy = 0.0; 799 } 800 801 private void printAlgorithmRunStats() { 802 if (stop) { 803 logger.info("Algorithm stopped ("+expressionTests+" descriptions tested). " + searchTree.size() + " nodes in the search tree.\n"); 804 } else { 805 totalRuntimeNs = System.nanoTime()-nanoStartTime; 806 logger.info("Algorithm terminated successfully (time: " + Helper.prettyPrintNanoSeconds(totalRuntimeNs) + ", "+expressionTests+" descriptions tested, " + searchTree.size() + " nodes in the search tree).\n"); 807 logger.info(reasoner.toString()); 808 } 809 } 810 811 private void showIfBetterSolutionsFound() { 812 if(!singleSuggestionMode && bestEvaluatedDescriptions.getBestAccuracy() > currentHighestAccuracy) { 813 currentHighestAccuracy = bestEvaluatedDescriptions.getBestAccuracy(); 814 expressionTestCountLastImprovement = expressionTests; 815 timeLastImprovement = System.nanoTime(); 816 long durationInMillis = getCurrentRuntimeInMilliSeconds(); 817 String durationStr = getDurationAsString(durationInMillis); 818 logger.info("more accurate (" + dfPercent.format(currentHighestAccuracy) + ") class expression found after " + durationStr + ": " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription())); 819 } 820 } 821 822 private void writeSearchTree(TreeSet<OWLClassExpression> refinements) { 823 StringBuilder treeString = new StringBuilder("best node: ").append(bestEvaluatedDescriptions.getBest()).append("\n"); 824 if (refinements.size() > 1) { 825 treeString.append("all expanded nodes:\n"); 826 for (OWLClassExpression ref : refinements) { 827 treeString.append(" ").append(ref).append("\n"); 828 } 829 } 830 treeString.append(TreeUtils.toTreeString(searchTree)).append("\n"); 831 832 // replace or append 833 if (replaceSearchTree) { 834 Files.createFile(new File(searchTreeFile), treeString.toString()); 835 } else { 836 Files.appendToFile(new File(searchTreeFile), treeString.toString()); 837 } 838 } 839 840 private void updateMinMaxHorizExp(OENode node) { 841 int newHorizExp = node.getHorizontalExpansion(); 842 843 // update maximum value 844 maxHorizExp = Math.max(maxHorizExp, newHorizExp); 845 846 // we just expanded a node with minimum horizontal expansion; 847 // we need to check whether it was the last one 848 if(minHorizExp == newHorizExp - 1) { 849 850 // the best accuracy that a node can achieve 851 double scoreThreshold = heuristic.getNodeScore(node) + 1 - node.getAccuracy(); 852 853 for(OENode n : searchTree.descendingSet()) { 854 if(n != node) { 855 if(n.getHorizontalExpansion() == minHorizExp) { 856 // we can stop instantly when another node with min. 857 return; 858 } 859 if(heuristic.getNodeScore(n) < scoreThreshold) { 860 // we can stop traversing nodes when their score is too low 861 break; 862 } 863 } 864 } 865 866 // inc. minimum since we found no other node which also has min. horiz. exp. 867 minHorizExp++; 868 869// System.out.println("minimum horizontal expansion is now " + minHorizExp); 870 } 871 } 872 873 @Override 874 public OWLClassExpression getCurrentlyBestDescription() { 875 EvaluatedDescription<? extends Score> ed = getCurrentlyBestEvaluatedDescription(); 876 return ed == null ? null : ed.getDescription(); 877 } 878 879 @Override 880 public List<OWLClassExpression> getCurrentlyBestDescriptions() { 881 return bestEvaluatedDescriptions.toDescriptionList(); 882 } 883 884 @Override 885 public EvaluatedDescription getCurrentlyBestEvaluatedDescription() { 886 return bestEvaluatedDescriptions.getBest(); 887 } 888 889 @Override 890 public NavigableSet<? extends EvaluatedDescription<? extends Score>> getCurrentlyBestEvaluatedDescriptions() { 891 return bestEvaluatedDescriptions.getSet(); 892 } 893 894 public double getCurrentlyBestAccuracy() { 895 return bestEvaluatedDescriptions.getBest().getAccuracy(); 896 } 897 898 @Override 899 public boolean isRunning() { 900 return isRunning; 901 } 902 903 @Override 904 public void stop() { 905 stop = true; 906 } 907 908 public int getMaximumHorizontalExpansion() { 909 return maxHorizExp; 910 } 911 912 public int getMinimumHorizontalExpansion() { 913 return minHorizExp; 914 } 915 916 /** 917 * @return the expressionTests 918 */ 919 public int getClassExpressionTests() { 920 return expressionTests; 921 } 922 923 public LengthLimitedRefinementOperator getOperator() { 924 return operator; 925 } 926 927 @Autowired(required=false) 928 public void setOperator(LengthLimitedRefinementOperator operator) { 929 this.operator = operator; 930 } 931 932 public OWLClassExpression getStartClass() { 933 return startClass; 934 } 935 936 public void setStartClass(OWLClassExpression startClass) { 937 this.startClass = startClass; 938 } 939 940 public boolean isWriteSearchTree() { 941 return writeSearchTree; 942 } 943 944 public void setWriteSearchTree(boolean writeSearchTree) { 945 this.writeSearchTree = writeSearchTree; 946 } 947 948 public String getSearchTreeFile() { 949 return searchTreeFile; 950 } 951 952 public void setSearchTreeFile(String searchTreeFile) { 953 this.searchTreeFile = searchTreeFile; 954 } 955 956 public int getMaxNrOfResults() { 957 return maxNrOfResults; 958 } 959 960 public void setMaxNrOfResults(int maxNrOfResults) { 961 this.maxNrOfResults = maxNrOfResults; 962 } 963 964 public double getNoisePercentage() { 965 return noisePercentage; 966 } 967 968 public void setNoisePercentage(double noisePercentage) { 969 this.noisePercentage = noisePercentage; 970 } 971 972 public boolean isFilterDescriptionsFollowingFromKB() { 973 return filterDescriptionsFollowingFromKB; 974 } 975 976 public void setFilterDescriptionsFollowingFromKB(boolean filterDescriptionsFollowingFromKB) { 977 this.filterDescriptionsFollowingFromKB = filterDescriptionsFollowingFromKB; 978 } 979 980 public boolean isReplaceSearchTree() { 981 return replaceSearchTree; 982 } 983 984 public void setReplaceSearchTree(boolean replaceSearchTree) { 985 this.replaceSearchTree = replaceSearchTree; 986 } 987 988 @Deprecated 989 public int getMaxClassDescriptionTests() { 990 return maxClassExpressionTests; 991 } 992 993 @Deprecated 994 public void setMaxClassDescriptionTests(int maxClassDescriptionTests) { 995 this.maxClassExpressionTests = maxClassDescriptionTests; 996 } 997 998 public boolean isTerminateOnNoiseReached() { 999 return terminateOnNoiseReached; 1000 } 1001 1002 public void setTerminateOnNoiseReached(boolean terminateOnNoiseReached) { 1003 this.terminateOnNoiseReached = terminateOnNoiseReached; 1004 } 1005 1006 public boolean isReuseExistingDescription() { 1007 return reuseExistingDescription; 1008 } 1009 1010 public void setReuseExistingDescription(boolean reuseExistingDescription) { 1011 this.reuseExistingDescription = reuseExistingDescription; 1012 } 1013 1014 public AbstractHeuristic getHeuristic() { 1015 return heuristic; 1016 } 1017 1018 @Autowired(required=false) 1019 public void setHeuristic(AbstractHeuristic heuristic) { 1020 this.heuristic = heuristic; 1021 } 1022 1023 @Deprecated 1024 public int getMaxClassExpressionTestsWithoutImprovement() { 1025 return maxClassExpressionTestsAfterImprovement; 1026 } 1027 1028 @Deprecated 1029 public void setMaxClassExpressionTestsWithoutImprovement( 1030 int maxClassExpressionTestsWithoutImprovement) { 1031 this.maxClassExpressionTestsAfterImprovement = maxClassExpressionTestsWithoutImprovement; 1032 } 1033 1034 public int getMaxExecutionTimeInSecondsAfterImprovement() { 1035 return maxExecutionTimeInSecondsAfterImprovement; 1036 } 1037 1038 public void setMaxExecutionTimeInSecondsAfterImprovement( 1039 int maxExecutionTimeInSecondsAfterImprovement) { 1040 this.maxExecutionTimeInSecondsAfterImprovement = maxExecutionTimeInSecondsAfterImprovement; 1041 } 1042 1043 public boolean isSingleSuggestionMode() { 1044 return singleSuggestionMode; 1045 } 1046 1047 public void setSingleSuggestionMode(boolean singleSuggestionMode) { 1048 this.singleSuggestionMode = singleSuggestionMode; 1049 } 1050 1051 public int getMaxClassExpressionTests() { 1052 return maxClassExpressionTests; 1053 } 1054 1055 public void setMaxClassExpressionTests(int maxClassExpressionTests) { 1056 this.maxClassExpressionTests = maxClassExpressionTests; 1057 } 1058 1059 public int getMaxClassExpressionTestsAfterImprovement() { 1060 return maxClassExpressionTestsAfterImprovement; 1061 } 1062 1063 public void setMaxClassExpressionTestsAfterImprovement( 1064 int maxClassExpressionTestsAfterImprovement) { 1065 this.maxClassExpressionTestsAfterImprovement = maxClassExpressionTestsAfterImprovement; 1066 } 1067 1068 public double getMaxDepth() { 1069 return maxDepth; 1070 } 1071 1072 public void setMaxDepth(double maxDepth) { 1073 this.maxDepth = maxDepth; 1074 } 1075 1076 public boolean isStopOnFirstDefinition() { 1077 return stopOnFirstDefinition; 1078 } 1079 1080 public void setStopOnFirstDefinition(boolean stopOnFirstDefinition) { 1081 this.stopOnFirstDefinition = stopOnFirstDefinition; 1082 } 1083 1084 public long getTotalRuntimeNs() { 1085 return totalRuntimeNs; 1086 } 1087 1088 /** 1089 * @return the expandAccuracy100Nodes 1090 */ 1091 public boolean isExpandAccuracy100Nodes() { 1092 return expandAccuracy100Nodes; 1093 } 1094 1095 /** 1096 * @param expandAccuracy100Nodes the expandAccuracy100Nodes to set 1097 */ 1098 public void setExpandAccuracy100Nodes(boolean expandAccuracy100Nodes) { 1099 this.expandAccuracy100Nodes = expandAccuracy100Nodes; 1100 } 1101 1102 /* (non-Javadoc) 1103 * @see java.lang.Object#clone() 1104 */ 1105 @Override 1106 public Object clone() throws CloneNotSupportedException { 1107 return new PCELOE(this); 1108 } 1109 1110 public int getNrOfThreads() { 1111 return nrOfThreads; 1112 } 1113 1114 public void setNrOfThreads(int nrOfThreads) { 1115 this.nrOfThreads = nrOfThreads; 1116 } 1117 1118 private synchronized double getHighestAccuracy(){ 1119 return highestAccuracy; 1120 } 1121 1122 class PCELOEWorker implements Runnable{ 1123 1124 private LengthLimitedRefinementOperator operator; 1125 1126 public PCELOEWorker() { 1127 operator = PCELOE.this.operator; 1128 } 1129 1130 public PCELOEWorker(LengthLimitedRefinementOperator operator) { 1131 this.operator = operator; 1132 } 1133 1134 @Override 1135 public void run() { 1136 OENode nextNode; 1137 while (!terminationCriteriaSatisfied()) { 1138// String threadName = Thread.currentThread().getName(); 1139 1140 nextNode = getNextNodeToExpand(); 1141// System.out.println(threadName + " processing " + nextNode); 1142 1143 if(nextNode != null){ 1144 int horizExp = nextNode.getHorizontalExpansion(); 1145 1146 // apply refinement operator 1147 TreeSet<OWLClassExpression> refinements = refineNode(operator, nextNode); 1148 1149 while(!refinements.isEmpty() && !terminationCriteriaSatisfied()) { 1150 // pick element from set 1151 OWLClassExpression refinement = refinements.pollFirst(); 1152// System.out.println(threadName + " analysing " + refinement); 1153 1154 // get length of class expression 1155 int length = OWLClassExpressionUtils.getLength(refinement); 1156 1157 // we ignore all refinements with lower length and too high depth 1158 // (this also avoids duplicate node children) 1159 if(length > horizExp && OWLClassExpressionUtils.getDepth(refinement) <= maxDepth) { 1160 // add node to search tree 1161 boolean added = addNode(refinement, nextNode); 1162 if(added) { 1163// System.out.println(threadName + " put " + refinement); 1164 } 1165 } 1166 } 1167 currentlyProcessedNodes.remove(nextNode); 1168// showIfBetterSolutionsFound(); 1169 } 1170 1171 } 1172 } 1173 } 1174 1175 public static void main(String[] args) throws Exception{ 1176// StringRenderer.setRenderer(Rendering.DL_SYNTAX); 1177 Logger.getRootLogger().setLevel(Level.INFO); 1178 Logger.getLogger(PCELOE.class).setLevel(Level.DEBUG); 1179 Logger.getLogger(PCELOE.class).addAppender(new FileAppender(new PatternLayout( "[%t] %c: %m%n" ), "log/parallel_run.txt", false)); 1180 1181 AbstractKnowledgeSource ks = new OWLFile("../examples/family/father_oe.owl"); 1182 ks.init(); 1183 1184 AbstractReasonerComponent rc = new ClosedWorldReasoner(ks); 1185 rc.init(); 1186 1187 ClassLearningProblem lp = new ClassLearningProblem(rc); 1188 lp.setClassToDescribe(new OWLClassImpl(IRI.create("http://example.com/father#father"))); 1189 lp.setCheckConsistency(false); 1190 lp.init(); 1191 1192 PCELOE alg = new PCELOE(lp, rc); 1193 alg.setMaxExecutionTimeInSeconds(10); 1194 alg.setNrOfThreads(4); 1195// alg.setMaxClassDescriptionTests(200); 1196 alg.init(); 1197 1198 alg.start(); 1199 } 1200 1201}