001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.ocel; 020 021import com.google.common.collect.Sets; 022import com.jamonapi.Monitor; 023import org.apache.log4j.Level; 024import org.dllearner.accuracymethods.AccMethodNoWeakness; 025import org.dllearner.core.*; 026import org.dllearner.core.config.ConfigOption; 027import org.dllearner.core.options.CommonConfigOptions; 028import org.dllearner.core.owl.ClassHierarchy; 029import org.dllearner.core.owl.DatatypePropertyHierarchy; 030import org.dllearner.core.owl.ObjectPropertyHierarchy; 031import org.dllearner.learningproblems.*; 032import org.dllearner.reasoning.ReasonerType; 033import org.dllearner.refinementoperators.*; 034import org.dllearner.utilities.Files; 035import org.dllearner.utilities.Helper; 036import org.dllearner.utilities.JamonMonitorLogger; 037import org.dllearner.utilities.TreeUtils; 038import org.dllearner.utilities.datastructures.SearchTreeNonWeak; 039import org.dllearner.utilities.datastructures.SearchTreeNonWeakPartialSet; 040import org.dllearner.utilities.owl.ConceptTransformation; 041import org.dllearner.utilities.owl.EvaluatedDescriptionPosNegComparator; 042import org.dllearner.utilities.owl.OWLClassExpressionLengthMetric; 043import 
org.dllearner.utilities.owl.OWLClassExpressionUtils; 044import org.semanticweb.owlapi.model.*; 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047import org.springframework.beans.factory.annotation.Autowired; 048 049import java.io.File; 050import java.text.DecimalFormat; 051import java.util.*; 052 053/** 054 * The DL-Learner learning algorithm component for the example 055 * based refinement operator approach. It handles all 056 * configuration options, creates the corresponding objects and 057 * passes them to the actual refinement operator, heuristic, and 058 * learning algorithm implementations. 059 * 060 * Note: The options supported by the ROLearner component and this 061 * one are not equal. Options that have been dropped for now: 062 * - horizontal expansion factor: The goal of the algorithm will 063 * be to (hopefully) be able to learn long and complex concepts 064 * more efficiently. 065 * A horizontal expansion factor has its benefits, but limits 066 * the length of concepts learnable in reasonable time to 067 * about 15 with its default value of 0.6 and a small sized 068 * background knowledge base. We hope to get more fine-grained 069 * control of whether it makes sense to extend a node with 070 * more sophisticated heuristics. 071 * Dropping the horizontal expansion factor means that the 072 * completeness of the algorithm depends on the heuristic. 
 *
 * @author Jens Lehmann
 *
 */
@ComponentAnn(name = "OWL Class Expression Learner", shortName = "ocel", version = 1.2)
public class OCEL extends AbstractCELA {

    // actual algorithm
    private static Logger logger = LoggerFactory.getLogger(OCEL.class);
    private String logLevel = CommonConfigOptions.logLevelDefault;

    // often the learning problems needn't be accessed directly; instead
    // use the example sets below and the posonly variable
    private OWLClassExpression startDescription;
    private int nrOfExamples;
    private int nrOfPositiveExamples;
    private Set<OWLIndividual> positiveExamples;
    private int nrOfNegativeExamples;
    private Set<OWLIndividual> negativeExamples;

    // quality threshold: how many examples may be misclassified by a solution
    // (derived from noisePercentage in start())
    private int allowedMisclassifications = 0;

    // search tree options
    @ConfigOption(defaultValue = "false", description = "specifies whether to write a search tree")
    private boolean writeSearchTree;
    @ConfigOption(defaultValue="log/searchTree.txt", description="file to use for the search tree")
    private File searchTreeFile;
    @ConfigOption(defaultValue="false", description="specifies whether to replace the search tree in the log file after each run or append the new search tree")
    private boolean replaceSearchTree = false;

    // constructs to improve performance
    @ConfigOption(defaultValue = "true", description = "exclude too weak concepts when they occur as sub concept")
    private boolean useTooWeakList = true;
    @ConfigOption(defaultValue = "true")
    private boolean useOverlyGeneralList = true;
    @ConfigOption(defaultValue = "true", description = "whether to shorten concepts to ignore identical refinement. " +
            "e.g. male AND male is shortened to male. ")
    private boolean useShortConceptConstruction = true;

    // extended Options
    private boolean maxExecutionTimeAlreadyReached = false;
    @ConfigOption(defaultValue = "0", description = "Minimum time the algorithm has to run before termination (even if solution " +
            "already found")
    private int minExecutionTimeInSeconds = CommonConfigOptions.minExecutionTimeInSecondsDefault;
    private boolean minExecutionTimeAlreadyReached = false;
    @ConfigOption(defaultValue = "1", description = "how many sufficient solutions must be found before termination, if " +
            "terminateOnNoiseReached is enabled")
    private int guaranteeXgoodDescriptions = CommonConfigOptions.guaranteeXgoodDescriptionsDefault;
    private boolean guaranteeXgoodAlreadyReached = false;
    @ConfigOption(defaultValue = "0", description = "The maximum number of candidate hypothesis the algorithm is " +
            "allowed to test (0 = no limit). The algorithm will stop afterwards")
    private int maxClassDescriptionTests = CommonConfigOptions.maxClassDescriptionTestsDefault;

    @ConfigOption(defaultValue = "false", description = "if set to false we do not test properness; this may seem wrong " +
            "but the disadvantage of properness testing are additional reasoner queries and a search bias towards " +
            "ALL r.something because ALL r.TOP is improper and automatically expanded further")
    private boolean usePropernessChecks = false;

    @ConfigOption(defaultValue = "false", description = "tree traversal means to run through the most promising " +
            "concepts and connect them in an intersection to find a solution (this is called irregularly e.g. " +
            "every 100 seconds)")
    private boolean useTreeTraversal = false;

    @ConfigOption(defaultValue = "true",
            description = "if this variable is set to true, then the refinement operator is applied until all " +
                    "concept of equal length have been found e.g. TOP -> A1 -> A2 -> A3 is found in one loop; " +
                    "the disadvantage are potentially more method calls, but the advantage is that the algorithm " +
                    "is better in locating relevant concept in the subsumption hierarchy (otherwise, if the most " +
                    "general concept is not promising, it may never get expanded)")
    private boolean forceRefinementLengthIncrease = true;

    @ConfigOption(defaultValue = "true",
            description = "candidate reduction: using this mechanism we can simulate the divide&conquer approach " +
                    "in many ILP programs using a clause by clause search; after a period of time the candidate " +
                    "set is reduced to focus CPU time on the most promising concepts")
    private boolean useCandidateReduction = true;
    @ConfigOption(defaultValue = "30", description = "maximum number of candidates to retain")
    private int candidatePostReductionSize = 30;

    // solution protocol
    private List<ExampleBasedNode> solutions = new LinkedList<>();

    @ConfigOption(defaultValue = "false", description = "specifies whether to compute and log benchmark information")
    private boolean computeBenchmarkInformation = false;

    // comparator used to maintain a stable ordering of nodes, i.e.
    // an ordering which does not change during the run of the algorithm
    private NodeComparatorStable nodeComparatorStable = new NodeComparatorStable();
    // node from which algorithm has started
    private SearchTreeNonWeak<ExampleBasedNode> searchTreeStable;
    private SearchTreeNonWeakPartialSet<ExampleBasedNode> searchTree;

    // evaluated descriptions

    // comparator for evaluated descriptions
    private EvaluatedDescriptionPosNegComparator edComparator = new EvaluatedDescriptionPosNegComparator();

    // utility variables
    private DecimalFormat df = new DecimalFormat();

    // all concepts which have been evaluated as being proper refinements
    private SortedSet<OWLClassExpression> properRefinements = new TreeSet<>();

    // blacklists
    private SortedSet<OWLClassExpression> tooWeakList = new TreeSet<>();
    private SortedSet<OWLClassExpression> overlyGeneralList = new TreeSet<>();

    // set of expanded nodes (TODO: better explanation)
    TreeSet<ExampleBasedNode> expandedNodes = new TreeSet<>(nodeComparatorStable);

    // statistic variables
    private int maxRecDepth = 0;
    private int maxNrOfRefinements = 0;
    private int maxNrOfChildren = 0;
    private int redundantConcepts = 0;
    private int propernessTestsReasoner = 0;
    private int propernessTestsAvoidedByShortConceptConstruction = 0;
    private int propernessTestsAvoidedByTooWeakList = 0;
    private int conceptTestsTooWeakList = 0;
    private int conceptTestsOverlyGeneralList = 0;
    private int conceptTestsReasoner = 0;

    // time variables (all in nanoseconds unless noted otherwise)
    private long runtime;
    private long algorithmStartTime;
    private long propernessCalcTimeNs = 0;
    private long propernessCalcReasoningTimeNs = 0;
    private long childConceptsDeletionTimeNs = 0;
    private long refinementCalcTimeNs = 0;
    private long redundancyCheckTimeNs = 0;
    private long evaluateSetCreationTimeNs = 0;
    private long improperConceptsRemovalTimeNs = 0;

    @ConfigOption(defaultValue = "true", description = "specifies whether to terminate when noise criterion is met")
    private boolean terminateOnNoiseReached = true;

    @ConfigOption(defaultValue = "1.0", description = "(for the ExampleBasedNode.) weighting factor on the number of " +
            "true negatives (true positives are weigthed with 1)")
    private double negativeWeight = 1.0;

    @ConfigOption(defaultValue = "0.1", description = "(for the ExampleBasedNode.) the score value for the start node")
    private double startNodeBonus = 0.1;

    @ConfigOption(description = "For the MultiHeuristic: how much accuracy gain is worth an increase of horizontal " +
            "expansion by one (typical value: 0.01)",
            defaultValue = "0.02")
    private double expansionPenaltyFactor = 0.02;

    @ConfigOption(defaultValue = "0", description = "(for the ExampleBasedNode.) penalty value to deduce for using a " +
            "negated class expression (complementOf)")
    private int negationPenalty = 0;

    @ConfigOption(description = "adjust the weights of class expression length in refinement",
            defaultValue = "OCEL default metric")
    private OWLClassExpressionLengthMetric lengthMetric;

    // dependencies
    @ConfigOption(defaultValue = "RhoDRDown", description = "the refinement operator instance to use")
    private LengthLimitedRefinementOperator operator;
    @ConfigOption(description = "the heuristic to guide the search", defaultValue = "MultiHeuristic")
    private ExampleBasedHeuristic heuristic;

    // configuration options
    private static String defaultSearchTreeFile = "log/searchTree.txt";

    @ConfigOption(defaultValue = "true", description = "if enabled, modifies the subsumption hierarchy such that for " +
            "each class, there is only a single path to reach it via upward and downward refinement respectively.")
    private boolean improveSubsumptionHierarchy = true;

    private static double noisePercentageDefault = 0.0;
    @ConfigOption(defaultValue = "0.0", description = "noise regulates how many positives can be misclassified and when " +
            "the algorithm terminates")
    private double noisePercentage = noisePercentageDefault;
    @ConfigOption(
            defaultValue = "owl:Thing",
            description = "You can specify a start class for the algorithm",
            exampleValue = "ex:Male or http://example.org/ontology/Female")
    private OWLClassExpression startClass = null;

    // variables for configuring the logging
    @ConfigOption(defaultValue = "false", description = "show additional timing info for benchmark purposes")
    boolean showBenchmarkInformation = false;

    public OCEL() {
    }

    // should later accept an operator and a heuristic as arguments
    public OCEL(PosNegLP learningProblem, AbstractReasonerComponent reasoningService) {
        super(learningProblem, reasoningService);
    }

    public OCEL(PosOnlyLP learningProblem, AbstractReasonerComponent reasoningService) {
        super(learningProblem, reasoningService);
    }

    /* (non-Javadoc)
     * @see org.dllearner.core.Component#init()
     */
    @Override
    public void init() throws ComponentInitException {
        // exit with a ComponentInitException if the reasoner is unsupported for this learning algorithm
        if (getReasoner().getReasonerType() == ReasonerType.DIG) {
            throw new ComponentInitException("DIG does not support the inferences needed in the selected learning algorithm component: " + AnnComponentManager.getName(this));
        }

        // set log level if the option has been set
        if (!logLevel.equals(CommonConfigOptions.logLevelDefault))
            org.apache.log4j.Logger.getLogger(OCEL.class).setLevel(Level.toLevel(logLevel, Level.toLevel(CommonConfigOptions.logLevelDefault)));

        if (searchTreeFile == null)
            searchTreeFile = new File(defaultSearchTreeFile);

        if (writeSearchTree)
            Files.clearFile(searchTreeFile);

        // adjust heuristic

        if (heuristic == null) {
            if (getLearningProblem() instanceof PosOnlyLP) {
                throw new RuntimeException("does not work with positive examples only yet");
            } else {
                heuristic = new MultiHeuristic(((PosNegLP) getLearningProblem()).getPositiveExamples().size(), ((PosNegLP) getLearningProblem()).getNegativeExamples().size(), negativeWeight, startNodeBonus, expansionPenaltyFactor, negationPenalty);
            }
        } else {
            // we need to set some variables to make the heuristic work
            if (heuristic instanceof MultiHeuristic) {
                MultiHeuristic mh = ((MultiHeuristic) heuristic);
                if (mh.getNrOfNegativeExamples() == 0) {
                    mh.setNrOfNegativeExamples(((PosNegLP) getLearningProblem()).getNegativeExamples().size());
                }
                int nrPosEx = ((PosNegLP) getLearningProblem()).getPositiveExamples().size();
                int nrNegEx = ((PosNegLP) getLearningProblem()).getNegativeExamples().size();
                if (mh.getNrOfExamples() == 0) {
                    mh.setNrOfExamples(nrPosEx + nrNegEx);
                }
                // NOTE(review): this repeats the getNrOfNegativeExamples() == 0 check from
                // above; after the first assignment this branch can no longer fire
                if (mh.getNrOfNegativeExamples() == 0) {
                    mh.setNrOfNegativeExamples(nrNegEx);
                }
            } else if (heuristic instanceof FlexibleHeuristic) {
                FlexibleHeuristic h2 = (FlexibleHeuristic) heuristic;
                if (h2.getNrOfNegativeExamples() == 0) {
                    h2.setNrOfNegativeExamples(((PosNegLP) getLearningProblem()).getNegativeExamples().size());
                }
            }

        }

        // compute used concepts/roles from allowed/ignored
        // concepts/roles

        // prepare subsumption and role hierarchies, because they are needed
        // during the run of the algorithm;
        // in contrast to before, the learning algorithms have to maintain their
        // own view on the class hierarchy
        ClassHierarchy classHierarchy = initClassHierarchy();
        ObjectPropertyHierarchy objectPropertyHierarchy = initObjectPropertyHierarchy();
        DatatypePropertyHierarchy datatypePropertyHierarchy = initDataPropertyHierarchy();
        if (improveSubsumptionHierarchy) {
            classHierarchy.thinOutSubsumptionHierarchy();
            objectPropertyHierarchy.thinOutSubsumptionHierarchy();
            datatypePropertyHierarchy.thinOutSubsumptionHierarchy();
        }

        // create a refinement operator and pass all configuration
        // variables to it
        if (operator == null) {
            // we use a default operator and inject the class hierarchy for now
            operator = new RhoDRDown();
            if (operator instanceof CustomStartRefinementOperator) {
                ((CustomStartRefinementOperator) operator).setStartClass(startClass);
            }
            if (operator instanceof ReasoningBasedRefinementOperator) {
                ((ReasoningBasedRefinementOperator) operator).setReasoner(reasoner);
            }
            operator.init();
        }
        // TODO: find a better solution as this is quite difficult to debug
        if (operator instanceof CustomHierarchyRefinementOperator) {
            ((CustomHierarchyRefinementOperator) operator).setClassHierarchy(classHierarchy);
            ((CustomHierarchyRefinementOperator) operator).setObjectPropertyHierarchy(objectPropertyHierarchy);
            ((CustomHierarchyRefinementOperator) operator).setDataPropertyHierarchy(datatypePropertyHierarchy);
        }

        if (lengthMetric == null) {
            lengthMetric = OWLClassExpressionLengthMetric.getOCELMetric();
        }
        operator.setLengthMetric(lengthMetric);

        // create an algorithm object and pass all configuration
        // options to it

        positiveExamples = ((PosNegLP) learningProblem).getPositiveExamples();
        negativeExamples = ((PosNegLP) learningProblem).getNegativeExamples();
        nrOfPositiveExamples = positiveExamples.size();
        nrOfNegativeExamples = negativeExamples.size();

        nrOfExamples = nrOfPositiveExamples + nrOfNegativeExamples;
        baseURI = reasoner.getBaseURI();
        prefixes = reasoner.getPrefixes();
        // note: used concepts and roles do not need to be passed
        // as argument, because it is sufficient to prepare the
        // concept and role hierarchy accordingly

        initialized = true;
    }

    /**
     * Runs the top-down refinement search: repeatedly expands the
     * heuristically best node until a termination criterion is reached.
     */
    @Override
    public void start() {
        stop = false;
        isRunning = true;
        runtime = System.currentTimeMillis();

        // reset values (algorithms may be started several times)
        searchTree = new SearchTreeNonWeakPartialSet<>(heuristic);
        searchTreeStable = new SearchTreeNonWeak<>(nodeComparatorStable);
        solutions.clear();
        maxExecutionTimeAlreadyReached = false;
        minExecutionTimeAlreadyReached = false;
        guaranteeXgoodAlreadyReached = false;
        propernessTestsReasoner = 0;
        propernessTestsAvoidedByShortConceptConstruction = 0;
        propernessTestsAvoidedByTooWeakList = 0;
        conceptTestsTooWeakList = 0;
        conceptTestsOverlyGeneralList = 0;
        propernessCalcTimeNs = 0;
        propernessCalcReasoningTimeNs = 0;
        childConceptsDeletionTimeNs = 0;
        refinementCalcTimeNs = 0;
        redundancyCheckTimeNs = 0;
        evaluateSetCreationTimeNs = 0;
        improperConceptsRemovalTimeNs = 0;

        Monitor totalLearningTime = JamonMonitorLogger.getTimeMonitor(OCEL.class, "totalLearningTime").start();
        // TODO: write a JUnit test for this problem (long-lasting or infinite loops because
        // redundant children of a node are called recursively when a node is extended twice)
        /*
        */

        // calculate quality threshold required for a solution
        allowedMisclassifications = (int) Math.round(noisePercentage * nrOfExamples / 100);

        // start search with start class
        ExampleBasedNode startNode;
        if (startDescription == null) {
            // default start: owl:Thing covers all examples by definition
            startNode = new ExampleBasedNode(dataFactory.getOWLThing(), this);
            startNode.setCoveredExamples(positiveExamples, negativeExamples);
        } else {
            // custom start class: ask the reasoner which examples it covers
            startNode = new ExampleBasedNode(startDescription, this);
            Set<OWLIndividual> coveredNegatives = reasoner.hasType(startDescription, negativeExamples);
            Set<OWLIndividual> coveredPositives = reasoner.hasType(startDescription, positiveExamples);
            startNode.setCoveredExamples(coveredPositives, coveredNegatives);
        }

        searchTree.addNode(null, startNode);
        searchTreeStable.addNode(null, startNode);

        ExampleBasedNode previousBestNode = startNode;
        ExampleBasedNode bestNode = startNode;
        ExampleBasedNode bestNodeStable = startNode;

        logger.info("starting top down refinement with: " + renderer.render(startNode.getConcept()) + " (" + df.format(100 * startNode.getAccuracy()) + "% accuracy)");

        int loop = 0;

        algorithmStartTime = System.nanoTime();
        long lastPrintTime = 0;
        long lastTreeTraversalTime = System.nanoTime();
        long lastReductionTime = System.nanoTime();
        // try a traversal after x seconds
        long traversalInterval = 300L * 1000000000L;
        long reductionInterval = 300L * 1000000000L;
        long currentTime;

        while (!isTerminationCriteriaReached()) {
            // print statistics at most every three seconds (3e9 ns)
            currentTime = System.nanoTime();
            if (currentTime - lastPrintTime > 3000000000L) {
                printStatistics(false);
                lastPrintTime = currentTime;
                logger.debug("--- loop " + loop + " started ---");
            }

            // traverse the current search tree to find a solution
            if (useTreeTraversal && (currentTime - lastTreeTraversalTime > traversalInterval)) {
                traverseTree();
                lastTreeTraversalTime = System.nanoTime();
            }

            // reduce candidates to focus on promising concepts
            if (useCandidateReduction && (currentTime - lastReductionTime > reductionInterval)) {
                reduceCandidates();
                lastReductionTime = System.nanoTime();
                // Logger.getRootLogger().setLevel(Level.TRACE);
            }

            // we record when a more accurate node is found and log it
            if (bestNodeStable.getCovPosMinusCovNeg() < searchTreeStable.best()
                    .getCovPosMinusCovNeg()) {
                String acc = new DecimalFormat(".00%").format((searchTreeStable.best().getAccuracy()));
                // no handling needed, it will just look ugly in the output
                logger.info("more accurate (" + acc + ") class expression found: " + renderer.render(searchTreeStable.best().getConcept()));
                if (logger.isTraceEnabled()) {
logger.trace(Sets.difference(positiveExamples, bestNodeStable.getCoveredNegatives()).toString()); 476 logger.trace(Sets.difference(negativeExamples, bestNodeStable.getCoveredNegatives()).toString()); 477 } 478 printBestSolutions(5); 479 printStatistics(false); 480 bestNodeStable = searchTreeStable.best(); 481 } 482 483 // chose best node according to heuristics 484 bestNode = searchTree.best(); 485 // best node is removed temporarily, because extending it can 486 // change its evaluation 487 searchTree.updatePrepare(bestNode); 488 extendNodeProper(bestNode, bestNode.getHorizontalExpansion() + 1); 489 searchTree.updateDone(bestNode); 490 previousBestNode = bestNode; 491 492 if (writeSearchTree) { 493 // String treeString = ""; 494 StringBuilder treeString = new StringBuilder("best node: " + bestNode + "\n"); 495 if (expandedNodes.size() > 1) { 496 treeString.append("all expanded nodes:\n"); 497 for (ExampleBasedNode n : expandedNodes) { 498 treeString.append(" ").append(n).append("\n"); 499 } 500 } 501 expandedNodes.clear(); 502 treeString.append(TreeUtils.toTreeString(startNode, heuristic)); 503 treeString.append("\n"); 504 505 if (replaceSearchTree) 506 Files.createFile(searchTreeFile, treeString.toString()); 507 else 508 Files.appendToFile(searchTreeFile, treeString.toString()); 509 } 510 511 // Anzahl Schleifendurchläufe 512 loop++; 513 }// end while 514 515 if (solutions.size() > 0) { 516 int solutionLimit = 20; 517 // we do not need to print the best node if we display the top 20 solutions below anyway 518 logger.info("solutions (at most " + solutionLimit + " are shown):"); 519 int show = 1; 520 for (ExampleBasedNode c : solutions) { 521 logger.info(show + ": " + renderer.render(c.getConcept()) 522 + " (accuracy " + df.format(100 * c.getAccuracy()) + "%, length " 523 + OWLClassExpressionUtils.getLength(c.getConcept()) 524 + ", depth " + OWLClassExpressionUtils.getDepth(c.getConcept()) + ")"); 525 if (show >= solutionLimit) { 526 break; 527 } 528 show++; 529 } 
        } else {
            logger.info("no appropriate solutions found (try increasing the noisePercentage parameter to what was reported as most accurate expression found above)");
        }

        logger.debug("size of candidate set: " + searchTree.size());
        printBestSolutions(20);

        printStatistics(true);

        int conceptTests = conceptTestsReasoner + conceptTestsTooWeakList + conceptTestsOverlyGeneralList;
        if (stop) {
            logger.info("Algorithm stopped (" + conceptTests + " descriptions tested).\n");
        } else {
            logger.info("Algorithm terminated successfully (" + conceptTests + " descriptions tested).\n");
            logger.info(reasoner.toString());
        }

        totalLearningTime.stop();
        isRunning = false;
    }

    // we apply the operator recursively until all proper refinements up
    // to the maxmimum length are reached
    private void extendNodeProper(ExampleBasedNode node, int maxLength) {
        long propCalcNsStart = System.nanoTime();

        if (writeSearchTree)
            expandedNodes.add(node);

        if (node.getChildren().size() > maxNrOfChildren)
            maxNrOfChildren = node.getChildren().size();

        extendNodeProper(node, node.getConcept(), maxLength, 0);
        node.setHorizontalExpansion(maxLength);

        propernessCalcTimeNs += (System.nanoTime() - propCalcNsStart);
    }

    // for all refinements of concept up to max length, we check whether they are proper
    // and call the method recursively if not
    // recDepth is used only for statistics
    private void extendNodeProper(ExampleBasedNode node, OWLClassExpression concept, int maxLength,
            int recDepth) {

        // do not execute methods if algorithm has been stopped (this means that the algorithm
        // will terminate without further reasoning queries)
        if (stop)
            return;

        if (recDepth > maxRecDepth)
            maxRecDepth = recDepth;

        // compute refinements => we must not delete refinements with low horizontal expansion,
        // because they are used in recursive calls of this method later on
        long refinementCalcTimeNsStart = System.nanoTime();
        Set<OWLClassExpression> refinements = operator.refine(concept, maxLength, null);
        refinementCalcTimeNs += System.nanoTime() - refinementCalcTimeNsStart;

        if (refinements.size() > maxNrOfRefinements)
            maxNrOfRefinements = refinements.size();

        // remove all refinements that are already children of the node
        long childConceptsDeletionTimeNsStart = System.nanoTime();
        refinements.removeAll(node.getChildConcepts());
        childConceptsDeletionTimeNs += System.nanoTime() - childConceptsDeletionTimeNsStart;

        // evaluate all concepts whose length is bigger than the horizontal expansion of the node
        long evaluateSetCreationTimeNsStart = System.nanoTime();
        Set<OWLClassExpression> toEvaluateConcepts = new TreeSet<>();
        Iterator<OWLClassExpression> it = refinements.iterator();

        while (it.hasNext()) {

            OWLClassExpression refinement = it.next();
            if (OWLClassExpressionUtils.getLength(refinement, lengthMetric) > node.getHorizontalExpansion()) {
                // TRUE means that improperness was detected, but FALSE does not mean that the refinement is proper
                boolean impropernessDetected = false;

                // 1. short concept construction
                if (useShortConceptConstruction) {
                    OWLClassExpression shortConcept = ConceptTransformation.getShortConcept(refinement);
                    // compare with original concept
                    int n = shortConcept.compareTo(concept);

                    // concepts are equal, i.e. refinement is improper
                    if (n == 0) {
                        propernessTestsAvoidedByShortConceptConstruction++;
                        impropernessDetected = true;
                    }
                }

                // 2. too weak test
                if (!impropernessDetected && useTooWeakList) {
                    if (refinement instanceof OWLObjectIntersectionOf) {
                        boolean tooWeakElement = containsTooWeakElement((OWLObjectIntersectionOf) refinement);
                        if (tooWeakElement) {
                            propernessTestsAvoidedByTooWeakList++;
                            conceptTestsTooWeakList++;
                            impropernessDetected = true;
                            // tooWeakList.add(refinement);

                            // the node is created directly here (having two places where
                            // nodes are created would be prone to bugs, but this appears
                            // to be the most sensible place)
                            properRefinements.add(refinement);
                            tooWeakList.add(refinement);

                            ExampleBasedNode newNode = new ExampleBasedNode(refinement, this);
                            newNode.setHorizontalExpansion(OWLClassExpressionUtils.getLength(refinement, lengthMetric) - 1);
                            newNode.setTooWeak(true);
                            newNode.setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.TOO_WEAK_LIST);
                            node.addChild(newNode);

                            // the refinement has to be removed, as it is proper
                            it.remove();
                        }
                    }
                }

                // properness could not be determined beforehand
                if (!impropernessDetected) {
                    toEvaluateConcepts.add(refinement);
                }

            }

        }
        evaluateSetCreationTimeNs += System.nanoTime() - evaluateSetCreationTimeNsStart;

        Set<OWLClassExpression> improperConcepts = null;
        if (toEvaluateConcepts.size() > 0) {
            // test all concepts for properness (with DIG in only one query)
            if (usePropernessChecks) {
                long propCalcReasoningStart = System.nanoTime();
                improperConcepts = reasoner.isSuperClassOf(toEvaluateConcepts, concept);

                propernessTestsReasoner += toEvaluateConcepts.size();
                propernessCalcReasoningTimeNs += System.nanoTime() - propCalcReasoningStart;
            }
        }

        long improperConceptsRemovalTimeNsStart = System.nanoTime();
        // the improper concepts are removed from those to be evaluated, i.e.
        // all proper concepts remain (a simple renaming)
        if (improperConcepts != null)
            toEvaluateConcepts.removeAll(improperConcepts);
        Set<OWLClassExpression> properConcepts = toEvaluateConcepts;
        // delete all proper concepts from refinements
        refinements.removeAll(properConcepts);
        improperConceptsRemovalTimeNs += System.nanoTime() - improperConceptsRemovalTimeNsStart;

        for (OWLClassExpression refinement : properConcepts) {
            long redundancyCheckTimeNsStart = System.nanoTime();
            boolean nonRedundant = properRefinements.add(refinement);
            redundancyCheckTimeNs += System.nanoTime() - redundancyCheckTimeNsStart;

            if (!nonRedundant)
                redundantConcepts++;

            // a new node is only created if the concept does not
            // exist already
            if (nonRedundant) {

                // newly created node
                ExampleBasedNode newNode = new ExampleBasedNode(refinement, this);
                // the -1 is important, since otherwise refinements of equal length
                // would not be allowed for the new node, e.g. person => male
                newNode.setHorizontalExpansion(OWLClassExpressionUtils.getLength(refinement, lengthMetric) - 1);

                boolean qualityKnown = false;
                int quality = -2;

                // use the overly general list
                if (useOverlyGeneralList && refinement instanceof OWLObjectUnionOf) {
                    if (containsOverlyGeneralElement((OWLObjectUnionOf) refinement)) {
                        conceptTestsOverlyGeneralList++;
                        quality = nrOfNegativeExamples;
                        qualityKnown = true;
                        newNode.setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.OVERLY_GENERAL_LIST);
                        newNode.setCoveredExamples(positiveExamples, negativeExamples);
                    }

                }

                // evaluate the quality of the node
                if (!qualityKnown) { // -> quality == -2
                    long propCalcReasoningStart2 = System.nanoTime();
                    conceptTestsReasoner++;

                    // determine individuals which have not been covered yet
                    // (more efficient than full retrieval)
                    Set<OWLIndividual> coveredPositives = node.getCoveredPositives();
                    Set<OWLIndividual> newlyCoveredPositives = new HashSet<>();

                    // calculate how many pos. examples are not covered by the
                    // parent node of the refinement
                    int misclassifiedPositives = nrOfPositiveExamples - coveredPositives.size();

                    // iterate through all covered examples (examples which are not
                    // covered do not need to be tested, because they remain uncovered);
                    // DIG will be slow if we send each reasoner request individually
                    // (however if we send everything in one request, too many instance checks
                    // are performed => rely on fast instance checker)
                    for (OWLIndividual i : coveredPositives) {
                        // TODO: move code to a separate function
                        if (quality != -1) {
                            boolean covered = reasoner.hasType(refinement, i);
                            if (!covered)
                                misclassifiedPositives++;
                            else
                                newlyCoveredPositives.add(i);

                            // too many positives lost => concept is too weak (quality -1)
                            if (misclassifiedPositives > allowedMisclassifications)
                                quality = -1;

                        }
                    }

                    Set<OWLIndividual> newlyCoveredNegatives = null;
                    if (quality != -1) {
                        Set<OWLIndividual> coveredNegatives = node.getCoveredNegatives();
                        newlyCoveredNegatives = new HashSet<>();

                        for (OWLIndividual i : coveredNegatives) {
                            boolean covered = reasoner.hasType(refinement, i);
                            if (covered)
                                newlyCoveredNegatives.add(i);
                        }
                    }

                    propernessCalcReasoningTimeNs += System.nanoTime() - propCalcReasoningStart2;
                    newNode.setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.REASONER);
                    // let the configured accuracy method veto the concept as too weak
                    if (quality != -1 && !(((PosNegLP) learningProblem).getAccuracyMethod() instanceof AccMethodNoWeakness) &&
                            ((PosNegLP) learningProblem).getAccuracyMethod().getAccOrTooWeak2(
                                    newlyCoveredPositives.size(), nrOfPositiveExamples - newlyCoveredPositives.size(),
                                    newlyCoveredNegatives.size(), nrOfNegativeExamples - newlyCoveredNegatives.size(),
                                    1) == -1)
                        quality = -1;

                    if (quality != -1) {
                        // quality is the number of misclassifications (if it is
                        // not too weak)
                        quality = (nrOfPositiveExamples - newlyCoveredPositives.size())
                                + newlyCoveredNegatives.size();
                        newNode.setCoveredExamples(newlyCoveredPositives, newlyCoveredNegatives);
                    }

                }

                if (quality == -1) {
                    newNode.setTooWeak(true);
                    // blacklist for too weak concepts
                    tooWeakList.add(refinement);
                } else {
                    // solution found
                    if (quality >= 0 && quality <= allowedMisclassifications) {
                        solutions.add(newNode);
                    }

                    // we need to make sure that all positives are covered
                    // before adding something to the overly general list
                    if ((newNode.getCoveredPositives().size() == nrOfPositiveExamples)
                            && quality == nrOfNegativeExamples)
                        overlyGeneralList.add(refinement);

                }

                node.addChild(newNode);

                // it is often useful to continue expanding until a longer node is
                // reached (to replace atomic concepts with more specific ones)
                if (forceRefinementLengthIncrease && !newNode.isTooWeak()) {
                    // extend node again if its concept has the same length
                    if (OWLClassExpressionUtils.getLength(node.getConcept(), lengthMetric) == OWLClassExpressionUtils.getLength(newNode.getConcept(), lengthMetric)) {
                        extendNodeProper(newNode, refinement, maxLength, recDepth + 1);
                    }
                }

            }
        }

        // all concepts that remain now are improper refinements;
        // the method is invoked again on each of them, since proper
        // refinements could emerge from them
        for (OWLClassExpression refinement : refinements) {
            // check for redundancy (otherwise we may run into very
            // time-intensive loops, see planned JUnit test case $x)

            long redundancyCheckTimeNsStart = System.nanoTime();
            boolean redundant = properRefinements.contains(refinement);
            redundancyCheckTimeNs += System.nanoTime() - redundancyCheckTimeNsStart;

            if (!redundant) {
                extendNodeProper(node, refinement, maxLength, recDepth + 1);
            }
        }
    }

    private void printStatistics(boolean finalStats) {
        // TODO: viele Tests haben
		// ergeben, dass man nie 100% mit der Zeitmessung abdecken
		// kann (zum einen weil Stringausgabe verzögert erfolgt und zum anderen weil
		// Funktionsaufrufe, garbage collection, Zeitmessung selbst auch Zeit benötigt);
		// es empfiehlt sich folgendes Vorgehen:
		// - Messung der Zeit eines Loops im Algorithmus
		// - Messung der Zeit für alle node extensions innerhalb eines Loops
		// => als Normalisierungsbasis empfehlen sich dann die Loopzeit statt
		// Algorithmuslaufzeit
		// ... momentan kann es aber auch erstmal so lassen
		// [translation of the German TODO above] Many tests have shown that the
		// time measurement can never cover 100%: string output is delayed, and
		// function calls, garbage collection and the measurement itself also cost
		// time. Recommended approach: measure the time of one algorithm loop and
		// of all node extensions within it, then normalise against the loop time
		// instead of the total algorithm runtime. For now it can stay as it is.

		long algorithmRuntime = System.nanoTime() - algorithmStartTime;

		// non-final statistics: current best node, next node to expand, reasoner times
		if (!finalStats) {

			ExampleBasedNode bestNode = searchTreeStable.best();
			logger.debug("start node: "
					+ searchTreeStable.getRoot().getShortDescription());
			String bestNodeString = "currently best node: "
					+ bestNode.getShortDescription();

			logger.debug(bestNodeString);
			logger.trace(bestNode.getStats());
			if (bestNode.getCoveredNegatives().size() <= 5)
				logger.trace("covered negs: " + bestNode.getCoveredNegatives());
			String expandedNodeString = "next expanded node: "
					+ searchTree.best().getShortDescription();
			logger.debug(expandedNodeString);
			logger.debug("algorithm runtime " + Helper.prettyPrintNanoSeconds(algorithmRuntime));
			logger.debug("size of candidate set: " + searchTree.size());
			logger.debug("subsumption time: " + Helper.prettyPrintNanoSeconds(reasoner.getSubsumptionReasoningTimeNs()));
			logger.debug("instance check time: " + Helper.prettyPrintNanoSeconds(reasoner.getInstanceCheckReasoningTimeNs()));
			logger.debug("retrieval time: " + Helper.prettyPrintNanoSeconds(reasoner.getRetrievalReasoningTimeNs()));
		}

		// benchmark breakdown: each measured phase as a percentage of total runtime
		if (computeBenchmarkInformation) {

			long reasoningTime = reasoner.getOverallReasoningTimeNs();
			double reasoningPercentage = 100 * reasoningTime / (double) algorithmRuntime;
			long propWithoutReasoning = propernessCalcTimeNs - propernessCalcReasoningTimeNs;
			double propPercentage = 100 * propWithoutReasoning / (double) algorithmRuntime;
			double deletionPercentage = 100 * childConceptsDeletionTimeNs / (double) algorithmRuntime;
			long subTime = reasoner.getSubsumptionReasoningTimeNs();
			double subPercentage = 100 * subTime / (double) algorithmRuntime;
			double refinementPercentage = 100 * refinementCalcTimeNs / (double) algorithmRuntime;
			double redundancyCheckPercentage = 100 * redundancyCheckTimeNs / (double) algorithmRuntime;
			double evaluateSetCreationTimePercentage = 100 * evaluateSetCreationTimeNs / (double) algorithmRuntime;
			double improperConceptsRemovalTimePercentage = 100 * improperConceptsRemovalTimeNs / (double) algorithmRuntime;

			logger.debug("reasoning percentage: " + df.format(reasoningPercentage) + "%");
			logger.debug(" subsumption check time: " + df.format(subPercentage) + "%");
			logger.debug("proper calculation percentage (wo. reasoning): " + df.format(propPercentage) + "%");
			logger.debug(" deletion time percentage: " + df.format(deletionPercentage) + "%");
			logger.debug(" refinement calculation percentage: " + df.format(refinementPercentage) + "%");

			// operator-specific statistics only available for RhoDRDown
			if (operator instanceof RhoDRDown) {
				double mComputationTimePercentage = 100 * ((RhoDRDown) operator).mComputationTimeNs / (double) algorithmRuntime;
				double topComputationTimePercentage = 100 * ((RhoDRDown) operator).topComputationTimeNs / (double) algorithmRuntime;
				logger.debug(" m calculation percentage: " + df.format(mComputationTimePercentage) + "%");
				logger.debug(" top calculation percentage: " + df.format(topComputationTimePercentage) + "%");
			}

			double cleanTimePercentage = 100 * ConceptTransformation.cleaningTimeNs / (double) algorithmRuntime;
			double onnfTimePercentage = 100 * ConceptTransformation.onnfTimeNs / (double) algorithmRuntime;
			double shorteningTimePercentage = 100 * ConceptTransformation.shorteningTimeNs / (double) algorithmRuntime;

			logger.debug(" redundancy check percentage: " + df.format(redundancyCheckPercentage) + "%");
			logger.debug(" evaluate set creation time percentage: " + df.format(evaluateSetCreationTimePercentage) + "%");
			logger.debug(" improper concepts removal time percentage: " + df.format(improperConceptsRemovalTimePercentage) + "%");
			logger.debug("clean time percentage: " + df.format(cleanTimePercentage) + "%");
			logger.debug("onnf time percentage: " + df.format(onnfTimePercentage) + "%");
			logger.debug("shortening time percentage: " + df.format(shorteningTimePercentage) + "%");
		}

		logger.debug("properness tests (reasoner/short concept/too weak list): "
				+ propernessTestsReasoner + "/" + propernessTestsAvoidedByShortConceptConstruction
				+ "/" + propernessTestsAvoidedByTooWeakList);
		logger.debug("concept tests (reasoner/too weak list/overly general list/redundant concepts): "
				+ conceptTestsReasoner + "/" + conceptTestsTooWeakList + "/" + conceptTestsOverlyGeneralList + "/" + redundantConcepts);
	}

	/**
	 * Checks whether any operand of the given intersection is already on the
	 * too-weak blacklist (in which case the whole intersection is too weak).
	 *
	 * @param mc the intersection to check
	 * @return true iff some operand is contained in {@code tooWeakList}
	 */
	private boolean containsTooWeakElement(OWLObjectIntersectionOf mc) {
		for (OWLClassExpression child : mc.getOperands()) {
			if (tooWeakList.contains(child))
				return true;
		}
		return false;
	}

	/**
	 * Checks whether any operand of the given union is already on the
	 * overly-general list (in which case the whole union is overly general).
	 *
	 * @param md the union to check
	 * @return true iff some operand is contained in {@code overlyGeneralList}
	 */
	private boolean containsOverlyGeneralElement(OWLObjectUnionOf md) {
		for (OWLClassExpression child : md.getOperands()) {
			if (overlyGeneralList.contains(child))
				return true;
		}
		return false;
	}

	// Greedily builds an intersection of promising search-tree nodes, starting
	// from the node chosen by findBestTraversalStartNode(). Inspects at most
	// 1000 nodes. NOTE(review): on reaching the noise threshold this calls
	// System.exit(0), terminating the whole JVM — confirm this is intended.
	// TODO: investigate whether it makes sense not to store all individuals
	// in the nodes, but instead perform instance checks in tree traversal
	// (it is only run in large intervals so it shouldn't be too expensive)
	private void traverseTree() {
		ExampleBasedNode startNode = findBestTraversalStartNode();
		OWLClassExpression currentDescription = startNode.getConcept();
		Set<OWLIndividual> currentCoveredPos = startNode.getCoveredPositives();
		Set<OWLIndividual> currentCoveredNeg = startNode.getCoveredNegatives();
		double currentAccuracy = startNode.getAccuracy();
		int currentMisclassifications = nrOfPositiveExamples - currentCoveredPos.size()
				+ currentCoveredNeg.size();
		logger.debug("tree traversal start node "
				+ startNode
						.getShortDescription());
		logger.debug("tree traversal start accuracy: " + currentAccuracy);
		int i = 0;
		// start from the most promising nodes
		SortedSet<ExampleBasedNode> reverseView = searchTreeStable.descendingSet();
		for (ExampleBasedNode currNode : reverseView) {
			// compute covered positives and negatives (intersection with the
			// coverage of the description built so far)
			SortedSet<OWLIndividual> newCoveredPositives = new TreeSet<>(currentCoveredPos);
			newCoveredPositives.retainAll(currNode.getCoveredPositives());
			SortedSet<OWLIndividual> newCoveredNegatives = new TreeSet<>(currentCoveredNeg);
			newCoveredNegatives.retainAll(currNode.getCoveredNegatives());

			// compute the accuracy we would get by adding this node
			double accuracy = (newCoveredPositives.size() + nrOfNegativeExamples - newCoveredNegatives.size())
					/ (double) (nrOfPositiveExamples + nrOfNegativeExamples);
			int misclassifications = nrOfPositiveExamples - newCoveredPositives.size() + newCoveredNegatives.size();
			int misclassifiedPositives = nrOfPositiveExamples - newCoveredPositives.size();

			int lostPositives = currentCoveredPos.size() - newCoveredPositives.size();

			// TODO: maybe we should also consider a minimum improvement when adding something
			// otherwise we could overfit
			// we give double weight to lost positives, i.e. when one positive is
			// lost at least two negatives need to be uncovered
			boolean consider = (misclassifications + lostPositives < currentMisclassifications)
					&& (misclassifiedPositives <= allowedMisclassifications);

			// concept has been chosen, so construct it
			if (consider) {

				// construct a new concept as intersection of both
				OWLClassExpression mc = dataFactory.getOWLObjectIntersectionOf(currentDescription, currNode.getConcept());

				mc = ConceptTransformation.cleanConceptNonRecursive(mc);
				mc = mc.getNNF();

				logger.debug("misclassifications: " + misclassifications);
				logger.debug("misclassified positives: " + misclassifiedPositives);
				logger.debug("accuracy: " + accuracy);

				// update variables
				currentDescription = mc;
				currentCoveredPos = newCoveredPositives;
				currentCoveredNeg = newCoveredNegatives;
				currentMisclassifications = misclassifications;
				//noinspection UnusedAssignment
				currentAccuracy = accuracy;

				// stop as soon as the noise threshold is satisfied
				if (accuracy > 1 - (noisePercentage / 100)) {
					logger.info("traversal found " + mc);
					logger.info("accuracy: " + accuracy);
					System.exit(0);
				}
			}

			i++;
			if (i == 1000)
				break;
		}

	}

	// we look for a node covering many positives and hopefully
	// few negatives; we give a strong penalty on uncovered positives
	/**
	 * Scores up to 10000 of the best stable-tree nodes and returns the one
	 * with the highest score (2 points per covered positive plus 1 point per
	 * uncovered negative), or null if no node scores above 0.
	 */
	private ExampleBasedNode findBestTraversalStartNode() {
		// 2 points for each covered pos + 1 point for each uncovered neg
		int currScore = 0;
		int i = 0;
		ExampleBasedNode currNode = null;
		SortedSet<ExampleBasedNode> reverseView = searchTreeStable.descendingSet();
		for (ExampleBasedNode node : reverseView) {
			int score = 2 * node.getCoveredPositives().size()
					+ (nrOfNegativeExamples - node.getCoveredNegatives().size());
			if (score > currScore) {
				currScore = score;
				currNode = node;
			}
			i++;
			// limit search because stable candidate set can grow very large
			if (i == 10000)
				break;
		}
		return currNode;
	}

	/**
	 * Shrinks the candidate search tree to at most
	 * {@code candidatePostReductionSize} promising nodes, selected from the
	 * best stable-tree nodes by two criteria (accuracy gain over the parent
	 * and remaining refinement potential).
	 */
	private void reduceCandidates() {
		Iterator<ExampleBasedNode> it = searchTreeStable.descendingIterator();
		Set<ExampleBasedNode> promisingNodes = new HashSet<>();
		int i = 0;
		while (it.hasNext() && promisingNodes.size() < candidatePostReductionSize) {
			ExampleBasedNode node = it.next();
			// first criterion: the considered node should have an accuracy gain over its parent
			// (avoids to use only the most promising node + all its refinements with equal accuracy)
			boolean hasAccuracyGain = (node.getParent() == null)
					|| (node.getCoveredPositives().size() != node.getParent().getCoveredPositives().size())
					|| (node.getCoveredNegatives().size() != node.getParent().getCoveredNegatives().size());
			// second criterion: uncovered positives; it does not make much sense to pick nodes with
			// low potential for reaching a solution (already at the limit of misclassified positives)
			int misclassifiedPositives = nrOfPositiveExamples - node.getCoveredPositives().size();
			boolean hasRefinementPotential = (misclassifiedPositives <= Math.floor(0.65d * allowedMisclassifications));
			boolean keep = hasAccuracyGain && hasRefinementPotential;
			if (keep) {
				promisingNodes.add(node);
			}
			i++;
		}
		searchTree.retainAll(promisingNodes);
		logger.debug("searched " + i + " nodes and picked the following promising descriptions:");
		if (logger.isDebugEnabled()) {
			for (ExampleBasedNode node : promisingNodes)
				logger.debug(node.getShortDescription());
		}
	}

	/** @return the concept of the best node in the stable search tree */
	public OWLClassExpression getBestSolution() {
		return searchTreeStable.best().getConcept();
	}

	/**
	 * Returns the currently best class expressions, best first.
	 * NOTE(review): the counter is incremented after the check, so up to 201
	 * elements can be returned rather than 200 — confirm whether intended.
	 */
	public List<OWLClassExpression> getCurrentlyBestDescriptions() {
		List<OWLClassExpression> best = new LinkedList<>();
		int i = 0;
		int nrOfSolutions = 200;
		for (ExampleBasedNode n : searchTreeStable.descendingSet()) {
			best.add(n.getConcept());
			if (i == nrOfSolutions)
				return best;
			i++;
		}
		return best;
	}

	/**
	 * Returns the currently best nodes as evaluated descriptions (scored via
	 * the learning problem), capped at roughly 200 elements because the
	 * candidate set can be very large.
	 */
	public TreeSet<EvaluatedDescriptionPosNeg> getCurrentlyBestEvaluatedDescriptions() {
		Iterator<ExampleBasedNode> it = searchTreeStable.descendingIterator();
		int count = 0;
		TreeSet<EvaluatedDescriptionPosNeg> cbd = new TreeSet<>(edComparator);
		while (it.hasNext()) {
			ExampleBasedNode eb = it.next();
			cbd.add(new EvaluatedDescriptionPosNeg(eb.getConcept(), getScore(eb.getConcept())));
			// return a maximum of 200 elements (we need a maximum, because the
			// candidate set can be very large)
			if (count > 200)
				return cbd;
			count++;
		}
		return cbd;
	}

	/**
	 * Traces the best solutions found so far; only nodes with accuracy 1 are
	 * printed (the loop breaks at the first node below full accuracy).
	 *
	 * @param nrOfSolutions maximum number of solutions to print; 0 means all
	 */
	public void printBestSolutions(int nrOfSolutions) {
		// QUALITY: could be optimized
		if (!logger.isTraceEnabled()) {
			return;
		}
		if (nrOfSolutions == 0)
			nrOfSolutions = searchTreeStable.size();
		int i = 0;
		for (ExampleBasedNode n : searchTreeStable.descendingSet()) {
			if (n.getAccuracy() < 1)
				break;
			logger.trace("best: "
					+ n.getShortDescription());
			if (i == nrOfSolutions)
				break;
			i++;
		}

	}

	/** @return the learning-problem score of the best solution */
	public ScorePosNeg getSolutionScore() {
		return ((PosNegLP) learningProblem).computeScore(getBestSolution());
	}

	// scores an arbitrary class expression via the learning problem
	private ScorePosNeg getScore(OWLClassExpression d) {
		return ((PosNegLP) learningProblem).computeScore(d);
	}

	/** @return the root node of the stable search tree */
	public ExampleBasedNode getStartNode() {
		return searchTreeStable.getRoot();
	}

	/**
	 * Decides whether the algorithm should stop. This does not always depend
	 * on whether an actual solution was found.
	 * The algorithm stops if:
	 * 1. the object attribute stop is set to true (possibly by an outside source)
	 * 2. the maximum execution time is reached
	 * 3. the maximum number of class description tests is reached
	 *
	 * Continuation criteria and result improvement:
	 * the algorithm continues (although it would normally stop) if
	 * 1. the minimum execution time is not reached (default 0)
	 * 2. not enough good solutions are found (default 1)
	 * otherwise it stops
	 *
	 * @return true if the algorithm should stop; this is mostly independent of the question whether a solution was found
	 */
	private boolean isTerminationCriteriaReached() {
		// algorithm was stopped from outside
		if (this.stop) {
			return true;
		}

		long totalTimeNeeded = System.currentTimeMillis() - this.runtime;
		long maxMilliSeconds = maxExecutionTimeInSeconds * 1000;
		long minMilliSeconds = minExecutionTimeInSeconds * 1000;
		int conceptTests = conceptTestsReasoner + conceptTestsTooWeakList + conceptTestsOverlyGeneralList;
		boolean result = false;

		//ignore default
		if (maxExecutionTimeInSeconds == 0)
			result = false;
		//alreadyReached
		else if (maxExecutionTimeAlreadyReached)
			return true;
		//test
		else if (maxMilliSeconds < totalTimeNeeded) {
			this.stop();
			logger.info("Maximum time (" + maxExecutionTimeInSeconds
					+ " seconds) reached, stopping now...");
			maxExecutionTimeAlreadyReached = true;
			return true;
		}

		//ignore default
		if (maxClassDescriptionTests == 0)
			result = false;
		//test
		else if (conceptTests >= maxClassDescriptionTests) {
			logger.info("Maximum Class OWLClassExpression tests (" + maxClassDescriptionTests
					+ " tests [actual: " + conceptTests + "]) reached, stopping now...");
			return true;
		}

		// we stop if sufficiently many solutions (concepts fitting the noise parameter) have been
		// reached - unless this termination criterion is switched off using terminateOnNoiseReached = false
		if (guaranteeXgoodAlreadyReached) {
			result = true;
		} else if (solutions.size() >= guaranteeXgoodDescriptions && terminateOnNoiseReached) {
			// NOTE(review): when guaranteeXgoodDescriptions == 1 (the default),
			// result is never set here even though enough solutions exist —
			// confirm whether this asymmetry is intended
			if (guaranteeXgoodDescriptions != 1) {
				logger.info("Minimum number (" + guaranteeXgoodDescriptions + ") of good descriptions reached.");

				guaranteeXgoodAlreadyReached = true;
				result = true;
			}
		}

		// minimum-execution-time gate: keeps the previous result once the
		// minimum time has passed, otherwise forces continuation
		if (minExecutionTimeAlreadyReached) {
			result = result && true;
		} else if (minMilliSeconds < totalTimeNeeded) {
			if (minExecutionTimeInSeconds != 0) {
				logger.info("Minimum time (" + minExecutionTimeInSeconds + " seconds) reached.");
			}
			minExecutionTimeAlreadyReached = true;
			result = result && true;
		} else {
			result = false;
		}

		return result;

	}

	// ---- configuration option accessors ----

	public boolean isUseTreeTraversal() {
		return useTreeTraversal;
	}

	public void setUseTreeTraversal(boolean useTreeTraversal) {
		this.useTreeTraversal = useTreeTraversal;
	}

	public boolean isUseCandidateReduction() {
		return useCandidateReduction;
	}

	public void setUseCandidateReduction(boolean useCandidateReduction) {
		this.useCandidateReduction = useCandidateReduction;
	}

	public int getCandidatePostReductionSize() {
		return candidatePostReductionSize;
	}

	public void setCandidatePostReductionSize(int candidatePostReductionSize) {
		this.candidatePostReductionSize = candidatePostReductionSize;
	}

	public boolean isComputeBenchmarkInformation() {
		return computeBenchmarkInformation;
	}

	public void setComputeBenchmarkInformation(boolean computeBenchmarkInformation) {
		this.computeBenchmarkInformation = computeBenchmarkInformation;
	}

	@Override
	public OWLClassExpression getCurrentlyBestDescription() {
		return getBestSolution();
	}

	@Override
	public EvaluatedDescriptionPosNeg getCurrentlyBestEvaluatedDescription() {
		return new EvaluatedDescriptionPosNeg(getBestSolution(), getSolutionScore());
	}

	public LengthLimitedRefinementOperator getRefinementOperator() {
		return operator;
	}

	public LengthLimitedRefinementOperator getOperator() {
		return operator;
	}

	@Autowired(required = false)
	public void setOperator(LengthLimitedRefinementOperator operator) {
		this.operator = operator;
	}

	public boolean isWriteSearchTree() {
		return writeSearchTree;
	}

	public void setWriteSearchTree(boolean writeSearchTree) {
		this.writeSearchTree = writeSearchTree;
	}

	public File getSearchTreeFile() {
		return searchTreeFile;
	}

	public void setSearchTreeFile(File searchTreeFile) {
		this.searchTreeFile = searchTreeFile;
	}

	public boolean isReplaceSearchTree() {
		return replaceSearchTree;
	}

	public void setReplaceSearchTree(boolean replaceSearchTree) {
		this.replaceSearchTree = replaceSearchTree;
	}

	public boolean isUseTooWeakList() {
		return useTooWeakList;
	}

	public void setUseTooWeakList(boolean useTooWeakList) {
		this.useTooWeakList = useTooWeakList;
	}

	public boolean isUseOverlyGeneralList() {
		return useOverlyGeneralList;
	}

	public void setUseOverlyGeneralList(boolean useOverlyGeneralList) {
		this.useOverlyGeneralList = useOverlyGeneralList;
	}

	public boolean isUseShortConceptConstruction() {
		return useShortConceptConstruction;
	}

	public void setUseShortConceptConstruction(boolean useShortConceptConstruction) {
		this.useShortConceptConstruction = useShortConceptConstruction;
	}

	public boolean isImproveSubsumptionHierarchy() {
		return improveSubsumptionHierarchy;
	}

	public void setImproveSubsumptionHierarchy(boolean improveSubsumptionHierarchy) {
		this.improveSubsumptionHierarchy = improveSubsumptionHierarchy;
	}

	public double getNoisePercentage() {
		return noisePercentage;
	}

	public void setNoisePercentage(double noisePercentage) {
		this.noisePercentage = noisePercentage;
	}

	public OWLClassExpression getStartClass() {
		return startClass;
	}

	// NOTE(review): the setter accepts an OWLClass while the getter returns an
	// OWLClassExpression — asymmetric but backward-compatible
	public void setStartClass(OWLClass startClass) {
		this.startClass = startClass;
	}

	public boolean isUsePropernessChecks() {
		return usePropernessChecks;
	}

	public void setUsePropernessChecks(boolean usePropernessChecks) {
		this.usePropernessChecks = usePropernessChecks;
	}

	public boolean isForceRefinementLengthIncrease() {
		return forceRefinementLengthIncrease;
	}

	public void setForceRefinementLengthIncrease(boolean forceRefinementLengthIncrease) {
		this.forceRefinementLengthIncrease = forceRefinementLengthIncrease;
	}

	public int getMinExecutionTimeInSeconds() {
		return minExecutionTimeInSeconds;
	}

	public void setMinExecutionTimeInSeconds(int minExecutionTimeInSeconds) {
		this.minExecutionTimeInSeconds = minExecutionTimeInSeconds;
	}

	public int getGuaranteeXgoodDescriptions() {
		return guaranteeXgoodDescriptions;
	}

	public void setGuaranteeXgoodDescriptions(int guaranteeXgoodDescriptions) {
		this.guaranteeXgoodDescriptions = guaranteeXgoodDescriptions;
	}

	public int getMaxClassDescriptionTests() {
		return maxClassDescriptionTests;
	}

	public void setMaxClassDescriptionTests(int maxClassDescriptionTests) {
		this.maxClassDescriptionTests = maxClassDescriptionTests;
	}

	public boolean isShowBenchmarkInformation() {
		return showBenchmarkInformation;
	}

	public void setShowBenchmarkInformation(boolean showBenchmarkInformation) {
		this.showBenchmarkInformation = showBenchmarkInformation;
	}

	public double getNegativeWeight() {
		return negativeWeight;
	}

	public void setNegativeWeight(double negativeWeight) {
		this.negativeWeight = negativeWeight;
	}

	public double getStartNodeBonus() {
		return startNodeBonus;
	}

	public void setStartNodeBonus(double startNodeBonus) {
		this.startNodeBonus = startNodeBonus;
	}

	public double getExpansionPenaltyFactor() {
		return expansionPenaltyFactor;
	}

	public void setExpansionPenaltyFactor(double expansionPenaltyFactor) {
		this.expansionPenaltyFactor = expansionPenaltyFactor;
	}

	public int getNegationPenalty() {
		return negationPenalty;
	}

	public void setNegationPenalty(int negationPenalty) {
		this.negationPenalty = negationPenalty;
	}

	public boolean isTerminateOnNoiseReached() {
		return terminateOnNoiseReached;
	}

	public void setTerminateOnNoiseReached(boolean terminateOnNoiseReached) {
		this.terminateOnNoiseReached = terminateOnNoiseReached;
	}

	public OWLClassExpressionLengthMetric getLengthMetric() {
		return lengthMetric;
	}

	/**
	 * Sets the length metric and propagates it to the refinement operator,
	 * if one has already been set.
	 */
	@Autowired(required = false)
	public void setLengthMetric(OWLClassExpressionLengthMetric lengthMetric) {
		this.lengthMetric = lengthMetric;
		if (operator != null) {
			operator.setLengthMetric(lengthMetric);
		}
	}

	@Autowired(required = false)
	public void setHeuristic(ExampleBasedHeuristic heuristic) {
		this.heuristic = heuristic;
	}

	public ExampleBasedHeuristic getHeuristic() {
		return heuristic;
	}
}