001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.el; 020 021import com.jamonapi.Monitor; 022import com.jamonapi.MonitorFactory; 023import org.apache.log4j.Logger; 024import org.dllearner.core.*; 025import org.dllearner.core.config.ConfigOption; 026import org.dllearner.core.owl.ClassHierarchy; 027import org.dllearner.core.owl.DatatypePropertyHierarchy; 028import org.dllearner.core.owl.ObjectPropertyHierarchy; 029import org.dllearner.learningproblems.ClassLearningProblem; 030import org.dllearner.refinementoperators.ELDown; 031import org.dllearner.utilities.Files; 032import org.dllearner.utilities.Helper; 033import org.dllearner.utilities.OWLAPIUtils; 034import org.dllearner.utilities.owl.EvaluatedDescriptionSet; 035import org.dllearner.utilities.owl.OWLClassExpressionMinimizer; 036import org.dllearner.utilities.owl.OWLClassExpressionUtils; 037import org.semanticweb.owlapi.manchestersyntax.parser.ManchesterOWLSyntaxParserException; 038import org.semanticweb.owlapi.model.OWLClass; 039import org.semanticweb.owlapi.model.OWLClassExpression; 040 041import java.io.File; 042import java.util.List; 043import java.util.Set; 044import java.util.TreeSet; 045 046/** 047 * A learning algorithm for EL, which is based on an 048 * ideal refinement operator. 049 * 050 * TODO redundancy check 051 * 052 * @author Jens Lehmann 053 * 054 */ 055@ComponentAnn(name="ELTL", shortName="eltl", version=0.5, description="ELTL is an algorithm based on the refinement operator in http://jens-lehmann.org/files/2009/el_ilp.pdf.") 056public class ELLearningAlgorithm extends AbstractCELA { 057 058 private static Logger logger = Logger.getLogger(ELLearningAlgorithm.class); 059 060 @ConfigOption(required=false, defaultValue="true", description="Specifies whether to use real disjointness checks or instance based ones (no common instances) in the refinement operator.") 061 private boolean instanceBasedDisjoints = true; 062 063 @ConfigOption(defaultValue="false", description="algorithm will terminate immediately when a correct definition is found") 064 private boolean stopOnFirstDefinition = false; 065 066 @ConfigOption(defaultValue="0.0", description="the (approximated) percentage of noise within the examples") 067 private double noisePercentage = 0.0; 068 069 @ConfigOption(defaultValue="owl:Thing", description="You can specify a start class for the algorithm. To do this, you have to use Manchester OWL syntax without using prefixes.") 070 private OWLClassExpression startClass; 071 072 @ConfigOption(defaultValue="false", description="specifies whether to write a search tree") 073 private boolean writeSearchTree = false; 074 075 @ConfigOption(defaultValue="log/searchTree.txt", description="file to use for the search tree") 076 private String searchTreeFile = "log/searchTree.txt"; 077 078 @ConfigOption(defaultValue="false", description="specifies whether to replace the search tree in the log file after each run or append the new search tree") 079 private boolean replaceSearchTree = false; 080 081 @ConfigOption(defaultValue="2",description="The maximum depth for class expressions to test") 082 private int maxClassExpressionDepth = 2; 083 084 @ConfigOption(defaultValue="10",description="Sets the maximum number of results one is interested in") 085 private int maxNrOfResults = 10; 086 087 private Set<OWLClass> ignoredConcepts = null; 088 089 @ConfigOption(description="class of which an OWL class expression should be learned") 090 private OWLClass classToDescribe; 091 092 private double noise; 093 094 // a set with limited size (currently the ordering is defined in the class itself) 095 private SearchTreeNode startNode; 096 @ConfigOption(defaultValue="StableHeuristic", description="The heuristic variable to use for ELTL") 097 private ELHeuristic heuristic; 098 private TreeSet<SearchTreeNode> candidates; 099 private ELDown operator; 100 101 private boolean isEquivalenceProblem = true; 102 private Monitor timeMonitor; 103 104 double max = -1d; 105 OWLClassExpression maxDescription; 106 107 public ELLearningAlgorithm() {} 108 109 public ELLearningAlgorithm(AbstractClassExpressionLearningProblem problem, AbstractReasonerComponent reasoner) { 110 super(problem, reasoner); 111 } 112 113 @Override 114 public void init() throws ComponentInitException { 115 // currently we use the stable heuristic 116 if(heuristic == null){ 117 heuristic = new StableHeuristic(); 118 } 119 120 candidates = new TreeSet<>(heuristic); 121 122 ClassHierarchy classHierarchy = initClassHierarchy(); 123 ObjectPropertyHierarchy obHierarchy = initObjectPropertyHierarchy(); 124 DatatypePropertyHierarchy dpHierarchy = initDataPropertyHierarchy(); 125 126 operator = new ELDown(reasoner, instanceBasedDisjoints, classHierarchy, obHierarchy, dpHierarchy); 127 operator.setMaxClassExpressionDepth(maxClassExpressionDepth); 128 operator.init(); 129 130 noise = noisePercentage/100d; 131 132 bestEvaluatedDescriptions = new EvaluatedDescriptionSet(maxNrOfResults); 133 134 minimizer = new OWLClassExpressionMinimizer(dataFactory, reasoner); 135 136 timeMonitor = MonitorFactory.getTimeMonitor("eltl-time"); 137 138 initialized = true; 139 } 140 141 @Override 142 public void start() { 143 stop = false; 144 isRunning = true; 145 reset(); 146 nanoStartTime = System.nanoTime(); 147 148 // create start node 149 if(startClass == null){ 150 startClass = dataFactory.getOWLThing(); 151 } else { 152 try { 153 this.startClass = OWLAPIUtils.classExpressionPropertyExpander(startClass, reasoner, dataFactory); 154 } catch (ManchesterOWLSyntaxParserException e) { 155 logger.info("Error parsing startClass: " + e.getMessage()); 156 this.startClass = dataFactory.getOWLThing(); 157 } 158 } 159 logger.info("Start class: " + startClass); 160 161 ELDescriptionTree top = new ELDescriptionTree(reasoner, startClass); 162 addDescriptionTree(top, null); 163 164 double highestAccuracy = 0.0; 165 166 // main loop 167 int loop = 0; 168 while(!stop && !stoppingCriteriaSatisfied()) { 169 // pick the best candidate according to the heuristic 170 SearchTreeNode best = candidates.pollLast(); 171 172 // apply operator 173 List<ELDescriptionTree> refinements = operator.refine(best.getDescriptionTree()); 174 175 // add all refinements to search tree, candidates, best descriptions 176 for(ELDescriptionTree refinement : refinements) { 177 addDescriptionTree(refinement, best); 178 } 179 180 // logging 181 if(logger.isTraceEnabled()) { 182 logger.trace("Chosen node " + best); 183 logger.trace(startNode.getTreeString(renderer)); 184 logger.trace("Loop " + loop + " completed."); 185 } 186 187 if(bestEvaluatedDescriptions.getBestAccuracy() > highestAccuracy) { 188 highestAccuracy = bestEvaluatedDescriptions.getBestAccuracy(); 189 long durationInMillis = getCurrentRuntimeInMilliSeconds(); 190 String durationStr = getDurationAsString(durationInMillis); 191 logger.info("more accurate (" + dfPercent.format(highestAccuracy) + ") class expression found after " + durationStr + ": " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription())); 192 } 193 194 if(writeSearchTree) { 195 writeSearchTree(); 196 } 197 198 } 199 200 // print solution(s) 201 logger.info("solutions[time: " + Helper.prettyPrintNanoSeconds(System.nanoTime()-nanoStartTime) + "]\n" + getSolutionString()); 202 203 isRunning = false; 204 } 205 206 // evaluates a class expression in tree form 207 private void addDescriptionTree(ELDescriptionTree descriptionTree, SearchTreeNode parentNode) { 208 // create search tree node 209 SearchTreeNode node = new SearchTreeNode(descriptionTree); 210 211 // convert tree to standard class expression 212 OWLClassExpression classExpression = descriptionTree.transformToClassExpression(); 213 214 if(classExpression.equals(startClass) || isDescriptionAllowed(classExpression)){ 215 216 // rewrite class expression 217 classExpression = rewrite(classExpression); 218 219 // compute score 220 Score score = learningProblem.computeScore(classExpression, noise); 221 222 // compute accuracy 223 double accuracy = score.getAccuracy(); 224 225 if(accuracy == -1) { 226 node.setTooWeak(); 227 } else { 228 node.setScore(score); 229 } 230 node.setAccuracy(accuracy); 231 232 // link to parent (unless start node) 233 if(parentNode == null) { 234 startNode = node; 235 } else { 236 parentNode.addChild(node); 237 } 238 239 if(!node.isTooWeak()) { 240 // add as candidate 241 candidates.add(node); 242 243 // check whether we want to add it to the best evaluated descriptions; 244 // to do this we pick the worst considered evaluated description 245 // (remember that the set has limited size, so it's likely not the worst overall); 246 // the class expression has a chance to make it in the set if it has 247 // at least as high accuracy - if not we can save the reasoner calls 248 // for fully computing the evaluated description 249 if(classToDescribe == null || !classToDescribe.equals(classExpression)) { 250 if(!bestEvaluatedDescriptions.isFull() || bestEvaluatedDescriptions.getWorst().getAccuracy() < node.getAccuracy()) { 251 EvaluatedDescription<Score> ed = new EvaluatedDescription<>(classExpression, score); 252 bestEvaluatedDescriptions.add(ed); 253// System.out.println("Add " + ed); 254 } else { 255// EvaluatedDescriptionPosNeg ed = new EvaluatedDescriptionPosNeg(classExpression, score); 256// System.out.println("reject " + ed); 257 } 258 } 259 } 260 } 261 } 262 263 private boolean stoppingCriteriaSatisfied() { 264 // in some cases, there could be no candidate left ... 265 if(candidates.isEmpty()) { 266 logger.info("Stopping algorithm: No candidates left."); 267 return true; 268 } 269 270 // stop when max time is reached 271 boolean timeout = isTimeExpired(); 272 if(timeout) { 273 logger.info("Stopping algorithm: Max. execution time was reached."); 274 return true; 275 } 276 277 // stop if we have a node covering all positives and none of the negatives 278 SearchTreeNode bestNode = candidates.last(); 279 boolean perfectDefinitionFound = bestNode.getAccuracy() == 1.0; 280 if(stopOnFirstDefinition && perfectDefinitionFound) { 281 logger.info("Stopping algorithm: Perfect definition found."); 282 return true; 283 } 284 285 return false; 286 } 287 288 /* 289 * set all values back to their default values (used for running 290 * the algorithm more than once) 291 */ 292 private void reset() { 293 candidates.clear(); 294 bestEvaluatedDescriptions.getSet().clear(); 295 } 296 297 private boolean isDescriptionAllowed(OWLClassExpression description) { 298 if(learningProblem instanceof ClassLearningProblem) { 299 if(isEquivalenceProblem) { 300 // the class to learn must not appear on the outermost property level 301 if(OWLClassExpressionUtils.occursOnFirstLevel(description, classToDescribe)) { 302 return false; 303 } 304 305 //non of the equivalent classes must occur on the first level 306 TreeSet<OWLClassExpression> toTest = new TreeSet<>(); 307 if(classToDescribe != null){ 308 toTest.add(classToDescribe); 309 } 310 while(!toTest.isEmpty()) { 311 OWLClassExpression d = toTest.pollFirst(); 312 if(OWLClassExpressionUtils.occursOnFirstLevel(description, d)) { 313 return false; 314 } 315 toTest.addAll(reasoner.getEquivalentClasses(d)); 316 } 317 } else { 318 // none of the superclasses of the class to learn must appear on the 319 // outermost property level 320 TreeSet<OWLClassExpression> toTest = new TreeSet<>(); 321 if(classToDescribe != null){ 322 toTest.add(classToDescribe); 323 } 324 while(!toTest.isEmpty()) { 325 OWLClassExpression d = toTest.pollFirst(); 326 if(OWLClassExpressionUtils.occursOnFirstLevel(description, d)) { 327 return false; 328 } 329 toTest.addAll(reasoner.getClassHierarchy().getSuperClasses(d)); 330 } 331 } 332 return true; 333 } else { 334 // the class to learn must not appear on the outermost property level 335 if(classToDescribe != null && OWLClassExpressionUtils.occursOnFirstLevel(description, classToDescribe)) { 336 return false; 337 } 338 } 339 340 return true; 341 } 342 343 private void writeSearchTree() { 344 StringBuilder treeString = new StringBuilder(); 345 treeString.append(startNode.getTreeString(renderer)).append("\n"); 346 347 // replace or append 348 if (replaceSearchTree) { 349 Files.createFile(new File(searchTreeFile), treeString.toString()); 350 } else { 351 Files.appendToFile(new File(searchTreeFile), treeString.toString()); 352 } 353 } 354 355 /** 356 * @param heuristic the heuristic to set 357 */ 358 public void setHeuristic(ELHeuristic heuristic) { 359 this.heuristic = heuristic; 360 } 361 362 public boolean isInstanceBasedDisjoints() { 363 return instanceBasedDisjoints; 364 } 365 366 public void setInstanceBasedDisjoints(boolean instanceBasedDisjoints) { 367 this.instanceBasedDisjoints = instanceBasedDisjoints; 368 } 369 370 /** 371 * @param stopOnFirstDefinition the stopOnFirstDefinition to set 372 */ 373 public void setStopOnFirstDefinition(boolean stopOnFirstDefinition) { 374 this.stopOnFirstDefinition = stopOnFirstDefinition; 375 } 376 377 /** 378 * @return the stopOnFirstDefinition 379 */ 380 public boolean isStopOnFirstDefinition() { 381 return stopOnFirstDefinition; 382 } 383 384 /** 385 * @return the start node 386 */ 387 public SearchTreeNode getStartNode() { 388 return startNode; 389 } 390 391 /** 392 * @return the noise in percentage 393 */ 394 public double getNoisePercentage() { 395 return noisePercentage; 396 } 397 398 /** 399 * @param noisePercentage the noise in percentage to set 400 */ 401 public void setNoisePercentage(double noisePercentage) { 402 this.noisePercentage = noisePercentage; 403 } 404 405 /** 406 * @param startClass the start class to set 407 */ 408 public void setStartClass(OWLClassExpression startClass) { 409 this.startClass = startClass; 410 } 411 412 /** 413 * @return the start class 414 */ 415 public OWLClassExpression getStartClass() { 416 return startClass; 417 } 418 419 /** 420 * @param ignoredConcepts the ignored concepts to set 421 */ 422 @Override 423 public void setIgnoredConcepts(Set<OWLClass> ignoredConcepts) { 424 this.ignoredConcepts = ignoredConcepts; 425 } 426 427 /** 428 * @return the ignored concepts 429 */ 430 @Override 431 public Set<OWLClass> getIgnoredConcepts() { 432 return ignoredConcepts; 433 } 434 435 /** 436 * @param classToDescribe the classToDescribe to set 437 */ 438 public void setClassToDescribe(OWLClass classToDescribe) { 439 this.classToDescribe = classToDescribe; 440 } 441 442 /** 443 * @param maxNrOfResults the maxNrOfResults to set 444 */ 445 public void setMaxNrOfResults(int maxNrOfResults) { 446 this.maxNrOfResults = maxNrOfResults; 447 } 448 449 /** 450 * @param maxClassExpressionDepth the maximum class expression depth to set 451 */ 452 public void setMaxClassExpressionDepth(int maxClassExpressionDepth) { 453 this.maxClassExpressionDepth = maxClassExpressionDepth; 454 } 455 456 /** 457 * @param writeSearchTree the writeSearchTree to set 458 */ 459 public void setWriteSearchTree(boolean writeSearchTree) { 460 this.writeSearchTree = writeSearchTree; 461 } 462 463 /** 464 * @param searchTreeFile the searchTreeFile to set 465 */ 466 public void setSearchTreeFile(String searchTreeFile) { 467 this.searchTreeFile = searchTreeFile; 468 } 469 470 /** 471 * @param replaceSearchTree the replaceSearchTree to set 472 */ 473 public void setReplaceSearchTree(boolean replaceSearchTree) { 474 this.replaceSearchTree = replaceSearchTree; 475 } 476 477}