/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.learningproblems;

import com.google.common.collect.Sets;
import org.dllearner.accuracymethods.*;
import org.dllearner.core.*;
import org.dllearner.core.config.ConfigOption;
import org.dllearner.utilities.ReasoningUtils;
import org.dllearner.utilities.ReasoningUtils.Coverage;
import org.dllearner.utilities.ReasoningUtilsCLP;
import org.semanticweb.owlapi.model.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;

import java.util.*;

/**
 * The problem of learning the OWL class expression of an existing class
 * in an OWL ontology.
 *
 * <p>Depending on {@link #isEquivalence()}, a learned expression is treated either as an
 * equivalent class or as a super class of the class to describe.
 *
 * @author Jens Lehmann
 *
 */
@ComponentAnn(name = "ClassLearningProblem", shortName = "clp", version = 0.6)
public class ClassLearningProblem extends AbstractClassExpressionLearningProblem<ClassScore> {

    private static final Logger logger = LoggerFactory.getLogger(ClassLearningProblem.class);

    private static final long NANOS_PER_SECOND = 1_000_000_000L;

    // start of the most recent getAccuracyOrTooWeak call, as returned by System.nanoTime()
    private long nanoStartTime;

    @ConfigOption(defaultValue = "10",
            description = "Maximum execution time in seconds")
    private int maxExecutionTimeInSeconds = 10;

    @ConfigOption(description = "class of which an OWL class expression should be learned",
            required = true)
    private OWLClass classToDescribe;

    // instances of the class to describe: once as (shuffled) list, once as sorted set
    private List<OWLIndividual> classInstances;
    private TreeSet<OWLIndividual> classInstancesSet;

    @ConfigOption(defaultValue = "true",
            description = "Whether this is an equivalence problem (or superclass learning problem)")
    private boolean equivalence = true;

    @ConfigOption(description = "beta index for F-measure in super class learning",
            required = false,
            defaultValue = "3.0")
    private double betaSC = 3.0;

    @ConfigOption(description = "beta index for F-measure in definition learning",
            required = false,
            defaultValue = "1.0")
    private double betaEq = 1.0;

    // instances of super classes excluding instances of the class itself
    private List<OWLIndividual> superClassInstances;
    // instances of super classes including instances of the class itself
    private List<OWLIndividual> classAndSuperClassInstances;
    // specific variables for generalised F-measure
    private TreeSet<OWLIndividual> negatedClassInstances;

    @ConfigOption(description = "Specifies, which method/function to use for computing accuracy. Available measues are \"pred_acc\" (predictive accuracy), \"fmeasure\" (F measure), \"generalised_fmeasure\" (generalised F-Measure according to Fanizzi and d'Amato).",
            defaultValue = "PRED_ACC")
    protected AccMethod accuracyMethod;

    @ConfigOption(description = "whether to check for consistency of suggestions (when added to ontology)",
            required = false,
            defaultValue = "true")
    private boolean checkConsistency = true;

    private final OWLDataFactory df = new OWLDataFactoryImpl();

    /**
     * Creates a learning problem without a reasoner.
     */
    public ClassLearningProblem() {

    }

    /**
     * @param reasoner the reasoner handed to the created utilities
     * @return class-learning specific reasoning utilities bound to this problem
     */
    @Override
    protected ReasoningUtils newReasoningUtils(AbstractReasonerComponent reasoner) {
        return new ReasoningUtilsCLP(this, reasoner);
    }

    /**
     * Creates a learning problem using the given reasoner.
     *
     * @param reasoner the reasoner passed on to the super class
     */
    public ClassLearningProblem(AbstractReasonerComponent reasoner) {
        super(reasoner);
    }

    /**
     * Validates the configured class, collects its instances and the instances of its
     * super classes, and configures the accuracy method (predictive accuracy if none
     * was set).
     *
     * @throws ComponentInitException if the class to describe is unknown to the reasoner
     *                                or has no instances
     */
    @Override
    public void init() throws ComponentInitException {

        // instanceof is already false for null, so no separate null check is needed
        if (accuracyMethod instanceof AccMethodPredAccApprox) {
            logger.warn("Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. If you consider to use it for anything serious, please extend the unit tests at org.dllearner.test.junit.HeuristicTests first to verify that it works.");
        }

        if (!getReasoner().getClasses().contains(classToDescribe)) {
            throw new ComponentInitException("The class \"" + classToDescribe + "\" does not exist. Make sure you spelled it correctly.");
        }

        classInstances = new ArrayList<>(getReasoner().getIndividuals(classToDescribe));
        // sanity check
        if (classInstances.isEmpty()) {
            throw new ComponentInitException("Class " + classToDescribe + " has 0 instances according to \"" + AnnComponentManager.getName(getReasoner().getClass()) + "\". Cannot perform class learning with 0 instances.");
        }

        classInstancesSet = new TreeSet<>(classInstances);

        // beta depends on whether we learn a definition or a super class
        double coverageFactor = equivalence ? betaEq : betaSC;

        // we compute the instances of the super class to perform
        // optimisations later on
        Set<OWLClassExpression> superClasses = getReasoner().getSuperClasses(classToDescribe);
        TreeSet<OWLIndividual> superClassInstancesTmp = new TreeSet<>(getReasoner().getIndividuals());
        for (OWLClassExpression superClass : superClasses) {
            superClassInstancesTmp.retainAll(getReasoner().getIndividuals(superClass));
        }
        // we create one list, which includes instances of the class
        // (an instance of the class is also instance of all super classes) ...
        classAndSuperClassInstances = new ArrayList<>(superClassInstancesTmp);
        // ... and a second list not including them
        superClassInstancesTmp.removeAll(classInstances);
        // since we use the instance list for approximations, we want to avoid
        // any bias through URI names, so we shuffle the list once pseudo-randomly
        // (fixed seed, so the order is reproducible between runs)
        superClassInstances = new ArrayList<>(superClassInstancesTmp);
        Random rand = new Random(1);
        Collections.shuffle(classInstances, rand);
        Collections.shuffle(superClassInstances, rand);

        if (accuracyMethod == null) {
            accuracyMethod = new AccMethodPredAcc(true);
        }
        if (accuracyMethod instanceof AccMethodApproximate) {
            ((AccMethodApproximate) accuracyMethod).setReasoner(getReasoner());
        }
        if (accuracyMethod instanceof AccMethodThreeValued) {
            // super class instances covered by the complement of the class to describe
            Coverage[] cc = reasoningUtil.getCoverage(df.getOWLObjectComplementOf(classToDescribe), superClassInstances);
            negatedClassInstances = Sets.newTreeSet(cc[0].trueSet);
        }
        if (accuracyMethod instanceof AccMethodWithBeta) {
            ((AccMethodWithBeta) accuracyMethod).setBeta(coverageFactor);
        }

        initialized = true;
    }

    /**
     * Computes coverage, recall, precision and accuracy of a class expression and, if
     * consistency checking is enabled, whether the corresponding axiom follows from the
     * knowledge base and keeps it consistent.
     *
     * @param description the class expression to score
     * @param noise       the noise value passed on to the accuracy computation
     * @return the score of the class expression
     * @throws IllegalStateException if the accuracy method is neither two-valued nor three-valued
     */
    @Override
    public ClassScore computeScore(OWLClassExpression description, double noise) {

        // TODO: reuse code to ensure that we never return inconsistent results
        // between getAccuracy, getAccuracyOrTooWeak and computeScore
        // NOTE(review): unlike getAccuracyOrTooWeak, this method does not reset
        // nanoStartTime - confirm whether the calls below consult terminationTimeExpired()
        Coverage[] cc = ((ReasoningUtilsCLP) reasoningUtil).getCoverageCLP(description, classInstances, superClassInstances);

        double recall = Heuristics.divideOrZero(cc[0].trueCount, classInstances.size()); // tp / (tp+fn)
        double precision = Heuristics.divideOrZero(cc[0].trueCount, cc[0].trueCount + cc[1].trueCount); // tp / (tp+fp)

        double acc;
        if (accuracyMethod instanceof AccMethodTwoValued) {
            acc = reasoningUtil.getAccuracyOrTooWeakExact2((AccMethodTwoValued) accuracyMethod, cc, noise);
        } else if (accuracyMethod instanceof AccMethodThreeValued) {
            acc = ((ReasoningUtilsCLP) reasoningUtil).getAccuracyOrTooWeakExact3((AccMethodThreeValued) accuracyMethod, description, classInstances, superClassInstances, negatedClassInstances, noise);
        } else {
            throw unsupportedAccuracyMethod();
        }

        if (!checkConsistency) {
            return new ClassScore(cc[0].trueSet, cc[0].falseSet, recall, cc[1].trueSet, precision, acc);
        }

        // we check whether the axiom already follows from the knowledge base
        boolean followsFromKB = followsFromKB(description);

        // workaround due to a bug (see http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
        // (if the axiom follows, then the knowledge base remains consistent)
        boolean isConsistent = followsFromKB || isConsistent(description);

        return new ClassScore(cc[0].trueSet, cc[0].falseSet, recall, cc[1].trueSet, precision, acc, isConsistent, followsFromKB);
    }

    /**
     * @return whether an equivalent class (rather than a super class) is learned
     */
    public boolean isEquivalenceProblem() {
        return equivalence;
    }

    /**
     * Records the start time used by {@link #terminationTimeExpired()} and delegates the
     * accuracy computation to the reasoning utilities, depending on whether the accuracy
     * method is three-valued or two-valued.
     *
     * @param description the class expression to test
     * @param noise       the noise value passed on to the accuracy computation
     * @return the value computed by the reasoning utilities
     * @throws IllegalStateException if the accuracy method is neither two-valued nor three-valued
     */
    @Override
    public double getAccuracyOrTooWeak(OWLClassExpression description, double noise) {
        nanoStartTime = System.nanoTime();
        if (accuracyMethod instanceof AccMethodThreeValued) {
            return ((ReasoningUtilsCLP) reasoningUtil).getAccuracyOrTooWeak3((AccMethodThreeValued) accuracyMethod, description, classInstances, superClassInstances, negatedClassInstances, noise);
        } else if (accuracyMethod instanceof AccMethodTwoValued) {
            return reasoningUtil.getAccuracyOrTooWeak2((AccMethodTwoValued) accuracyMethod, description, classInstances, superClassInstances, noise);
        } else {
            throw unsupportedAccuracyMethod();
        }
    }

    /**
     * Builds the exception thrown when the configured accuracy method matches none of the
     * supported kinds.
     */
    private IllegalStateException unsupportedAccuracyMethod() {
        return new IllegalStateException("Unsupported accuracy method: " + accuracyMethod
                + " (expected an AccMethodTwoValued or AccMethodThreeValued)");
    }

    /**
     * Logs a warning when the time limit is exceeded.
     *
     * @return whether the description test should be aborted because time expired
     */
    public boolean terminationTimeExpired() {
        long elapsedNanos = System.nanoTime() - nanoStartTime;
        boolean expired = elapsedNanos >= maxExecutionTimeInSeconds * NANOS_PER_SECOND;
        if (expired) {
            logger.warn("Description test aborted, because it took longer than {} seconds.", maxExecutionTimeInSeconds);
        }
        return expired;
    }

    // see http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf

    /**
     * @return the classToDescribe
     */
    public OWLClass getClassToDescribe() {
        return classToDescribe;
    }

    /**
     * @param classToDescribe the class for which a class expression should be learned
     */
    public void setClassToDescribe(OWLClass classToDescribe) {
        this.classToDescribe = classToDescribe;
    }

    /**
     * @param classIRI the IRI of the class for which a class expression should be learned
     */
    public void setClassToDescribe(IRI classIRI) {
        this.classToDescribe = df.getOWLClass(classIRI);
    }

    /* (non-Javadoc)
     * @see org.dllearner.core.LearningProblem#evaluate(org.dllearner.core.owl.Description)
     */
    @Override
    public EvaluatedDescriptionClass evaluate(OWLClassExpression description, double noise) {
        ClassScore score = computeScore(description, noise);
        return new EvaluatedDescriptionClass(description, score);
    }

    /**
     * Builds the equivalence or subclass axiom between the class to describe and the given
     * expression and asks the reasoner whether the knowledge base remains satisfiable with it.
     *
     * @param description the candidate class expression
     * @return the result of the reasoner's satisfiability check for the axiom
     */
    public boolean isConsistent(OWLClassExpression description) {
        OWLAxiom axiom = equivalence
                ? df.getOWLEquivalentClassesAxiom(classToDescribe, description)
                : df.getOWLSubClassOfAxiom(classToDescribe, description);
        return getReasoner().remainsSatisfiable(axiom);
    }

    /**
     * @param description the candidate class expression
     * @return whether the reasoner already considers the expression an equivalent class
     *         (equivalence learning) or a super class (super class learning) of the class to describe
     */
    public boolean followsFromKB(OWLClassExpression description) {
        if (equivalence) {
            return getReasoner().isEquivalentClass(description, classToDescribe);
        }
        return getReasoner().isSuperClassOf(description, classToDescribe);
    }

    public int getMaxExecutionTimeInSeconds() {
        return maxExecutionTimeInSeconds;
    }

    public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds) {
        this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds;
    }

    public boolean isEquivalence() {
        return equivalence;
    }

    public void setEquivalence(boolean equivalence) {
        this.equivalence = equivalence;
    }

    public double getBetaSC() {
        return betaSC;
    }

    public void setBetaSC(double betaSC) {
        this.betaSC = betaSC;
    }

    public double getBetaEq() {
        return betaEq;
    }

    public void setBetaEq(double betaEq) {
        this.betaEq = betaEq;
    }

    public boolean isCheckConsistency() {
        return checkConsistency;
    }

    public void setCheckConsistency(boolean checkConsistency) {
        this.checkConsistency = checkConsistency;
    }

    public AccMethod getAccuracyMethod() {
        return accuracyMethod;
    }

    @Autowired(required = false)
    public void setAccuracyMethod(AccMethod accuracyMethod) {
        this.accuracyMethod = accuracyMethod;
    }

    /**
     * Computes the fraction of instances of the class to describe that are covered by the
     * given expression.
     *
     * @param description the class expression to test
     * @return covered instances divided by tested instances, or 0 if no coverage could be
     *         computed or no instances were tested
     */
    public double getRecall(OWLClassExpression description) {
        ReasoningUtils.CoverageCount[] cc = reasoningUtil.getCoverageCount(description, classInstancesSet);
        // guard against a zero denominator, mirroring the divideOrZero semantics in computeScore
        if (cc == null || cc[0].total == 0) {
            return 0;
        }
        return cc[0].trueCount / (double) cc[0].total;
    }
}