001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.learningproblems; 020 021import com.google.common.collect.Sets; 022import org.dllearner.accuracymethods.*; 023import org.dllearner.core.*; 024import org.dllearner.core.config.ConfigOption; 025import org.dllearner.utilities.Helper; 026import org.dllearner.utilities.ReasoningUtils; 027import org.dllearner.utilities.ReasoningUtils.Coverage; 028import org.dllearner.utilities.ReasoningUtilsCLP; 029import org.semanticweb.owlapi.model.*; 030import org.slf4j.Logger; 031import org.slf4j.LoggerFactory; 032import org.springframework.beans.factory.annotation.Autowired; 033import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; 034 035import java.util.*; 036 037/** 038 * The problem of learning the OWL class expression for another OWL class expression 039 * in an OWL ontology. 040 * This for example allows to learn domain or range of a property by descibing the following class expressions: 041 * <ul> 042 * <li><code>Dom(p, C) -> âp.â¤</code> </li> 043 * <li><code>Ran(p, C) -> âp^(-1).â¤</code></li> 044 * </ul> 045 * 046 * Note, this learning problem generalizes the {@link ClassLearningProblem}, which will be kept in the code for legacy 047 * reasons. 048 * 049 * @author Lorenz Buehmann 050 * @author Jens Lehmann 051 * 052 */ 053@ComponentAnn(name = "ClassExpressionLearningProblem", shortName = "celp", version = 0.6) 054public class ClassExpressionLearningProblem extends AbstractClassExpressionLearningProblem<ClassScore> { 055 056 private static Logger logger = LoggerFactory.getLogger(ClassExpressionLearningProblem.class); 057 private long nanoStartTime; 058 @ConfigOption(defaultValue = "10", 059 description = "Maximum execution time in seconds") 060 private int maxExecutionTimeInSeconds = 10; 061 062 @ConfigOption(description = "OWL class expression of which an OWL class expression should be learned", 063 required = true) 064 private OWLClassExpression classExpressionToDescribe; 065 066 private List<OWLIndividual> classInstances; 067 private TreeSet<OWLIndividual> classInstancesSet; 068 @ConfigOption(defaultValue = "true", 069 description = "Whether this is an equivalence problem (or superclass learning problem)") 070 private boolean equivalence = true; 071 072 @ConfigOption(description = "beta index for F-measure in super class learning", 073 required = false, 074 defaultValue = "3.0") 075 private double betaSC = 3.0; 076 077 @ConfigOption(description = "beta index for F-measure in definition learning", 078 required = false, 079 defaultValue = "1.0") 080 private double betaEq = 1.0; 081 082 // instances of super classes excluding instances of the class itself 083 private List<OWLIndividual> superClassInstances; 084 // instances of super classes including instances of the class itself 085 private List<OWLIndividual> classAndSuperClassInstances; 086 // specific variables for generalised F-measure 087 private TreeSet<OWLIndividual> negatedClassInstances; 088 089 @ConfigOption(description = "Specifies, which method/function to use for computing accuracy. Available measues are \"pred_acc\" (predictive accuracy), \"fmeasure\" (F measure), \"generalised_fmeasure\" (generalised F-Measure according to Fanizzi and d'Amato).", 090 defaultValue = "PRED_ACC") 091 protected AccMethod accuracyMethod; 092 093 @ConfigOption(description = "whether to check for consistency of suggestions (when added to ontology)", 094 required = false, 095 defaultValue = "true") 096 private boolean checkConsistency = true; 097 098 private OWLDataFactory df = new OWLDataFactoryImpl(); 099 100 public ClassExpressionLearningProblem() {} 101 102 public ClassExpressionLearningProblem(AbstractReasonerComponent reasoner) { 103 super(reasoner); 104 } 105 106 @Override 107 protected ReasoningUtils newReasoningUtils(AbstractReasonerComponent reasoner) { 108 return new ReasoningUtils(reasoner); 109 } 110 111 @Override 112 public void init() throws ComponentInitException { 113 114 if (accuracyMethod != null && accuracyMethod instanceof AccMethodPredAccApprox) { 115 logger.warn("Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. If you consider to use it for anything serious, please extend the unit tests at org.dllearner.test.junit.HeuristicTests first to verify that it works."); 116 } 117 118 // check if entities in signature of class expression occur in ontology 119 if (!Helper.checkConceptEntities(getReasoner(), classExpressionToDescribe)) { 120 throw new ComponentInitException("Some entities in \"" + classExpressionToDescribe + "\" do not exist. Make sure you spelled it correctly."); 121 } 122 123 classInstances = new LinkedList<>(getReasoner().getIndividuals(classExpressionToDescribe)); 124 // sanity check 125 if (classInstances.size() == 0) { 126 throw new ComponentInitException("Class " + classExpressionToDescribe + " has 0 instances according to \"" + 127 AnnComponentManager.getName(getReasoner().getClass()) + "\". Cannot perform class learning with 0 instances."); 128 } 129 130 classInstancesSet = new TreeSet<>(classInstances); 131 132 double coverageFactor; 133 if (equivalence) { 134 coverageFactor = betaEq; 135 } else { 136 coverageFactor = betaSC; 137 } 138 139 // we compute the instances of the super class to perform 140 // optimisations later on 141 Set<OWLClassExpression> superClasses = getReasoner().getSuperClasses(classExpressionToDescribe); 142 TreeSet<OWLIndividual> superClassInstancesTmp = new TreeSet<>(getReasoner().getIndividuals()); 143 for (OWLClassExpression superClass : superClasses) { 144 superClassInstancesTmp.retainAll(getReasoner().getIndividuals(superClass)); 145 } 146 // we create one list, which includes instances of the class (an instance of the class is also instance of all super classes) ... 147 classAndSuperClassInstances = new LinkedList<>(superClassInstancesTmp); 148 // ... and a second list not including them 149 superClassInstancesTmp.removeAll(classInstances); 150 // since we use the instance list for approximations, we want to avoid 151 // any bias through URI names, so we shuffle the list once pseudo-randomly 152 superClassInstances = new LinkedList<>(superClassInstancesTmp); 153 Random rand = new Random(1); 154 Collections.shuffle(classInstances, rand); 155 Collections.shuffle(superClassInstances, rand); 156 157 158 if (accuracyMethod == null) { 159 accuracyMethod = new AccMethodPredAcc(true); 160 } 161 162 if (accuracyMethod instanceof AccMethodApproximate) { 163 ((AccMethodApproximate) accuracyMethod).setReasoner(getReasoner()); 164 } 165 166 if (accuracyMethod instanceof AccMethodThreeValued) { 167 Coverage[] cc = reasoningUtil.getCoverage(df.getOWLObjectComplementOf(classExpressionToDescribe), superClassInstances); 168 negatedClassInstances = Sets.newTreeSet(cc[0].trueSet); 169// System.out.println("negated class instances: " + negatedClassInstances); 170 } 171 172 if (accuracyMethod instanceof AccMethodWithBeta) { 173 ((AccMethodWithBeta)accuracyMethod).setBeta(coverageFactor); 174 } 175 176// System.out.println(classInstances.size() + " " + superClassInstances.size()); 177 } 178 179 @Override 180 public ClassScore computeScore(OWLClassExpression description, double noise) { 181 182 // TODO: reuse code to ensure that we never return inconsistent results 183 // between getAccuracy, getAccuracyOrTooWeak and computeScore 184 Coverage[] cc = ((ReasoningUtilsCLP)reasoningUtil).getCoverageCLP(description, classInstances, superClassInstances); 185 186 double recall = Heuristics.divideOrZero(cc[0].trueCount, classInstances.size()); // tp / (tp+fn) 187 double precision = Heuristics.divideOrZero(cc[0].trueCount, cc[0].trueCount + cc[1].trueCount); // tp / (tp+fp) 188 // for each OWLClassExpression with less than 100% coverage, we check whether it is 189 // leads to an inconsistent knowledge base 190 191 double acc; 192 if (accuracyMethod instanceof AccMethodTwoValued) { 193 acc = reasoningUtil.getAccuracyOrTooWeakExact2((AccMethodTwoValued) accuracyMethod, cc, noise); 194 } else if (accuracyMethod instanceof AccMethodThreeValued) { 195 acc = ((ReasoningUtilsCLP)reasoningUtil).getAccuracyOrTooWeakExact3((AccMethodThreeValued) accuracyMethod, description, classInstances, superClassInstances, negatedClassInstances, noise); 196 } else { 197 throw new RuntimeException(); 198 } 199 200 if (checkConsistency) { 201 202 // we check whether the axiom already follows from the knowledge base 203 boolean followsFromKB = followsFromKB(description); 204 205 // workaround due to a bug (see http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319) 206 // (if the axiom follows, then the knowledge base remains consistent) 207 boolean isConsistent = followsFromKB || isConsistent(description); 208 209 return new ClassScore(cc[0].trueSet, cc[0].falseSet, recall, cc[1].trueSet, precision, acc, isConsistent, followsFromKB); 210 211 } else { 212 return new ClassScore(cc[0].trueSet, cc[0].falseSet, recall, cc[1].trueSet, precision, acc); 213 } 214 } 215 216 public boolean isEquivalenceProblem() { 217 return equivalence; 218 } 219 220 @Override 221 public double getAccuracyOrTooWeak(OWLClassExpression description, double noise) { 222 nanoStartTime = System.nanoTime(); 223 if (accuracyMethod instanceof AccMethodThreeValued) { 224 return ((ReasoningUtilsCLP)reasoningUtil).getAccuracyOrTooWeak3((AccMethodThreeValued) accuracyMethod, description, classInstances, superClassInstances, negatedClassInstances, noise); 225 } else if (accuracyMethod instanceof AccMethodTwoValued) { 226 return reasoningUtil.getAccuracyOrTooWeak2((AccMethodTwoValued) accuracyMethod, description, classInstances, superClassInstances, noise); 227 } else { 228 throw new RuntimeException("Method ClassExpressionLearningProblem::getAccuracyOrTooWeak doesn't handle accuracyMethod " + accuracyMethod); 229 } 230 } 231 232 /** 233 * @return whether the description test should be aborted because time expired 234 */ 235 public boolean terminationTimeExpired() { 236 boolean val = ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds * 1000000000L)); 237 if (val) { 238 logger.warn("Description test aborted, because it took longer than " + maxExecutionTimeInSeconds + " seconds."); 239 } 240 return val; 241 } 242 243 // see http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf 244 // for all methods below (currently dummies) 245 246 /** 247 * @return the class expression to describe 248 */ 249 public OWLClassExpression getClassExpressionToDescribe() { 250 return classExpressionToDescribe; 251 } 252 253 public void setClassExpressionToDescribe(OWLClassExpression classExpressionToDescribe) { 254 this.classExpressionToDescribe = classExpressionToDescribe; 255 } 256 257 public void setClassToDescribe(IRI classIRI) { 258 setClassExpressionToDescribe(df.getOWLClass(classIRI)); 259 } 260 261 /* (non-Javadoc) 262 * @see org.dllearner.core.LearningProblem#evaluate(org.dllearner.core.owl.Description) 263 */ 264 @Override 265 public EvaluatedDescriptionClass evaluate(OWLClassExpression description, double noise) { 266 ClassScore score = computeScore(description, noise); 267 return new EvaluatedDescriptionClass(description, score); 268 } 269 270 /** 271 * @return whether the current ontology remains logically consistent if the given class expression is 272 * added OWL axiom, i.e. 273 * <ul> 274 * <li>SubClassOf(classToDescribe, ce) or</li> 275 * <li>EquivalentClass(classToDescribe, ce)</li> 276 * </ul> 277 * depending on the type of axiom set via {@link ClassExpressionLearningProblem#setEquivalence(boolean)} method. 278 * @see AbstractReasonerComponent#remainsSatisfiable(OWLAxiom) 279 */ 280 public boolean isConsistent(OWLClassExpression ce) { 281 OWLAxiom axiom = equivalence 282 ? df.getOWLEquivalentClassesAxiom(classExpressionToDescribe, ce) 283 : df.getOWLSubClassOfAxiom(classExpressionToDescribe, ce); 284 return getReasoner().remainsSatisfiable(axiom); 285 } 286 287 /** 288 * @return whether the given class expression as OWL axiom of type 289 * <ul> 290 * <li>SubClassOf(classToDescribe, ce) or</li> 291 * <li>EquivalentClass(classToDescribe, ce)</li> 292 * </ul> 293 * (depending on the type of axiom set via {@link ClassExpressionLearningProblem#setEquivalence(boolean)} method) 294 * can already be logically derived from existing axioms in the ontology, i.e. it's logically redundant. 295 * @see AbstractReasonerComponent#remainsSatisfiable(OWLAxiom) 296 */ 297 public boolean followsFromKB(OWLClassExpression description) { 298 return equivalence 299 ? getReasoner().isEquivalentClass(description, classExpressionToDescribe) 300 : getReasoner().isSuperClassOf(description, classExpressionToDescribe); 301 } 302 303 public int getMaxExecutionTimeInSeconds() { 304 return maxExecutionTimeInSeconds; 305 } 306 307 public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds) { 308 this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds; 309 } 310 311 public boolean isEquivalence() { 312 return equivalence; 313 } 314 315 public void setEquivalence(boolean equivalence) { 316 this.equivalence = equivalence; 317 } 318 319 public double getBetaSC() { 320 return betaSC; 321 } 322 323 public void setBetaSC(double betaSC) { 324 this.betaSC = betaSC; 325 } 326 327 public double getBetaEq() { 328 return betaEq; 329 } 330 331 public void setBetaEq(double betaEq) { 332 this.betaEq = betaEq; 333 } 334 335 public boolean isCheckConsistency() { 336 return checkConsistency; 337 } 338 339 public void setCheckConsistency(boolean checkConsistency) { 340 this.checkConsistency = checkConsistency; 341 } 342 343 public AccMethod getAccuracyMethod() { 344 return accuracyMethod; 345 } 346 347 @Autowired(required = false) 348 public void setAccuracyMethod(AccMethod accuracyMethod) { 349 this.accuracyMethod = accuracyMethod; 350 } 351 352 public double getRecall(OWLClassExpression description) { 353 ReasoningUtils.CoverageCount[] cc = reasoningUtil.getCoverageCount(description, classInstancesSet); 354 if (cc == null) { 355 return 0; 356 } 357 return cc[0].trueCount/(double)cc[0].total; 358 } 359} 360