001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.learningproblems; 020 021import org.dllearner.core.AbstractReasonerComponent; 022import org.dllearner.core.ComponentAnn; 023import org.dllearner.core.ComponentInitException; 024import org.dllearner.core.EvaluatedDescription; 025import org.dllearner.core.config.ConfigOption; 026import org.dllearner.core.owl.fuzzydll.FuzzyIndividual; 027import org.dllearner.learningproblems.Heuristics.HeuristicType; 028import org.dllearner.utilities.owl.OWLClassExpressionUtils; 029import org.semanticweb.owlapi.model.OWLClassExpression; 030import org.semanticweb.owlapi.model.OWLIndividual; 031import org.semanticweb.owlapi.model.OWLNamedIndividual; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import java.util.Iterator; 036import java.util.SortedSet; 037import java.util.TreeSet; 038 039/** 040 * The aim of this learning problem is to learn a concept definition such that 041 * the positive examples and the negative examples do not follow. It is 042 * 2-valued, because we only distinguish between covered and non-covered 043 * examples. 
(A 3-valued problem distinguishes between covered examples,
 * examples covered by the negation of the concept, and all other examples.) The
 * 2-valued learning problem is often more useful for Description Logics due to
 * (the Open World Assumption and) the fact that negative knowledge, e.g. that a
 * person does not have a child, is hard or even impossible to express.
 *
 * @author Jens Lehmann
 */
@ComponentAnn(name = "FuzzyPosNegLPStandard", shortName = "fuzzyPosNeg", version = 0.2)
public class FuzzyPosNegLPStandard extends FuzzyPosNegLP {

	private static final Logger logger = LoggerFactory.getLogger(FuzzyPosNegLPStandard.class);

	// approximation and F-measure
	// (taken from class learning => super class instances correspond to negative examples
	// and class instances to positive examples)

	/** Maximum width of the confidence interval accepted by the approximation methods. */
	private double approxDelta = 0.05;

	/** If true, accuracy is estimated on a sample of the examples instead of testing all of them. */
	private boolean useApproximations;

	/** Heuristic used for dispatch in the exact and approximate accuracy computations below. */
	private HeuristicType heuristic = HeuristicType.PRED_ACC;

	// fixed typo in the user-visible description: "measues" -> "measures"
	@ConfigOption(description = "Specifies, which method/function to use for computing accuracy. Available measures are \"PRED_ACC\" (predictive accuracy), \"FMEASURE\" (F measure), \"GEN_FMEASURE\" (generalised F-Measure according to Fanizzi and d'Amato).",defaultValue = "PRED_ACC")
	private HeuristicType accuracyMethod = HeuristicType.PRED_ACC;

	public FuzzyPosNegLPStandard() {}

	public FuzzyPosNegLPStandard(AbstractReasonerComponent reasoningService) {
		super(reasoningService);
	}

	public FuzzyPosNegLPStandard(AbstractReasonerComponent reasoningService, SortedSet<OWLIndividual> positiveExamples, SortedSet<OWLIndividual> negativeExamples) {
		super(reasoningService);
		this.positiveExamples = positiveExamples;
		this.negativeExamples = negativeExamples;
	}

	@Override
	public void init() throws ComponentInitException {
		super.init();

		// NOTE(review): this warning tests 'accuracyMethod', while the actual dispatch in
		// getAccuracyOrTooWeakApprox/Exact uses the separate 'heuristic' field - confirm
		// which of the two options is meant to be authoritative.
		if(useApproximations && accuracyMethod.equals(HeuristicType.PRED_ACC)) {
			logger.warn("Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. If you consider to use it for anything serious, please extend the unit tests at org.dllearner.test.junit.HeuristicTests first and verify that it works.");
		}

		initialized = true;
	}

	/**
	 * Computes the accuracy of a description, or -1 ("too weak") if it fails the
	 * noise threshold. Delegates to the approximate or exact variant depending on
	 * {@link #isUseApproximations()}.
	 *
	 * @param description the class expression to evaluate
	 * @param noise maximal tolerated fraction of uncovered positive examples
	 * @return accuracy in [0,1], or -1 if the description is too weak
	 */
	@Override
	public double getAccuracyOrTooWeak(OWLClassExpression description, double noise) {
		return useApproximations ? getAccuracyOrTooWeakApprox(description, noise)
		                         : getAccuracyOrTooWeakExact(description, noise);
	}

	/**
	 * Approximate (sample-based) accuracy computation. Examples are tested
	 * incrementally and the method returns early once the confidence interval of
	 * the estimate is narrower than {@link #approxDelta}.
	 */
	private double getAccuracyOrTooWeakApprox(OWLClassExpression description, double noise) {
		if(heuristic.equals(HeuristicType.PRED_ACC)) {
			// number of positives that may remain uncovered before we call the description too weak
			int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());

			int notCoveredPos = 0;

			int posClassifiedAsPos = 0;
			int negClassifiedAsNeg = 0;

			int nrOfPosChecks = 0;
			int nrOfNegChecks = 0;

			// we test positive and negative examples in turn
			Iterator<OWLIndividual> itPos = positiveExamples.iterator();
			Iterator<OWLIndividual> itNeg = negativeExamples.iterator();

			do {
				// in each loop iteration we pick 0 or 1 positives and 0 or 1 negatives
				// and classify them

				if(itPos.hasNext()) {
					OWLIndividual posExample = itPos.next();

					if(getReasoner().hasType(description, posExample)) {
						posClassifiedAsPos++;
					} else {
						notCoveredPos++;
					}
					nrOfPosChecks++;

					// take noise into account: abort early when too many positives are uncovered
					if(notCoveredPos > maxNotCovered) {
						return -1;
					}
				}

				if(itNeg.hasNext()) {
					OWLIndividual negExample = itNeg.next();
					if(!getReasoner().hasType(description, negExample)) {
						negClassifiedAsNeg++;
					}
					nrOfNegChecks++;
				}

				// compute how accurate our current approximation is and return if it is sufficiently accurate
				double[] approx = Heuristics.getPredAccApproximation(positiveExamples.size(), negativeExamples.size(), 1, nrOfPosChecks, posClassifiedAsPos, nrOfNegChecks, negClassifiedAsNeg);
				if(approx[1]<approxDelta) {
					return approx[0];
				}

			} while(itPos.hasNext() || itNeg.hasNext());

			// all examples were tested: exact predictive accuracy
			return Heuristics.getPredictiveAccuracy(positiveExamples.size(), negativeExamples.size(), posClassifiedAsPos, negClassifiedAsNeg, 1);

		} else if(heuristic.equals(HeuristicType.FMEASURE)) {
			// we abort when there are too many uncovered positives
			int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());
			int instancesCovered = 0;
			int instancesNotCovered = 0;

			for(OWLIndividual ind : positiveExamples) {
				if(getReasoner().hasType(description, ind)) {
					instancesCovered++;
				} else {
					instancesNotCovered ++;
					if(instancesNotCovered > maxNotCovered) {
						return -1;
					}
				}
			}

			double recall = instancesCovered/(double)positiveExamples.size();

			int testsPerformed = 0;
			int instancesDescription = 0;

			for(OWLIndividual ind : negativeExamples) {

				if(getReasoner().hasType(description, ind)) {
					instancesDescription++;
				}
				testsPerformed++;

				// check whether the F-score approximation is sufficiently accurate
				double[] approx = Heuristics.getFScoreApproximation(instancesCovered, recall, 1, negativeExamples.size(), testsPerformed, instancesDescription);
				if(approx[1]<approxDelta) {
					return approx[0];
				}

			}

			// standard computation (no approximation)
			double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
			return Heuristics.getFScore(recall, precision, 1);
		} else {
			throw new Error("Approximation for " + heuristic + " not implemented.");
		}
	}

	/** Exact accuracy computation; dispatches on {@link #heuristic}. */
	private double getAccuracyOrTooWeakExact(OWLClassExpression description, double noise) {
		if(heuristic.equals(HeuristicType.PRED_ACC)) {
			return getPredAccuracyOrTooWeakExact(description, noise);
		} else if(heuristic.equals(HeuristicType.FMEASURE)) {
			return getFMeasureOrTooWeakExact(description, noise);
		} else {
			throw new Error("Heuristic " + heuristic + " not implemented.");
		}
	}

	/**
	 * Fuzzy predictive accuracy: for each fuzzy example the per-individual score is
	 * 1 - |truthDegree - reasonerMembership|, i.e. how closely the reasoner's fuzzy
	 * membership matches the expected truth degree. The truth-degree-weighted running
	 * total is compared against the best still-achievable score to abort early
	 * ("too weak") when the noise threshold can no longer be met.
	 *
	 * @return mean per-individual score over all fuzzy examples, or -1 if too weak
	 */
	private double getPredAccuracyOrTooWeakExact(OWLClassExpression description, double noise) {

		double descriptionMembership = 0;
		double nonAccumulativeDescriptionMembership;
		double accumulativeDescriptionMembership = 0;

		// remaining truth mass of the examples not yet tested (optimistic upper bound)
		double individualCounter = totalTruth;
		for (FuzzyIndividual fuzzyExample : fuzzyExamples) {
			nonAccumulativeDescriptionMembership = 1 - Math.abs(fuzzyExample.getTruthDegree() - getReasoner().hasTypeFuzzyMembership(description, fuzzyExample));
			descriptionMembership += nonAccumulativeDescriptionMembership;
			individualCounter -= fuzzyExample.getTruthDegree();
			// even assuming a perfect score on all remaining examples, can the noise threshold still be reached?
			if ((accumulativeDescriptionMembership + (nonAccumulativeDescriptionMembership * fuzzyExample.getTruthDegree()) + individualCounter) < ((1 - noise) * totalTruth))
				return -1;
			accumulativeDescriptionMembership += nonAccumulativeDescriptionMembership * fuzzyExample.getTruthDegree();
		}

		return descriptionMembership / fuzzyExamples.size();
	}

	// added by Josue
	// crisp (non-fuzzy) predictive accuracy; currently only kept for comparison/debugging
	private double crispAccuracy(OWLClassExpression description, double noise) {
		int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());

		int notCoveredPos = 0;
		int notCoveredNeg = 0;

		for (OWLIndividual example : positiveExamples) {
			if (!getReasoner().hasType(description, example)) {
				notCoveredPos++;
				// fixed off-by-one: use '>' like every other method in this class, so that
				// exactly maxNotCovered uncovered positives are still tolerated
				if(notCoveredPos > maxNotCovered) {
					return -1;
				}
			}
		}
		for (OWLIndividual example : negativeExamples) {
			if (!getReasoner().hasType(description, example)) {
				notCoveredNeg++;
			}
		}
		return (positiveExamples.size() - notCoveredPos + notCoveredNeg) / (double) allExamples.size();
	}

	// added by Josue
	// crisp (non-fuzzy) F-measure; currently only kept for comparison/debugging
	private double crispfMeasure(OWLClassExpression description, double noise) {
		// negatives classified as instances of the description ("additional" coverage)
		int additionalInstances = 0;
		for(OWLIndividual ind : negativeExamples) {
			if(getReasoner().hasType(description, ind)) {
				additionalInstances++;
			}
		}

		int coveredInstances = 0;
		for(OWLIndividual ind : positiveExamples) {
			if(getReasoner().hasType(description, ind)) {
				coveredInstances++;
			}
		}

		double recall = coveredInstances/(double)positiveExamples.size();

		if(recall < 1 - noise) {
			return -1;
		}

		double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);

		return Heuristics.getFScore(recall, precision);
	}

	/**
	 * Fuzzy F-measure (added by Josue): recall and precision are computed from
	 * truth-degree-weighted per-individual scores 1 - |truthDegree - membership|.
	 *
	 * @return fuzzy F-score, or -1 if fuzzy recall falls below 1 - noise
	 */
	private double getFMeasureOrTooWeakExact(OWLClassExpression description, double noise) {

		double coveredMembershipDegree = 0;
		double totalMembershipDegree = 0;
		double invertedCoveredMembershipDegree = 0;
		double lastMembershipDegree;

		for (FuzzyIndividual ind: fuzzyExamples) {
			lastMembershipDegree = (1 - Math.abs(ind.getTruthDegree() - getReasoner().hasTypeFuzzyMembership(description, ind)));
			coveredMembershipDegree += lastMembershipDegree * ind.getTruthDegree();
			totalMembershipDegree += ind.getTruthDegree();
			invertedCoveredMembershipDegree += (1 - ind.getTruthDegree()) * (1 - lastMembershipDegree);
		}
		double fuzzyRecall = totalMembershipDegree == 0 ? 0 :coveredMembershipDegree/totalMembershipDegree;

		if(fuzzyRecall < 1 - noise) {
			return -1;
		}
		double fuzzyPrecision = (coveredMembershipDegree + invertedCoveredMembershipDegree) == 0 ? 0: coveredMembershipDegree / (coveredMembershipDegree + invertedCoveredMembershipDegree);

		return Heuristics.getFScore(fuzzyRecall, fuzzyPrecision);
	}

	// instead of using the standard operation, we use optimisation
	// and approximation here;
	// now deprecated because the Heuristics helper class is used
	@Deprecated
	public double getFMeasureOrTooWeakApprox(OWLClassExpression description, double noise) {
		// we abort when there are too many uncovered positives
		int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());
		int instancesCovered = 0;
		int instancesNotCovered = 0;
		boolean estimatedA = false;

		double lowerBorderA = 0;
		int lowerEstimateA = 0;
		double upperBorderA = 1;
		int upperEstimateA = positiveExamples.size();

		for(OWLIndividual ind : positiveExamples) {
			if(getReasoner().hasType(description, ind)) {
				instancesCovered++;
			} else {
				instancesNotCovered ++;
				if(instancesNotCovered > maxNotCovered) {
					return -1;
				}
			}

			// approximation step (starting after 10 tests)
			int total = instancesCovered + instancesNotCovered;
			if(total > 10) {
				// compute confidence interval
				double p1 = Heuristics.p1(instancesCovered, total);
				double p2 = Heuristics.p3(p1, total);
				lowerBorderA = Math.max(0, p1 - p2);
				upperBorderA = Math.min(1, p1 + p2);
				double size = upperBorderA - lowerBorderA;
				// if the interval has a size smaller than 10%, we can be confident
				if(size < 2 * approxDelta) {
					// we have to distinguish the cases that the accuracy limit is
					// below, within, or above the limit and that the mean is below
					// or above the limit
					double mean = instancesCovered/(double)total;

					// if the mean is greater than the required minimum, we can accept;
					// we also accept if the interval is small and close to the minimum
					// (worst case is to accept a few inaccurate descriptions)
					if(mean > 1-noise || (upperBorderA > mean && size < 0.03)) {
						instancesCovered = (int) (instancesCovered/(double)total * positiveExamples.size());
						upperEstimateA = (int) (upperBorderA * positiveExamples.size());
						lowerEstimateA = (int) (lowerBorderA * positiveExamples.size());
						estimatedA = true;
						break;
					}

					// reject only if the upper border is far away (we are very
					// certain not to lose a potential solution)
					if(upperBorderA + 0.1 < 1-noise) {
						return -1;
					}
				}
			}
		}

		double recall = instancesCovered/(double)positiveExamples.size();

		// we know that a definition candidate is always subclass of the
		// intersection of all super classes, so we test only the relevant instances
		// (leads to undesired effects for descriptions not following this rule,
		// but improves performance a lot);
		// for learning a superclass of a defined class, similar observations apply;

		int testsPerformed = 0;
		int instancesDescription = 0;

		for(OWLIndividual ind : negativeExamples) {

			if(getReasoner().hasType(description, ind)) {
				instancesDescription++;
			}

			testsPerformed++;

			if(testsPerformed > 10) {

				// compute confidence interval
				double p1 = Heuristics.p1(instancesDescription, testsPerformed);
				double p2 = Heuristics.p3(p1, testsPerformed);
				double lowerBorder = Math.max(0, p1 - p2);
				double upperBorder = Math.min(1, p1 + p2);
				int lowerEstimate = (int) (lowerBorder * negativeExamples.size());
				int upperEstimate = (int) (upperBorder * negativeExamples.size());

				double size;
				if(estimatedA) {
					size = getFMeasure(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getFMeasure(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate));
				} else {
					size = getFMeasure(recall, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getFMeasure(recall, instancesCovered/(double)(instancesCovered+upperEstimate));
				}

				if(size < 0.1) {
					instancesDescription = (int) (instancesDescription/(double)testsPerformed * negativeExamples.size());
					break;
				}
			}
		}

		double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
		if(instancesCovered + instancesDescription == 0) {
			precision = 0;
		}

		return getFMeasure(recall, precision);
	}

	/* (non-Javadoc)
	 * @see org.dllearner.core.LearningProblem#evaluate(org.dllearner.core.owl.Description)
	 */
	@Override
	public EvaluatedDescription evaluate(OWLClassExpression description) {
		ScorePosNeg score = computeScore(description);
		return new EvaluatedDescriptionPosNeg(description, score);
	}

	/** Harmonic mean of precision and recall; returns 0 instead of NaN when both are 0. */
	private double getFMeasure(double recall, double precision) {
		// guard against 0/0 which previously produced NaN
		return (precision + recall == 0) ? 0.0 : 2 * precision * recall / (precision + recall);
	}

	public double getApproxDelta() {
		return approxDelta;
	}

	public void setApproxDelta(double approxDelta) {
		this.approxDelta = approxDelta;
	}

	public boolean isUseApproximations() {
		return useApproximations;
	}

	public void setUseApproximations(boolean useApproximations) {
		this.useApproximations = useApproximations;
	}

	public HeuristicType getHeuristic() {
		return heuristic;
	}

	public void setHeuristic(HeuristicType heuristic) {
		this.heuristic = heuristic;
	}

	/**
	 * @return the accuracy method currently configured
	 */
	public HeuristicType getAccuracyMethod() {
		return accuracyMethod;
	}

	/**
	 * @param accuracyMethod the accuracy method to set
	 */
	public void setAccuracyMethod(HeuristicType accuracyMethod) {
		this.accuracyMethod = accuracyMethod;
	}

	/**
	 * Computes accuracy from crisp classification counts according to
	 * {@link #accuracyMethod}.
	 *
	 * @param posAsPos positives classified as positive
	 * @param posAsNeg positives classified as negative
	 * @param negAsPos negatives classified as positive
	 * @param negAsNeg negatives classified as negative
	 * @param noise maximal tolerated fraction of misclassified positives
	 * @return accuracy in [0,1], or -1 if too weak
	 */
	public double getAccuracy(int posAsPos, int posAsNeg, int negAsPos, int negAsNeg, double noise) {
		int maxNotCovered = (int) Math.ceil(noise * positiveExamples.size());

		if(posAsNeg > maxNotCovered) {
			return -1;
		}

		switch (accuracyMethod) {
		case PRED_ACC:
			return (posAsPos + negAsNeg) / (double) allExamples.size();
		case FMEASURE:
			double recall = posAsPos / (double)positiveExamples.size();

			if(recall < 1 - noise) {
				return -1;
			}

			double precision = (negAsPos + posAsPos == 0) ? 0 : posAsPos / (double) (posAsPos + negAsPos);

			return Heuristics.getFScore(recall, precision);
		default:
			throw new Error("Heuristic " + accuracyMethod + " not implemented.");
		}

	}

	/* (non-Javadoc)
	 * @see org.dllearner.core.AbstractLearningProblem#computeScore(org.semanticweb.owlapi.model.OWLObject, double)
	 */
	@Override
	public ScorePosNeg<OWLNamedIndividual> computeScore(OWLClassExpression concept, double noise) {
		// partition all examples into the four crisp classification outcomes
		SortedSet<OWLIndividual> posAsPos = new TreeSet<>();
		SortedSet<OWLIndividual> posAsNeg = new TreeSet<>();
		SortedSet<OWLIndividual> negAsPos = new TreeSet<>();
		SortedSet<OWLIndividual> negAsNeg = new TreeSet<>();

		for (OWLIndividual example : positiveExamples) {
			if (getReasoner().hasType(concept, example)) {
				posAsPos.add(example);
			} else {
				posAsNeg.add(example);
			}
		}
		for (OWLIndividual example : negativeExamples) {
			if (getReasoner().hasType(concept, example)) {
				negAsPos.add(example);
			} else {
				negAsNeg.add(example);
			}
		}

		// TODO: this computes accuracy twice - more elegant method should be implemented
		double accuracy = getAccuracy(posAsPos.size(), posAsNeg.size(), negAsPos.size(), negAsNeg.size(), noise);

		return new ScoreTwoValued(
				OWLClassExpressionUtils.getLength(concept),
				getPercentPerLengthUnit(),
				posAsPos, posAsNeg, negAsPos, negAsNeg,
				accuracy);
	}

}