001package org.dllearner.accuracymethods; 002 003import org.dllearner.core.ComponentAnn; 004import org.dllearner.core.Reasoner; 005import org.dllearner.core.config.ConfigOption; 006import org.dllearner.learningproblems.Heuristics; 007import org.semanticweb.owlapi.model.OWLClassExpression; 008import org.semanticweb.owlapi.model.OWLIndividual; 009 010import java.util.Collection; 011 012@ComponentAnn(name = "AMeasure Approximate", shortName = "approx.ameasure", version = 0.1) 013public class AccMethodAMeasureApprox extends AccMethodAMeasure implements AccMethodTwoValuedApproximate { 014 015 @ConfigOption(description = "(configured by learning problem)") 016 private Reasoner reasoner; 017 @ConfigOption(description = "The Approximate Delta", defaultValue = "0.05") 018 private double approxDelta = 0.05; 019 020 @Override 021 public double getApproxDelta() { 022 return approxDelta; 023 } 024 025 @Override 026 public void setApproxDelta(double approxDelta) { 027 this.approxDelta = approxDelta; 028 } 029 030 @Override 031 public void setReasoner(Reasoner reasoner) { 032 this.reasoner = reasoner; 033 } 034 035 @Override 036 public double getAccApprox2(OWLClassExpression description, 037 Collection<OWLIndividual> classInstances, 038 Collection<OWLIndividual> superClassInstances, 039 double noise) { 040 // we abort when there are too many uncovered positives 041 int maxNotCovered = (int) Math.ceil(noise*classInstances.size()); 042 int instancesCovered = 0; 043 int instancesNotCovered = 0; 044 int total = 0; 045 boolean estimatedA = false; 046 047 double lowerBorderA = 0; 048 int lowerEstimateA = 0; 049 double upperBorderA = 1; 050 int upperEstimateA = classInstances.size(); 051 052 for(OWLIndividual ind : classInstances) { 053 if(reasoner.hasType(description, ind)) { 054 instancesCovered++; 055 } else { 056 instancesNotCovered ++; 057 if(instancesNotCovered > maxNotCovered) { 058 return -1; 059 } 060 } 061 062 // approximation step (starting after 10 tests) 063 total = instancesCovered + instancesNotCovered; 064 if(total > 10) { 065 // compute confidence interval 066 double p1 = Heuristics.p1(instancesCovered, total); 067 double p2 = Heuristics.p3(p1, total); 068 lowerBorderA = Math.max(0, p1 - p2); 069 upperBorderA = Math.min(1, p1 + p2); 070 double size = upperBorderA - lowerBorderA; 071 // if the interval has a size smaller than 10%, we can be confident 072 if(size < 2 * approxDelta) { 073 // we have to distinguish the cases that the accuracy limit is 074 // below, within, or above the limit and that the mean is below 075 // or above the limit 076 double mean = instancesCovered/(double)total; 077 078 // we can estimate the best possible concept to reach with downward refinement 079 // by setting precision to 1 and recall = mean stays as it is 080 double optimumEstimate = beta == 0 ? mean : (beta*mean+1)/(beta+1); 081 082 // if the mean is greater than the required minimum, we can accept; 083 // we also accept if the interval is small and close to the minimum 084 // (worst case is to accept a few inaccurate descriptions) 085 if(optimumEstimate > 1-noise-0.03) { 086 // || (upperBorderA > mean && size < 0.03)) { 087 instancesCovered = (int) (instancesCovered/(double)total * classInstances.size()); 088 upperEstimateA = (int) (upperBorderA * classInstances.size()); 089 lowerEstimateA = (int) (lowerBorderA * classInstances.size()); 090 estimatedA = true; 091 break; 092 } 093 094 // reject only if the upper border is far away (we are very 095 // certain not to lose a potential solution) 096 // if(upperBorderA + 0.1 < 1-noise) { 097 double optimumEstimateUpperBorder = beta == 0 ? upperBorderA+0.1 : (beta*(upperBorderA+0.1)+1)/(beta+1); 098 if(optimumEstimateUpperBorder < 1 - noise) { 099 return -1; 100 } 101 } 102 } 103 } 104 105 double recall = instancesCovered/(double)classInstances.size(); 106 107 // MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0); 108 // MonitorFactory.add("aInstances","count", total); 109 110 // we know that a definition candidate is always subclass of the 111 // intersection of all super classes, so we test only the relevant instances 112 // (leads to undesired effects for descriptions not following this rule, 113 // but improves performance a lot); 114 // for learning a superclass of a defined class, similar observations apply; 115 116 int testsPerformed = 0; 117 int instancesDescription = 0; 118 // boolean estimatedB = false; 119 120 for(OWLIndividual ind : superClassInstances) { 121 122 if(reasoner.hasType(description, ind)) { 123 instancesDescription++; 124 } 125 126 testsPerformed++; 127 128 if(testsPerformed > 10) { 129 130 // compute confidence interval 131 double p1 = Heuristics.p1(instancesDescription, testsPerformed); 132 double p2 = Heuristics.p3(p1, testsPerformed); 133 double lowerBorder = Math.max(0, p1 - p2); 134 double upperBorder = Math.min(1, p1 + p2); 135 int lowerEstimate = (int) (lowerBorder * superClassInstances.size()); 136 int upperEstimate = (int) (upperBorder * superClassInstances.size()); 137 138 double size; 139 if(estimatedA) { 140 // size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) + Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) + Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate))); 141 if (beta == 0) { 142 size = Heuristics.getAScore(upperBorderA, upperEstimateA / (double) (upperEstimateA + lowerEstimate)) - Heuristics.getAScore(lowerBorderA, lowerEstimateA / (double) (lowerEstimateA + upperEstimate)); 143 } else { 144 size = Heuristics.getAScore(upperBorderA, upperEstimateA / (double) (upperEstimateA + lowerEstimate), beta) - Heuristics.getAScore(lowerBorderA, lowerEstimateA / (double) (lowerEstimateA + upperEstimate), beta); 145 } 146 } else { 147 if (beta == 0) { 148 size = Heuristics.getAScore(recall, instancesCovered / (double) (instancesCovered + lowerEstimate)) - Heuristics.getAScore(recall, instancesCovered / (double) (instancesCovered + upperEstimate)); 149 } else { 150 // size = 1/(coverageFactor+1) * (coverageFactor * coverage + Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + Math.sqrt(instancesCovered/(instancesCovered+upperEstimate))); 151 size = Heuristics.getAScore(recall, instancesCovered / (double) (instancesCovered + lowerEstimate), beta) - Heuristics.getAScore(recall, instancesCovered / (double) (instancesCovered + upperEstimate), beta); 152 } 153 } 154 155 if(size < 0.1) { 156 // System.out.println(instancesDescription + " of " + testsPerformed); 157 // System.out.println("interval from " + lowerEstimate + " to " + upperEstimate); 158 // System.out.println("size: " + size); 159 160 // estimatedB = true; 161 // calculate total number of instances 162 instancesDescription = (int) (instancesDescription/(double)testsPerformed * superClassInstances.size()); 163 break; 164 } 165 } 166 } 167 168 // since we measured/estimated accuracy only on instances outside A (superClassInstances 169 // does not include instances of A), we need to add it in the denominator 170 double precision = instancesCovered/(double)(instancesDescription+instancesCovered); 171 if(instancesCovered + instancesDescription == 0) { 172 precision = 0; 173 } 174 175 if (beta == 0) { 176 return Heuristics.getAScore(recall, precision); 177 } else { 178 return Heuristics.getAScore(recall, precision, beta); 179 } 180 181 } 182 183}