001package org.dllearner.accuracymethods;
002
003import org.dllearner.core.ComponentAnn;
004import org.dllearner.core.Reasoner;
005import org.dllearner.core.config.ConfigOption;
006import org.dllearner.learningproblems.Heuristics;
007import org.semanticweb.owlapi.model.OWLClassExpression;
008import org.semanticweb.owlapi.model.OWLIndividual;
009
010import java.util.Collection;
011
012@ComponentAnn(name = "AMeasure Approximate", shortName = "approx.ameasure", version = 0.1)
013public class AccMethodAMeasureApprox extends AccMethodAMeasure implements AccMethodTwoValuedApproximate {
014
015        @ConfigOption(description = "(configured by learning problem)")
016        private Reasoner reasoner;
017        @ConfigOption(description = "The Approximate Delta", defaultValue = "0.05")
018        private double approxDelta = 0.05;
019
020        @Override
021        public double getApproxDelta() {
022                return approxDelta;
023        }
024
025        @Override
026        public void setApproxDelta(double approxDelta) {
027                this.approxDelta = approxDelta;
028        }
029
030        @Override
031        public void setReasoner(Reasoner reasoner) {
032                this.reasoner = reasoner;
033        }
034
035        @Override
036        public double getAccApprox2(OWLClassExpression description,
037                        Collection<OWLIndividual> classInstances,
038                        Collection<OWLIndividual> superClassInstances,
039                        double noise) {
040                // we abort when there are too many uncovered positives
041                int maxNotCovered = (int) Math.ceil(noise*classInstances.size());
042                int instancesCovered = 0;
043                int instancesNotCovered = 0;
044                int total = 0;
045                boolean estimatedA = false;
046
047                double lowerBorderA = 0;
048                int lowerEstimateA = 0;
049                double upperBorderA = 1;
050                int upperEstimateA = classInstances.size();
051
052                for(OWLIndividual ind : classInstances) {
053                        if(reasoner.hasType(description, ind)) {
054                                instancesCovered++;
055                        } else {
056                                instancesNotCovered ++;
057                                if(instancesNotCovered > maxNotCovered) {
058                                        return -1;
059                                }
060                        }
061
062                        // approximation step (starting after 10 tests)
063                        total = instancesCovered + instancesNotCovered;
064                        if(total > 10) {
065                                // compute confidence interval
066                                double p1 = Heuristics.p1(instancesCovered, total);
067                                double p2 = Heuristics.p3(p1, total);
068                                lowerBorderA = Math.max(0, p1 - p2);
069                                upperBorderA = Math.min(1, p1 + p2);
070                                double size = upperBorderA - lowerBorderA;
071                                // if the interval has a size smaller than 10%, we can be confident
072                                if(size < 2 * approxDelta) {
073                                        // we have to distinguish the cases that the accuracy limit is
074                                        // below, within, or above the limit and that the mean is below
075                                        // or above the limit
076                                        double mean = instancesCovered/(double)total;
077
078                                        // we can estimate the best possible concept to reach with downward refinement
079                                        // by setting precision to 1 and recall = mean stays as it is
080                                        double optimumEstimate = beta == 0 ? mean : (beta*mean+1)/(beta+1);
081
082                                        // if the mean is greater than the required minimum, we can accept;
083                                        // we also accept if the interval is small and close to the minimum
084                                        // (worst case is to accept a few inaccurate descriptions)
085                                        if(optimumEstimate > 1-noise-0.03) {
086                                                //                                                              || (upperBorderA > mean && size < 0.03)) {
087                                                instancesCovered = (int) (instancesCovered/(double)total * classInstances.size());
088                                                upperEstimateA = (int) (upperBorderA * classInstances.size());
089                                                lowerEstimateA = (int) (lowerBorderA * classInstances.size());
090                                                estimatedA = true;
091                                                break;
092                                        }
093
094                                        // reject only if the upper border is far away (we are very
095                                        // certain not to lose a potential solution)
096                                        //                                              if(upperBorderA + 0.1 < 1-noise) {
097                                        double optimumEstimateUpperBorder = beta == 0 ? upperBorderA+0.1 : (beta*(upperBorderA+0.1)+1)/(beta+1);
098                                        if(optimumEstimateUpperBorder < 1 - noise) {
099                                                return -1;
100                                        }
101                                }
102                        }
103                }
104
105                double recall = instancesCovered/(double)classInstances.size();
106
107                //                      MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0);
108                //                      MonitorFactory.add("aInstances","count", total);
109
110                // we know that a definition candidate is always subclass of the
111                // intersection of all super classes, so we test only the relevant instances
112                // (leads to undesired effects for descriptions not following this rule,
113                // but improves performance a lot);
114                // for learning a superclass of a defined class, similar observations apply;
115
116                int testsPerformed = 0;
117                int instancesDescription = 0;
118                //                      boolean estimatedB = false;
119
120                for(OWLIndividual ind : superClassInstances) {
121
122                        if(reasoner.hasType(description, ind)) {
123                                instancesDescription++;
124                        }
125
126                        testsPerformed++;
127
128                        if(testsPerformed > 10) {
129
130                                // compute confidence interval
131                                double p1 = Heuristics.p1(instancesDescription, testsPerformed);
132                                double p2 = Heuristics.p3(p1, testsPerformed);
133                                double lowerBorder = Math.max(0, p1 - p2);
134                                double upperBorder = Math.min(1, p1 + p2);
135                                int lowerEstimate = (int) (lowerBorder * superClassInstances.size());
136                                int upperEstimate = (int) (upperBorder * superClassInstances.size());
137
138                                double size;
139                                if(estimatedA) {
140                                        //                                              size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) + Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) + Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate)));
141                                        if (beta == 0) {
142                                                size = Heuristics.getAScore(upperBorderA, upperEstimateA / (double) (upperEstimateA + lowerEstimate)) - Heuristics.getAScore(lowerBorderA, lowerEstimateA / (double) (lowerEstimateA + upperEstimate));
143                                        } else {
144                                                size = Heuristics.getAScore(upperBorderA, upperEstimateA / (double) (upperEstimateA + lowerEstimate), beta) - Heuristics.getAScore(lowerBorderA, lowerEstimateA / (double) (lowerEstimateA + upperEstimate), beta);
145                                        }
146                                } else {
147                                        if (beta == 0) {
148                                                size = Heuristics.getAScore(recall, instancesCovered / (double) (instancesCovered + lowerEstimate)) - Heuristics.getAScore(recall, instancesCovered / (double) (instancesCovered + upperEstimate));
149                                        } else {
150                                                //                                              size = 1/(coverageFactor+1) * (coverageFactor * coverage + Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + Math.sqrt(instancesCovered/(instancesCovered+upperEstimate)));
151                                                size = Heuristics.getAScore(recall, instancesCovered / (double) (instancesCovered + lowerEstimate), beta) - Heuristics.getAScore(recall, instancesCovered / (double) (instancesCovered + upperEstimate), beta);
152                                        }
153                                }
154
155                                if(size < 0.1) {
156                                        //                                              System.out.println(instancesDescription + " of " + testsPerformed);
157                                        //                                              System.out.println("interval from " + lowerEstimate + " to " + upperEstimate);
158                                        //                                              System.out.println("size: " + size);
159
160                                        //                                              estimatedB = true;
161                                        // calculate total number of instances
162                                        instancesDescription = (int) (instancesDescription/(double)testsPerformed * superClassInstances.size());
163                                        break;
164                                }
165                        }
166                }
167
168                // since we measured/estimated accuracy only on instances outside A (superClassInstances
169                // does not include instances of A), we need to add it in the denominator
170                double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
171                if(instancesCovered + instancesDescription == 0) {
172                        precision = 0;
173                }
174
175                if (beta == 0) {
176                        return Heuristics.getAScore(recall, precision);
177                } else {
178                        return Heuristics.getAScore(recall, precision, beta);
179                }
180
181        }
182        
183}