Source code

001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.learningproblems;
020
021import com.google.common.collect.Sets;
022import org.dllearner.accuracymethods.*;
023import org.dllearner.core.*;
024import org.dllearner.core.config.ConfigOption;
025import org.dllearner.utilities.ReasoningUtils;
026import org.dllearner.utilities.ReasoningUtils.Coverage;
027import org.dllearner.utilities.ReasoningUtilsCLP;
028import org.semanticweb.owlapi.model.*;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031import org.springframework.beans.factory.annotation.Autowired;
032import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
033
034import java.util.*;
035
036/**
037 * The problem of learning the OWL class expression of an existing class
038 * in an OWL ontology.
039 *
040 * @author Jens Lehmann
041 *
042 */
043@ComponentAnn(name = "ClassLearningProblem", shortName = "clp", version = 0.6)
044public class ClassLearningProblem extends AbstractClassExpressionLearningProblem<ClassScore> {
045
046        private static Logger logger = LoggerFactory.getLogger(ClassLearningProblem.class);
047        private long nanoStartTime;
048        @ConfigOption(defaultValue = "10",
049                      description = "Maximum execution time in seconds")
050        private int maxExecutionTimeInSeconds = 10;
051
052        @ConfigOption(description = "class of which an OWL class expression should be learned",
053                      required = true)
054        private OWLClass classToDescribe;
055
056        private List<OWLIndividual> classInstances;
057        private TreeSet<OWLIndividual> classInstancesSet;
058        @ConfigOption(defaultValue = "true",
059                      description = "Whether this is an equivalence problem (or superclass learning problem)")
060        private boolean equivalence = true;
061
062        @ConfigOption(description = "beta index for F-measure in super class learning",
063                      required = false,
064                      defaultValue = "3.0")
065        private double betaSC = 3.0;
066
067        @ConfigOption(description = "beta index for F-measure in definition learning",
068                      required = false,
069                      defaultValue = "1.0")
070        private double betaEq = 1.0;
071
072        // instances of super classes excluding instances of the class itself
073        private List<OWLIndividual> superClassInstances;
074        // instances of super classes including instances of the class itself
075        private List<OWLIndividual> classAndSuperClassInstances;
076        // specific variables for generalised F-measure
077        private TreeSet<OWLIndividual> negatedClassInstances;
078
079        @ConfigOption(description = "Specifies, which method/function to use for computing accuracy. Available measues are \"pred_acc\" (predictive accuracy), \"fmeasure\" (F measure), \"generalised_fmeasure\" (generalised F-Measure according to Fanizzi and d'Amato).",
080                      defaultValue = "PRED_ACC")
081        protected AccMethod accuracyMethod;
082
083        @ConfigOption(description = "whether to check for consistency of suggestions (when added to ontology)",
084                      required = false,
085                      defaultValue = "true")
086        private boolean checkConsistency = true;
087
088        private OWLDataFactory df = new OWLDataFactoryImpl();
089
090        public ClassLearningProblem() {
091
092        }
093
094        @Override
095        protected ReasoningUtils newReasoningUtils(AbstractReasonerComponent reasoner) {
096                return new ReasoningUtilsCLP(this, reasoner);
097        }
098
099        public ClassLearningProblem(AbstractReasonerComponent reasoner) {
100                super(reasoner);
101        }
102
103        @Override
104        public void init() throws ComponentInitException {
105
106                if (accuracyMethod != null && accuracyMethod instanceof AccMethodPredAccApprox) {
107                        logger.warn("Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. If you consider to use it for anything serious, please extend the unit tests at org.dllearner.test.junit.HeuristicTests first to verify that it works.");
108                }
109
110                if (!getReasoner().getClasses().contains(classToDescribe)) {
111                        throw new ComponentInitException("The class \"" + classToDescribe + "\" does not exist. Make sure you spelled it correctly.");
112                }
113
114                classInstances = new LinkedList<>(getReasoner().getIndividuals(classToDescribe));
115                // sanity check
116                if (classInstances.size() == 0) {
117                        throw new ComponentInitException("Class " + classToDescribe + " has 0 instances according to \"" + AnnComponentManager.getName(getReasoner().getClass()) + "\". Cannot perform class learning with 0 instances.");
118                }
119
120                classInstancesSet = new TreeSet<>(classInstances);
121
122                double coverageFactor;
123                if (equivalence) {
124                        coverageFactor = betaEq;
125                } else {
126                        coverageFactor = betaSC;
127                }
128
129                // we compute the instances of the super class to perform
130                // optimisations later on
131                Set<OWLClassExpression> superClasses = getReasoner().getSuperClasses(classToDescribe);
132                TreeSet<OWLIndividual> superClassInstancesTmp = new TreeSet<>(getReasoner().getIndividuals());
133                for (OWLClassExpression superClass : superClasses) {
134                        superClassInstancesTmp.retainAll(getReasoner().getIndividuals(superClass));
135                }
136                // we create one list, which includes instances of the class (an instance of the class is also instance of all super classes) ...
137                classAndSuperClassInstances = new LinkedList<>(superClassInstancesTmp);
138                // ... and a second list not including them
139                superClassInstancesTmp.removeAll(classInstances);
140                // since we use the instance list for approximations, we want to avoid
141                // any bias through URI names, so we shuffle the list once pseudo-randomly
142                superClassInstances = new LinkedList<>(superClassInstancesTmp);
143                Random rand = new Random(1);
144                Collections.shuffle(classInstances, rand);
145                Collections.shuffle(superClassInstances, rand);
146
147                if (accuracyMethod == null) {
148                        accuracyMethod = new AccMethodPredAcc(true);
149                }
150                if (accuracyMethod instanceof AccMethodApproximate) {
151                        ((AccMethodApproximate) accuracyMethod).setReasoner(getReasoner());
152                }
153                if (accuracyMethod instanceof AccMethodThreeValued) {
154                        Coverage[] cc = reasoningUtil.getCoverage(df.getOWLObjectComplementOf(classToDescribe), superClassInstances);
155                        negatedClassInstances = Sets.newTreeSet(cc[0].trueSet);
156//                      System.out.println("negated class instances: " + negatedClassInstances);
157                }
158                if (accuracyMethod instanceof AccMethodWithBeta) {
159                        ((AccMethodWithBeta)accuracyMethod).setBeta(coverageFactor);
160                }
161
162//              System.out.println(classInstances.size() + " " + superClassInstances.size());
163                
164                initialized = true;
165        }
166
167        @Override
168        public ClassScore computeScore(OWLClassExpression description, double noise) {
169
170                // TODO: reuse code to ensure that we never return inconsistent results
171                // between getAccuracy, getAccuracyOrTooWeak and computeScore
172                Coverage[] cc = ((ReasoningUtilsCLP)reasoningUtil).getCoverageCLP(description, classInstances, superClassInstances);
173
174                double recall = Heuristics.divideOrZero(cc[0].trueCount, classInstances.size()); // tp / (tp+fn)
175                double precision = Heuristics.divideOrZero(cc[0].trueCount, cc[0].trueCount + cc[1].trueCount); // tp / (tp+fp)
176                // for each OWLClassExpression with less than 100% coverage, we check whether it is
177                // leads to an inconsistent knowledge base
178
179                double acc;
180                if (accuracyMethod instanceof AccMethodTwoValued) {
181                        acc = reasoningUtil.getAccuracyOrTooWeakExact2((AccMethodTwoValued) accuracyMethod, cc, noise);
182                } else if (accuracyMethod instanceof AccMethodThreeValued) {
183                        acc = ((ReasoningUtilsCLP)reasoningUtil).getAccuracyOrTooWeakExact3((AccMethodThreeValued) accuracyMethod, description, classInstances, superClassInstances, negatedClassInstances, noise);
184                } else {
185                        throw new RuntimeException();
186                }
187
188                if (checkConsistency) {
189
190                        // we check whether the axiom already follows from the knowledge base
191                        boolean followsFromKB = followsFromKB(description);
192
193                        // workaround due to a bug (see http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
194                        // (if the axiom follows, then the knowledge base remains consistent)
195                        boolean isConsistent = followsFromKB || isConsistent(description);
196
197                        return new ClassScore(cc[0].trueSet, cc[0].falseSet, recall, cc[1].trueSet, precision, acc, isConsistent, followsFromKB);
198
199                } else {
200                        return new ClassScore(cc[0].trueSet, cc[0].falseSet, recall, cc[1].trueSet, precision, acc);
201                }
202        }
203
204        public boolean isEquivalenceProblem() {
205                return equivalence;
206        }
207
208        @Override
209        public double getAccuracyOrTooWeak(OWLClassExpression description, double noise) {
210                nanoStartTime = System.nanoTime();
211                if (accuracyMethod instanceof AccMethodThreeValued) {
212                        return ((ReasoningUtilsCLP)reasoningUtil).getAccuracyOrTooWeak3((AccMethodThreeValued) accuracyMethod, description, classInstances, superClassInstances, negatedClassInstances, noise);
213                } else if (accuracyMethod instanceof  AccMethodTwoValued) {
214                        return reasoningUtil.getAccuracyOrTooWeak2((AccMethodTwoValued) accuracyMethod, description, classInstances, superClassInstances, noise);
215                } else {
216                        throw new RuntimeException();
217                }
218        }
219
220        /**
221         * @return whether the description test should be aborted because time expired
222         */
223        public boolean terminationTimeExpired() {
224                boolean val = ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds * 1000000000L));
225                if (val) {
226                        logger.warn("Description test aborted, because it took longer than " + maxExecutionTimeInSeconds + " seconds.");
227                }
228                return val;
229        }
230
231        // see http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
232        // for all methods below (currently dummies)
233
234        /**
235         * @return the classToDescribe
236         */
237        public OWLClass getClassToDescribe() {
238                return classToDescribe;
239        }
240
241        public void setClassToDescribe(OWLClass classToDescribe) {
242                this.classToDescribe = classToDescribe;
243        }
244
245        public void setClassToDescribe(IRI classIRI) {
246                this.classToDescribe = df.getOWLClass(classIRI);
247        }
248
249        /* (non-Javadoc)
250         * @see org.dllearner.core.LearningProblem#evaluate(org.dllearner.core.owl.Description)
251         */
252        @Override
253        public EvaluatedDescriptionClass evaluate(OWLClassExpression description, double noise) {
254                ClassScore score = computeScore(description, noise);
255                return new EvaluatedDescriptionClass(description, score);
256        }
257
258        /**
259         * @return the isConsistent
260         */
261        public boolean isConsistent(OWLClassExpression description) {
262                OWLAxiom axiom;
263                if (equivalence) {
264                        axiom = df.getOWLEquivalentClassesAxiom(classToDescribe, description);
265                } else {
266                        axiom = df.getOWLSubClassOfAxiom(classToDescribe, description);
267                }
268                return getReasoner().remainsSatisfiable(axiom);
269        }
270
271        public boolean followsFromKB(OWLClassExpression description) {
272                return equivalence ? getReasoner().isEquivalentClass(description, classToDescribe) : getReasoner().isSuperClassOf(description, classToDescribe);
273        }
274
275        public int getMaxExecutionTimeInSeconds() {
276                return maxExecutionTimeInSeconds;
277        }
278
279        public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds) {
280                this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds;
281        }
282
283        public boolean isEquivalence() {
284                return equivalence;
285        }
286
287        public void setEquivalence(boolean equivalence) {
288                this.equivalence = equivalence;
289        }
290
291        public double getBetaSC() {
292                return betaSC;
293        }
294
295        public void setBetaSC(double betaSC) {
296                this.betaSC = betaSC;
297        }
298
299        public double getBetaEq() {
300                return betaEq;
301        }
302
303        public void setBetaEq(double betaEq) {
304                this.betaEq = betaEq;
305        }
306
307        public boolean isCheckConsistency() {
308                return checkConsistency;
309        }
310
311        public void setCheckConsistency(boolean checkConsistency) {
312                this.checkConsistency = checkConsistency;
313        }
314
315        public AccMethod getAccuracyMethod() {
316                return accuracyMethod;
317        }
318
319        @Autowired(required = false)
320        public void setAccuracyMethod(AccMethod accuracyMethod) {
321                this.accuracyMethod = accuracyMethod;
322        }
323
324        public double getRecall(OWLClassExpression description) {
325                ReasoningUtils.CoverageCount[] cc = reasoningUtil.getCoverageCount(description, classInstancesSet);
326                if (cc == null) {
327                        return 0;
328                }
329                return cc[0].trueCount/(double)cc[0].total;
330        }
331}
332