Source code

001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.learningproblems;
020
021import com.google.common.collect.Sets;
022import org.dllearner.accuracymethods.*;
023import org.dllearner.core.*;
024import org.dllearner.core.config.ConfigOption;
025import org.dllearner.utilities.Helper;
026import org.dllearner.utilities.ReasoningUtils;
027import org.dllearner.utilities.ReasoningUtils.Coverage;
028import org.dllearner.utilities.ReasoningUtilsCLP;
029import org.semanticweb.owlapi.model.*;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032import org.springframework.beans.factory.annotation.Autowired;
033import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
034
035import java.util.*;
036
037/**
038 * The problem of learning the OWL class expression for another OWL class expression
039 * in an OWL ontology.
040 * This for example allows to learn domain or range of a property by descibing the following class expressions:
041 * <ul>
042 *     <li><code>Dom(p, C) -> âp.â¤</code> </li>
043 *     <li><code>Ran(p, C) -> âp^(-1).â¤</code></li>
044 * </ul>
045 *
046 * Note, this learning problem generalizes the {@link ClassLearningProblem}, which will be kept in the code for legacy
047 * reasons.
048 *
049 * @author Lorenz Buehmann
050 * @author Jens Lehmann
051 *
052 */
053@ComponentAnn(name = "ClassExpressionLearningProblem", shortName = "celp", version = 0.6)
054public class ClassExpressionLearningProblem extends AbstractClassExpressionLearningProblem<ClassScore> {
055
056        private static Logger logger = LoggerFactory.getLogger(ClassExpressionLearningProblem.class);
057        private long nanoStartTime;
058        @ConfigOption(defaultValue = "10",
059                      description = "Maximum execution time in seconds")
060        private int maxExecutionTimeInSeconds = 10;
061
062        @ConfigOption(description = "OWL class expression of which an OWL class expression should be learned",
063                      required = true)
064        private OWLClassExpression classExpressionToDescribe;
065
066        private List<OWLIndividual> classInstances;
067        private TreeSet<OWLIndividual> classInstancesSet;
068        @ConfigOption(defaultValue = "true",
069                      description = "Whether this is an equivalence problem (or superclass learning problem)")
070        private boolean equivalence = true;
071
072        @ConfigOption(description = "beta index for F-measure in super class learning",
073                      required = false,
074                      defaultValue = "3.0")
075        private double betaSC = 3.0;
076
077        @ConfigOption(description = "beta index for F-measure in definition learning",
078                      required = false,
079                      defaultValue = "1.0")
080        private double betaEq = 1.0;
081
082        // instances of super classes excluding instances of the class itself
083        private List<OWLIndividual> superClassInstances;
084        // instances of super classes including instances of the class itself
085        private List<OWLIndividual> classAndSuperClassInstances;
086        // specific variables for generalised F-measure
087        private TreeSet<OWLIndividual> negatedClassInstances;
088
089        @ConfigOption(description = "Specifies, which method/function to use for computing accuracy. Available measues are \"pred_acc\" (predictive accuracy), \"fmeasure\" (F measure), \"generalised_fmeasure\" (generalised F-Measure according to Fanizzi and d'Amato).",
090                      defaultValue = "PRED_ACC")
091        protected AccMethod accuracyMethod;
092
093        @ConfigOption(description = "whether to check for consistency of suggestions (when added to ontology)",
094                      required = false,
095                      defaultValue = "true")
096        private boolean checkConsistency = true;
097
098        private OWLDataFactory df = new OWLDataFactoryImpl();
099
100        public ClassExpressionLearningProblem() {}
101
102        public ClassExpressionLearningProblem(AbstractReasonerComponent reasoner) {
103                super(reasoner);
104        }
105
106        @Override
107        protected ReasoningUtils newReasoningUtils(AbstractReasonerComponent reasoner) {
108                return new ReasoningUtils(reasoner);
109        }
110
111        @Override
112        public void init() throws ComponentInitException {
113
114                if (accuracyMethod != null && accuracyMethod instanceof AccMethodPredAccApprox) {
115                        logger.warn("Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. If you consider to use it for anything serious, please extend the unit tests at org.dllearner.test.junit.HeuristicTests first to verify that it works.");
116                }
117
118                // check if entities in signature of class expression occur in ontology
119                if (!Helper.checkConceptEntities(getReasoner(), classExpressionToDescribe)) {
120                        throw new ComponentInitException("Some entities in \"" + classExpressionToDescribe + "\" do not exist. Make sure you spelled it correctly.");
121                }
122
123                classInstances = new LinkedList<>(getReasoner().getIndividuals(classExpressionToDescribe));
124                // sanity check
125                if (classInstances.size() == 0) {
126                        throw new ComponentInitException("Class " + classExpressionToDescribe + " has 0 instances according to \"" +
127                                        AnnComponentManager.getName(getReasoner().getClass()) + "\". Cannot perform class learning with 0 instances.");
128                }
129
130                classInstancesSet = new TreeSet<>(classInstances);
131
132                double coverageFactor;
133                if (equivalence) {
134                        coverageFactor = betaEq;
135                } else {
136                        coverageFactor = betaSC;
137                }
138
139                // we compute the instances of the super class to perform
140                // optimisations later on
141                Set<OWLClassExpression> superClasses = getReasoner().getSuperClasses(classExpressionToDescribe);
142                TreeSet<OWLIndividual> superClassInstancesTmp = new TreeSet<>(getReasoner().getIndividuals());
143                for (OWLClassExpression superClass : superClasses) {
144                        superClassInstancesTmp.retainAll(getReasoner().getIndividuals(superClass));
145                }
146                // we create one list, which includes instances of the class (an instance of the class is also instance of all super classes) ...
147                classAndSuperClassInstances = new LinkedList<>(superClassInstancesTmp);
148                // ... and a second list not including them
149                superClassInstancesTmp.removeAll(classInstances);
150                // since we use the instance list for approximations, we want to avoid
151                // any bias through URI names, so we shuffle the list once pseudo-randomly
152                superClassInstances = new LinkedList<>(superClassInstancesTmp);
153                Random rand = new Random(1);
154                Collections.shuffle(classInstances, rand);
155                Collections.shuffle(superClassInstances, rand);
156
157
158                if (accuracyMethod == null) {
159                        accuracyMethod = new AccMethodPredAcc(true);
160                }
161
162                if (accuracyMethod instanceof AccMethodApproximate) {
163                        ((AccMethodApproximate) accuracyMethod).setReasoner(getReasoner());
164                }
165
166                if (accuracyMethod instanceof AccMethodThreeValued) {
167                        Coverage[] cc = reasoningUtil.getCoverage(df.getOWLObjectComplementOf(classExpressionToDescribe), superClassInstances);
168                        negatedClassInstances = Sets.newTreeSet(cc[0].trueSet);
169//                      System.out.println("negated class instances: " + negatedClassInstances);
170                }
171
172                if (accuracyMethod instanceof AccMethodWithBeta) {
173                        ((AccMethodWithBeta)accuracyMethod).setBeta(coverageFactor);
174                }
175
176//              System.out.println(classInstances.size() + " " + superClassInstances.size());
177        }
178
179        @Override
180        public ClassScore computeScore(OWLClassExpression description, double noise) {
181
182                // TODO: reuse code to ensure that we never return inconsistent results
183                // between getAccuracy, getAccuracyOrTooWeak and computeScore
184                Coverage[] cc = ((ReasoningUtilsCLP)reasoningUtil).getCoverageCLP(description, classInstances, superClassInstances);
185
186                double recall = Heuristics.divideOrZero(cc[0].trueCount, classInstances.size()); // tp / (tp+fn)
187                double precision = Heuristics.divideOrZero(cc[0].trueCount, cc[0].trueCount + cc[1].trueCount); // tp / (tp+fp)
188                // for each OWLClassExpression with less than 100% coverage, we check whether it is
189                // leads to an inconsistent knowledge base
190
191                double acc;
192                if (accuracyMethod instanceof AccMethodTwoValued) {
193                        acc = reasoningUtil.getAccuracyOrTooWeakExact2((AccMethodTwoValued) accuracyMethod, cc, noise);
194                } else if (accuracyMethod instanceof AccMethodThreeValued) {
195                        acc = ((ReasoningUtilsCLP)reasoningUtil).getAccuracyOrTooWeakExact3((AccMethodThreeValued) accuracyMethod, description, classInstances, superClassInstances, negatedClassInstances, noise);
196                } else {
197                        throw new RuntimeException();
198                }
199
200                if (checkConsistency) {
201
202                        // we check whether the axiom already follows from the knowledge base
203                        boolean followsFromKB = followsFromKB(description);
204
205                        // workaround due to a bug (see http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
206                        // (if the axiom follows, then the knowledge base remains consistent)
207                        boolean isConsistent = followsFromKB || isConsistent(description);
208
209                        return new ClassScore(cc[0].trueSet, cc[0].falseSet, recall, cc[1].trueSet, precision, acc, isConsistent, followsFromKB);
210
211                } else {
212                        return new ClassScore(cc[0].trueSet, cc[0].falseSet, recall, cc[1].trueSet, precision, acc);
213                }
214        }
215
216        public boolean isEquivalenceProblem() {
217                return equivalence;
218        }
219
220        @Override
221        public double getAccuracyOrTooWeak(OWLClassExpression description, double noise) {
222                nanoStartTime = System.nanoTime();
223                if (accuracyMethod instanceof AccMethodThreeValued) {
224                        return ((ReasoningUtilsCLP)reasoningUtil).getAccuracyOrTooWeak3((AccMethodThreeValued) accuracyMethod, description, classInstances, superClassInstances, negatedClassInstances, noise);
225                } else if (accuracyMethod instanceof  AccMethodTwoValued) {
226                        return reasoningUtil.getAccuracyOrTooWeak2((AccMethodTwoValued) accuracyMethod, description, classInstances, superClassInstances, noise);
227                } else {
228                        throw new RuntimeException("Method ClassExpressionLearningProblem::getAccuracyOrTooWeak doesn't handle accuracyMethod "  + accuracyMethod);
229                }
230        }
231
232        /**
233         * @return whether the description test should be aborted because time expired
234         */
235        public boolean terminationTimeExpired() {
236                boolean val = ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds * 1000000000L));
237                if (val) {
238                        logger.warn("Description test aborted, because it took longer than " + maxExecutionTimeInSeconds + " seconds.");
239                }
240                return val;
241        }
242
243        // see http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
244        // for all methods below (currently dummies)
245
246        /**
247         * @return the class expression to describe
248         */
249        public OWLClassExpression getClassExpressionToDescribe() {
250                return classExpressionToDescribe;
251        }
252
253        public void setClassExpressionToDescribe(OWLClassExpression classExpressionToDescribe) {
254                this.classExpressionToDescribe = classExpressionToDescribe;
255        }
256
257        public void setClassToDescribe(IRI classIRI) {
258                setClassExpressionToDescribe(df.getOWLClass(classIRI));
259        }
260
261        /* (non-Javadoc)
262         * @see org.dllearner.core.LearningProblem#evaluate(org.dllearner.core.owl.Description)
263         */
264        @Override
265        public EvaluatedDescriptionClass evaluate(OWLClassExpression description, double noise) {
266                ClassScore score = computeScore(description, noise);
267                return new EvaluatedDescriptionClass(description, score);
268        }
269
270        /**
271         * @return whether the current ontology remains logically consistent if the given class expression is
272         * added OWL axiom, i.e.
273         * <ul>
274         *     <li>SubClassOf(classToDescribe, ce) or</li>
275         *     <li>EquivalentClass(classToDescribe, ce)</li>
276         * </ul>
277         * depending on the type of axiom set via {@link ClassExpressionLearningProblem#setEquivalence(boolean)} method.
278         * @see AbstractReasonerComponent#remainsSatisfiable(OWLAxiom)
279         */
280        public boolean isConsistent(OWLClassExpression ce) {
281                OWLAxiom axiom = equivalence
282                                ? df.getOWLEquivalentClassesAxiom(classExpressionToDescribe, ce)
283                                : df.getOWLSubClassOfAxiom(classExpressionToDescribe, ce);
284                return getReasoner().remainsSatisfiable(axiom);
285        }
286
287        /**
288         * @return whether the given class expression as OWL axiom of type
289         * <ul>
290         *     <li>SubClassOf(classToDescribe, ce) or</li>
291         *     <li>EquivalentClass(classToDescribe, ce)</li>
292         * </ul>
293         * (depending on the type of axiom set via {@link ClassExpressionLearningProblem#setEquivalence(boolean)} method)
294         * can already be logically derived from existing axioms in the ontology, i.e. it's logically redundant.
295         * @see AbstractReasonerComponent#remainsSatisfiable(OWLAxiom)
296         */
297        public boolean followsFromKB(OWLClassExpression description) {
298                return equivalence
299                                ? getReasoner().isEquivalentClass(description, classExpressionToDescribe)
300                                : getReasoner().isSuperClassOf(description, classExpressionToDescribe);
301        }
302
303        public int getMaxExecutionTimeInSeconds() {
304                return maxExecutionTimeInSeconds;
305        }
306
307        public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds) {
308                this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds;
309        }
310
311        public boolean isEquivalence() {
312                return equivalence;
313        }
314
315        public void setEquivalence(boolean equivalence) {
316                this.equivalence = equivalence;
317        }
318
319        public double getBetaSC() {
320                return betaSC;
321        }
322
323        public void setBetaSC(double betaSC) {
324                this.betaSC = betaSC;
325        }
326
327        public double getBetaEq() {
328                return betaEq;
329        }
330
331        public void setBetaEq(double betaEq) {
332                this.betaEq = betaEq;
333        }
334
335        public boolean isCheckConsistency() {
336                return checkConsistency;
337        }
338
339        public void setCheckConsistency(boolean checkConsistency) {
340                this.checkConsistency = checkConsistency;
341        }
342
343        public AccMethod getAccuracyMethod() {
344                return accuracyMethod;
345        }
346
347        @Autowired(required = false)
348        public void setAccuracyMethod(AccMethod accuracyMethod) {
349                this.accuracyMethod = accuracyMethod;
350        }
351
352        public double getRecall(OWLClassExpression description) {
353                ReasoningUtils.CoverageCount[] cc = reasoningUtil.getCoverageCount(description, classInstancesSet);
354                if (cc == null) {
355                        return 0;
356                }
357                return cc[0].trueCount/(double)cc[0].total;
358        }
359}
360