001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.learningproblems;
020
021import com.google.common.collect.Sets;
022import org.dllearner.algorithms.celoe.CELOE;
023import org.dllearner.core.*;
024import org.dllearner.core.StringRenderer.Rendering;
025import org.dllearner.core.config.ConfigOption;
026import org.dllearner.kb.OWLAPIOntology;
027import org.dllearner.learningproblems.Heuristics.HeuristicType;
028import org.dllearner.reasoning.ClosedWorldReasoner;
029import org.dllearner.reasoning.OWLAPIReasoner;
030import org.dllearner.reasoning.ReasonerImplementation;
031import org.dllearner.utilities.Helper;
032import org.dllearner.utilities.owl.OWLClassExpressionUtils;
033import org.semanticweb.owlapi.apibinding.OWLManager;
034import org.semanticweb.owlapi.model.IRI;
035import org.semanticweb.owlapi.model.OWLClass;
036import org.semanticweb.owlapi.model.OWLClassExpression;
037import org.semanticweb.owlapi.model.OWLOntology;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040import uk.ac.manchester.cs.owl.owlapi.OWLClassImpl;
041
042import java.io.File;
043import java.util.Set;
044import java.util.SortedSet;
045import java.util.TreeSet;
046
047/**
048 * A learning problem in which positive and negative examples are classes, i.e.
049 * the whole learning is done on the schema level.
050 * 
051 * Instead of doing instance checks to compute the quality of a given class
052 * expression, we
053 * check for subclass relationship.
054 * 
055 * @author Lorenz Buehmann
056 *
057 */
058@ComponentAnn(name = "Class as Instance LP", shortName = "classasinstance", version = 0.1)
059public class ClassAsInstanceLearningProblem extends AbstractClassExpressionLearningProblem<ScorePosNeg<OWLClass>> {
060
061        private static final Logger logger = LoggerFactory.getLogger(ClassAsInstanceLearningProblem.class);
062
063        @ConfigOption(description = "Percent Per Length Unit", defaultValue = "0.05", required = false)
064        private double percentPerLengthUnit = 0.05;
065
066        private HeuristicType heuristic = HeuristicType.PRED_ACC;
067
068        @ConfigOption()
069        protected Set<OWLClass> positiveExamples = new TreeSet<>();
070        @ConfigOption()
071        protected Set<OWLClass> negativeExamples = new TreeSet<>();
072        
073        
074
075        /* (non-Javadoc)
076         * @see org.dllearner.core.Component#init()
077         */
078        @Override
079        public void init() throws ComponentInitException {
080                if (positiveExamples.isEmpty()) {
081                        logger.warn("No positive examples set");
082                }
083                if (negativeExamples.isEmpty()) {
084                        logger.warn("No negative examples set");
085                }
086                if (reasoner != null) {
087                        Set<OWLClass> allClasses = reasoner.getClasses();
088                        Set<OWLClass> allExamples = Sets.union(positiveExamples, negativeExamples);
089                        if (!allClasses.containsAll(allExamples)) {
090                                Set<OWLClass> missing = Sets.difference(allExamples, allClasses);
091                                double percentage = (double) missing.size() / allExamples.size();
092                                percentage = Math.round(percentage * 1000.0) / 1000.0;
093                                String str =
094                                                "The examples (" + (percentage * 100) + " % of total) " +
095                                                                "below are not contained in the knowledge base " +
096                                                                "(check spelling and prefixes)\n";
097                                str += missing.toString();
098
099                                if(missing.size()==allExamples.size())    {
100                                        throw new ComponentInitException(str);
101                                } else if(percentage < 0.10) {
102                                        logger.warn(str);
103                                } else {
104                                        logger.error(str);
105                                }
106                        }
107                }
108
109                initialized = true;
110        }
111
112        /* (non-Javadoc)
113         * @see org.dllearner.core.AbstractLearningProblem#computeScore(org.dllearner.core.owl.Description)
114         */
115        @Override
116        public ScorePosNeg<OWLClass> computeScore(OWLClassExpression description, double noise) {
117                SortedSet<OWLClass> posAsPos = new TreeSet<>();
118                SortedSet<OWLClass> posAsNeg = new TreeSet<>();
119                SortedSet<OWLClass> negAsPos = new TreeSet<>();
120                SortedSet<OWLClass> negAsNeg = new TreeSet<>();
121
122                // for each positive example, we check whether it is a subclass of the given concept
123                for (OWLClass example : positiveExamples) {
124                        if (getReasoner().isSuperClassOf(description, example)) {
125                                posAsPos.add(example);
126                        } else {
127                                posAsNeg.add(example);
128                        }
129                }
130                // for each negative example, we check whether it is not a subclass of the given concept
131                for (OWLClass example : negativeExamples) {
132                        if (getReasoner().isSuperClassOf(description, example)) {
133                                negAsPos.add(example);
134                        } else {
135                                negAsNeg.add(example);
136                        }
137                }
138
139                // compute the accuracy
140                double accuracy = getAccuracyOrTooWeak(description);
141
142                return new ScoreTwoValued<>(OWLClassExpressionUtils.getLength(description), percentPerLengthUnit, posAsPos, posAsNeg,
143                                negAsPos, negAsNeg, accuracy);
144        }
145
146        /* (non-Javadoc)
147         * @see org.dllearner.core.AbstractLearningProblem#evaluate(org.dllearner.core.owl.Description)
148         */
149        @Override
150        public EvaluatedDescription evaluate(OWLClassExpression description) {
151                ScorePosNeg<OWLClass> score = computeScore(description);
152                return new EvaluatedDescriptionPosNeg(description, score);
153        }
154
155        /* (non-Javadoc)
156         * @see org.dllearner.core.AbstractLearningProblem#getAccuracyOrTooWeak(org.dllearner.core.owl.Description, double)
157         */
158        @Override
159        public double getAccuracyOrTooWeak(OWLClassExpression description, double noise) {
160                return getAccuracyOrTooWeakExact(description, noise);
161        }
162
163        public double getAccuracyOrTooWeakExact(OWLClassExpression description, double noise) {
164                switch (heuristic) {
165                case PRED_ACC:
166                        return getPredAccuracyOrTooWeakExact(description, noise);
167                case FMEASURE:
168                        return getFMeasureOrTooWeakExact(description, noise);
169                default:
170                        throw new Error("Heuristic " + heuristic + " not implemented.");
171                }
172        }
173
174        public double getPredAccuracyOrTooWeakExact(OWLClassExpression description, double noise) {
175
176                int maxNotCovered = (int) Math.ceil(noise * positiveExamples.size());
177
178                int notCoveredPos = 0;
179                int notCoveredNeg = 0;
180
181                for (OWLClass example : positiveExamples) {
182                        if (!getReasoner().isSuperClassOf(description, example)) {
183                                notCoveredPos++;
184
185                                if (notCoveredPos >= maxNotCovered) {
186                                        return -1;
187                                }
188                        }
189                }
190                for (OWLClass example : negativeExamples) {
191                        if (!getReasoner().isSuperClassOf(description, example)) {
192                                notCoveredNeg++;
193                        }
194                }
195
196                int tp = positiveExamples.size() - notCoveredPos;
197                int tn = notCoveredNeg;
198                int fp = notCoveredPos;
199                int fn = negativeExamples.size() - notCoveredNeg;
200
201                return (tp + tn) / (double) (tp + fp + tn + fn);
202        }
203
204        public double getFMeasureOrTooWeakExact(OWLClassExpression description, double noise) {
205                int additionalInstances = 0;
206                for (OWLClass example : negativeExamples) {
207                        if (getReasoner().isSuperClassOf(description, example)) {
208                                additionalInstances++;
209                        }
210                }
211
212                int coveredInstances = 0;
213                for (OWLClass example : positiveExamples) {
214                        if (getReasoner().isSuperClassOf(description, example)) {
215                                coveredInstances++;
216                        }
217                }
218
219                double recall = coveredInstances / (double) positiveExamples.size();
220
221                if (recall < 1 - noise) {
222                        return -1;
223                }
224
225                double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances
226                                / (double) (coveredInstances + additionalInstances);
227
228                return Heuristics.getFScore(recall, precision);
229        }
230
231        /**
232         * @param positiveExamples the positiveExamples to set
233         */
234        public void setPositiveExamples(Set<OWLClass> positiveExamples) {
235                this.positiveExamples = positiveExamples;
236        }
237
238        /**
239         * @return the positiveExamples
240         */
241        public Set<OWLClass> getPositiveExamples() {
242                return positiveExamples;
243        }
244
245        /**
246         * @param negativeExamples the negativeExamples to set
247         */
248        public void setNegativeExamples(Set<OWLClass> negativeExamples) {
249                this.negativeExamples = negativeExamples;
250        }
251
252        /**
253         * @return the negativeExamples
254         */
255        public Set<OWLClass> getNegativeExamples() {
256                return negativeExamples;
257        }
258
259        /**
260         * @return the percentPerLengthUnit
261         */
262        public double getPercentPerLengthUnit() {
263                return percentPerLengthUnit;
264        }
265
266        /**
267         * @param percentPerLengthUnit the percentPerLengthUnit to set
268         */
269        public void setPercentPerLengthUnit(double percentPerLengthUnit) {
270                this.percentPerLengthUnit = percentPerLengthUnit;
271        }
272        
273        public static void main(String[] args) throws Exception{
274                StringRenderer.setRenderer(Rendering.DL_SYNTAX);
275                File file = new File("../examples/father.owl");
276                OWLClass cls1 = new OWLClassImpl(IRI.create("http://example.com/father#male"));
277                OWLClass cls2 = new OWLClassImpl(IRI.create("http://example.com/father#female"));
278                
279                OWLOntology ontology = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(file);
280                
281                AbstractKnowledgeSource ks = new OWLAPIOntology(ontology);
282                ks.init();
283                
284                OWLAPIReasoner baseReasoner = new OWLAPIReasoner(ks);
285                baseReasoner.setReasonerImplementation(ReasonerImplementation.HERMIT);
286        baseReasoner.init();
287                ClosedWorldReasoner rc = new ClosedWorldReasoner(ks);
288                rc.setReasonerComponent(baseReasoner);
289                rc.init();
290                
291                ClassAsInstanceLearningProblem lp = new ClassAsInstanceLearningProblem();
292                lp.setPositiveExamples(Sets.newHashSet(cls1, cls2));
293                lp.setReasoner(rc);
294                lp.init();
295                
296                CELOE alg = new CELOE(lp, rc);
297                alg.setMaxExecutionTimeInSeconds(10);
298                alg.setWriteSearchTree(true);
299                alg.setSearchTreeFile("log/search-tree.log");
300                alg.setReplaceSearchTree(true);
301                alg.init();
302                
303                alg.start();
304                
305        }
306
307}