001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.el;
020
021import com.jamonapi.Monitor;
022import com.jamonapi.MonitorFactory;
023import org.apache.log4j.Logger;
024import org.dllearner.core.*;
025import org.dllearner.core.config.ConfigOption;
026import org.dllearner.core.owl.ClassHierarchy;
027import org.dllearner.core.owl.DatatypePropertyHierarchy;
028import org.dllearner.core.owl.ObjectPropertyHierarchy;
029import org.dllearner.learningproblems.ClassLearningProblem;
030import org.dllearner.refinementoperators.ELDown;
031import org.dllearner.utilities.Files;
032import org.dllearner.utilities.Helper;
033import org.dllearner.utilities.OWLAPIUtils;
034import org.dllearner.utilities.owl.EvaluatedDescriptionSet;
035import org.dllearner.utilities.owl.OWLClassExpressionMinimizer;
036import org.dllearner.utilities.owl.OWLClassExpressionUtils;
037import org.semanticweb.owlapi.manchestersyntax.parser.ManchesterOWLSyntaxParserException;
038import org.semanticweb.owlapi.model.OWLClass;
039import org.semanticweb.owlapi.model.OWLClassExpression;
040
041import java.io.File;
042import java.util.List;
043import java.util.Set;
044import java.util.TreeSet;
045
046/**
047 * A learning algorithm for EL, which is based on an
048 * ideal refinement operator.
049 * 
050 * TODO redundancy check
051 * 
052 * @author Jens Lehmann
053 *
054 */
055@ComponentAnn(name="ELTL", shortName="eltl", version=0.5, description="ELTL is an algorithm based on the refinement operator in http://jens-lehmann.org/files/2009/el_ilp.pdf.")
056public class ELLearningAlgorithm extends AbstractCELA {
057
058        private static Logger logger = Logger.getLogger(ELLearningAlgorithm.class);     
059        
060        @ConfigOption(required=false, defaultValue="true", description="Specifies whether to use real disjointness checks or instance based ones (no common instances) in the refinement operator.")
061        private boolean instanceBasedDisjoints = true;
062        
063        @ConfigOption(defaultValue="false", description="algorithm will terminate immediately when a correct definition is found")
064        private boolean stopOnFirstDefinition = false;
065        
066        @ConfigOption(defaultValue="0.0", description="the (approximated) percentage of noise within the examples")
067        private double noisePercentage = 0.0;
068        
069        @ConfigOption(defaultValue="owl:Thing", description="You can specify a start class for the algorithm. To do this, you have to use Manchester OWL syntax without using prefixes.")
070        private OWLClassExpression startClass;
071        
072        @ConfigOption(defaultValue="false", description="specifies whether to write a search tree")
073        private boolean writeSearchTree = false;
074
075        @ConfigOption(defaultValue="log/searchTree.txt", description="file to use for the search tree")
076        private String searchTreeFile = "log/searchTree.txt";
077
078        @ConfigOption(defaultValue="false", description="specifies whether to replace the search tree in the log file after each run or append the new search tree")
079        private boolean replaceSearchTree = false;
080        
081        @ConfigOption(defaultValue="2",description="The maximum depth for class expressions to test")
082        private int maxClassExpressionDepth = 2;
083        
084        @ConfigOption(defaultValue="10",description="Sets the maximum number of results one is interested in")
085        private int maxNrOfResults = 10;
086        
087        private Set<OWLClass> ignoredConcepts = null;
088        
089        @ConfigOption(description="class of which an OWL class expression should be learned")
090        private OWLClass classToDescribe;
091                
092        private double noise;
093        
094        // a set with limited size (currently the ordering is defined in the class itself)
095        private SearchTreeNode startNode;
096        @ConfigOption(defaultValue="StableHeuristic", description="The heuristic variable to use for ELTL")
097        private ELHeuristic heuristic;
098        private TreeSet<SearchTreeNode> candidates;
099        private ELDown operator;
100
101        private boolean isEquivalenceProblem = true;
102        private Monitor timeMonitor;
103        
104        double max = -1d;
105        OWLClassExpression maxDescription;
106
107        public ELLearningAlgorithm() {}
108        
109        public ELLearningAlgorithm(AbstractClassExpressionLearningProblem problem, AbstractReasonerComponent reasoner) {
110                super(problem, reasoner);
111        }
112
113        @Override
114        public void init() throws ComponentInitException {
115                // currently we use the stable heuristic
116                if(heuristic == null){
117                        heuristic = new StableHeuristic();
118                }
119                
120                candidates = new TreeSet<>(heuristic);
121                
122                ClassHierarchy classHierarchy = initClassHierarchy();
123                ObjectPropertyHierarchy obHierarchy = initObjectPropertyHierarchy();
124                DatatypePropertyHierarchy dpHierarchy = initDataPropertyHierarchy();
125                
126                operator = new ELDown(reasoner, instanceBasedDisjoints, classHierarchy, obHierarchy, dpHierarchy);
127                operator.setMaxClassExpressionDepth(maxClassExpressionDepth);
128                operator.init();
129                
130                noise = noisePercentage/100d;
131                
132                bestEvaluatedDescriptions = new EvaluatedDescriptionSet(maxNrOfResults);
133
134                minimizer = new OWLClassExpressionMinimizer(dataFactory, reasoner);
135                
136                timeMonitor = MonitorFactory.getTimeMonitor("eltl-time");
137                
138                initialized = true;
139        }       
140        
141        @Override
142        public void start() {
143                stop = false;
144                isRunning = true;
145                reset();
146                nanoStartTime = System.nanoTime();
147                
148                // create start node
149                if(startClass == null){
150                        startClass = dataFactory.getOWLThing();
151                } else {
152                        try {
153                                this.startClass = OWLAPIUtils.classExpressionPropertyExpander(startClass, reasoner, dataFactory);
154                        } catch (ManchesterOWLSyntaxParserException e) {
155                                logger.info("Error parsing startClass: " + e.getMessage());
156                                this.startClass = dataFactory.getOWLThing();
157                        }
158                }
159                logger.info("Start class: " + startClass);
160
161                ELDescriptionTree top = new ELDescriptionTree(reasoner, startClass);
162                addDescriptionTree(top, null);
163                
164                double highestAccuracy = 0.0;
165                
166                // main loop
167                int loop = 0;
168                while(!stop && !stoppingCriteriaSatisfied()) {
169                        // pick the best candidate according to the heuristic
170                        SearchTreeNode best = candidates.pollLast();
171                        
172                        // apply operator
173                        List<ELDescriptionTree> refinements = operator.refine(best.getDescriptionTree());
174                        
175                        // add all refinements to search tree, candidates, best descriptions
176                        for(ELDescriptionTree refinement : refinements) {
177                                addDescriptionTree(refinement, best);
178                        }
179                        
180                        // logging
181                        if(logger.isTraceEnabled()) {
182                                logger.trace("Chosen node " + best);
183                                logger.trace(startNode.getTreeString(renderer));
184                                logger.trace("Loop " + loop + " completed.");
185                        }
186                        
187                        if(bestEvaluatedDescriptions.getBestAccuracy() > highestAccuracy) {
188                                highestAccuracy = bestEvaluatedDescriptions.getBestAccuracy();
189                                long durationInMillis = getCurrentRuntimeInMilliSeconds();
190                                String durationStr = getDurationAsString(durationInMillis);
191                                logger.info("more accurate (" + dfPercent.format(highestAccuracy) + ") class expression found after " + durationStr + ": " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription()));
192                        }
193                        
194                        if(writeSearchTree) {
195                                writeSearchTree();
196                        }
197                        
198                }
199                
200                // print solution(s)
201                logger.info("solutions[time: " + Helper.prettyPrintNanoSeconds(System.nanoTime()-nanoStartTime) + "]\n" + getSolutionString());
202                
203                isRunning = false;
204        }
205
206        // evaluates a class expression in tree form
207        private void addDescriptionTree(ELDescriptionTree descriptionTree, SearchTreeNode parentNode) {
208                // create search tree node
209                SearchTreeNode node = new SearchTreeNode(descriptionTree);
210                
211                // convert tree to standard class expression
212                OWLClassExpression classExpression = descriptionTree.transformToClassExpression();
213                
214                if(classExpression.equals(startClass) || isDescriptionAllowed(classExpression)){
215
216                        // rewrite class expression
217                        classExpression = rewrite(classExpression);
218
219                        // compute score
220                        Score score = learningProblem.computeScore(classExpression, noise);
221
222                        // compute accuracy
223                        double accuracy = score.getAccuracy();
224                        
225                        if(accuracy == -1) {
226                                node.setTooWeak();
227                        } else {
228                                node.setScore(score);
229                        }
230                        node.setAccuracy(accuracy);
231                        
232                        // link to parent (unless start node)
233                        if(parentNode == null) {
234                                startNode = node;
235                        } else {
236                                parentNode.addChild(node);
237                        }
238                        
239                        if(!node.isTooWeak()) {
240                                // add as candidate
241                                candidates.add(node);
242                                
243                                // check whether we want to add it to the best evaluated descriptions;
244                                // to do this we pick the worst considered evaluated description
245                                // (remember that the set has limited size, so it's likely not the worst overall);
246                                // the class expression has a chance to make it in the set if it has
247                                // at least as high accuracy - if not we can save the reasoner calls
248                                // for fully computing the evaluated description
249                                if(classToDescribe == null || !classToDescribe.equals(classExpression)) {
250                                        if(!bestEvaluatedDescriptions.isFull() || bestEvaluatedDescriptions.getWorst().getAccuracy() < node.getAccuracy()) {
251                                                EvaluatedDescription<Score> ed = new EvaluatedDescription<>(classExpression, score);
252                                                bestEvaluatedDescriptions.add(ed);
253//                                              System.out.println("Add " + ed);
254                                        } else {
255//                                              EvaluatedDescriptionPosNeg ed = new EvaluatedDescriptionPosNeg(classExpression, score);
256//                                              System.out.println("reject " + ed);
257                                        }
258                                }
259                        }
260                }
261        }
262        
263        private boolean stoppingCriteriaSatisfied() {
264                // in some cases, there could be no candidate left ...
265                if(candidates.isEmpty()) {
266                        logger.info("Stopping algorithm: No candidates left.");
267                        return true;
268                }
269                
270                // stop when max time is reached
271                boolean timeout = isTimeExpired();
272                if(timeout) {
273                        logger.info("Stopping algorithm: Max. execution time was reached.");
274                        return true;
275                }
276                
277                // stop if we have a node covering all positives and none of the negatives
278                SearchTreeNode bestNode = candidates.last();
279                boolean perfectDefinitionFound = bestNode.getAccuracy() == 1.0;
280                if(stopOnFirstDefinition && perfectDefinitionFound) {
281                        logger.info("Stopping algorithm: Perfect definition found.");
282                        return true;
283                }
284                                
285                return false;
286        }
287        
288        /*
289         * set all values back to their default values (used for running
290         * the algorithm more than once)
291         */
292        private void reset() {
293                candidates.clear();
294                bestEvaluatedDescriptions.getSet().clear();
295        }
296        
297        private boolean isDescriptionAllowed(OWLClassExpression description) {
298                if(learningProblem instanceof ClassLearningProblem) {
299                        if(isEquivalenceProblem) {
300                                // the class to learn must not appear on the outermost property level
301                                if(OWLClassExpressionUtils.occursOnFirstLevel(description, classToDescribe)) {
302                                        return false;
303                                }
304                                
305                                //non of the equivalent classes must occur on the first level
306                                TreeSet<OWLClassExpression> toTest = new TreeSet<>();
307                                if(classToDescribe != null){
308                                        toTest.add(classToDescribe);
309                                }
310                                while(!toTest.isEmpty()) {
311                                        OWLClassExpression d = toTest.pollFirst();
312                                        if(OWLClassExpressionUtils.occursOnFirstLevel(description, d)) {
313                                                return false;
314                                        }
315                                        toTest.addAll(reasoner.getEquivalentClasses(d));
316                                }
317                        } else {
318                                // none of the superclasses of the class to learn must appear on the
319                                // outermost property level
320                                TreeSet<OWLClassExpression> toTest = new TreeSet<>();
321                                if(classToDescribe != null){
322                                        toTest.add(classToDescribe);
323                                }
324                                while(!toTest.isEmpty()) {
325                                        OWLClassExpression d = toTest.pollFirst();
326                                        if(OWLClassExpressionUtils.occursOnFirstLevel(description, d)) {
327                                                return false;
328                                        }
329                                        toTest.addAll(reasoner.getClassHierarchy().getSuperClasses(d));
330                                }
331                        }       
332                        return true;
333                } else {
334                        // the class to learn must not appear on the outermost property level
335                        if(classToDescribe != null && OWLClassExpressionUtils.occursOnFirstLevel(description, classToDescribe)) {
336                                return false;
337                        }
338                }
339                
340                return true;
341        }
342        
343        private void writeSearchTree() {
344                StringBuilder treeString = new StringBuilder();
345                treeString.append(startNode.getTreeString(renderer)).append("\n");
346
347                // replace or append
348                if (replaceSearchTree) {
349                        Files.createFile(new File(searchTreeFile), treeString.toString());
350                } else {
351                        Files.appendToFile(new File(searchTreeFile), treeString.toString());
352                }
353        }
354        
355        /**
356         * @param heuristic the heuristic to set
357         */
358        public void setHeuristic(ELHeuristic heuristic) {
359                this.heuristic = heuristic;
360        }
361        
362        public boolean isInstanceBasedDisjoints() {
363                return instanceBasedDisjoints;
364        }
365
366        public void setInstanceBasedDisjoints(boolean instanceBasedDisjoints) {
367                this.instanceBasedDisjoints = instanceBasedDisjoints;
368        }
369        
370        /**
371         * @param stopOnFirstDefinition the stopOnFirstDefinition to set
372         */
373        public void setStopOnFirstDefinition(boolean stopOnFirstDefinition) {
374                this.stopOnFirstDefinition = stopOnFirstDefinition;
375        }
376        
377        /**
378         * @return the stopOnFirstDefinition
379         */
380        public boolean isStopOnFirstDefinition() {
381                return stopOnFirstDefinition;
382        }
383        
384        /**
385         * @return the start node
386         */
387        public SearchTreeNode getStartNode() {
388                return startNode;
389        }       
390        
391        /**
392         * @return the noise in percentage
393         */
394        public double getNoisePercentage() {
395                return noisePercentage;
396        }
397        
398        /**
399         * @param noisePercentage the noise in percentage to set
400         */
401        public void setNoisePercentage(double noisePercentage) {
402                this.noisePercentage = noisePercentage;
403        }
404        
405        /**
406         * @param startClass the start class to set
407         */
408        public void setStartClass(OWLClassExpression startClass) {
409                this.startClass = startClass;
410        }
411        
412        /**
413         * @return the start class
414         */
415        public OWLClassExpression getStartClass() {
416                return startClass;
417        }
418        
419        /**
420         * @param ignoredConcepts the ignored concepts to set
421         */
422        @Override
423        public void setIgnoredConcepts(Set<OWLClass> ignoredConcepts) {
424                this.ignoredConcepts = ignoredConcepts;
425        }
426        
427        /**
428         * @return the ignored concepts
429         */
430        @Override
431        public Set<OWLClass> getIgnoredConcepts() {
432                return ignoredConcepts;
433        }
434        
435        /**
436         * @param classToDescribe the classToDescribe to set
437         */
438        public void setClassToDescribe(OWLClass classToDescribe) {
439                this.classToDescribe = classToDescribe;
440        }
441        
442        /**
443         * @param maxNrOfResults the maxNrOfResults to set
444         */
445        public void setMaxNrOfResults(int maxNrOfResults) {
446                this.maxNrOfResults = maxNrOfResults;
447        }
448        
449        /**
450         * @param maxClassExpressionDepth the maximum class expression depth to set
451         */
452        public void setMaxClassExpressionDepth(int maxClassExpressionDepth) {
453                this.maxClassExpressionDepth = maxClassExpressionDepth;
454        }
455        
456        /**
457         * @param writeSearchTree the writeSearchTree to set
458         */
459        public void setWriteSearchTree(boolean writeSearchTree) {
460                this.writeSearchTree = writeSearchTree;
461        }
462        
463        /**
464         * @param searchTreeFile the searchTreeFile to set
465         */
466        public void setSearchTreeFile(String searchTreeFile) {
467                this.searchTreeFile = searchTreeFile;
468        }
469        
470        /**
471         * @param replaceSearchTree the replaceSearchTree to set
472         */
473        public void setReplaceSearchTree(boolean replaceSearchTree) {
474                this.replaceSearchTree = replaceSearchTree;
475        }
476
477}