001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.ocel; 020 021import com.google.common.collect.Sets; 022import com.jamonapi.Monitor; 023import org.apache.log4j.Level; 024import org.dllearner.accuracymethods.AccMethodNoWeakness; 025import org.dllearner.core.*; 026import org.dllearner.core.config.ConfigOption; 027import org.dllearner.core.options.CommonConfigOptions; 028import org.dllearner.core.owl.ClassHierarchy; 029import org.dllearner.core.owl.DatatypePropertyHierarchy; 030import org.dllearner.core.owl.ObjectPropertyHierarchy; 031import org.dllearner.learningproblems.*; 032import org.dllearner.reasoning.ReasonerType; 033import org.dllearner.refinementoperators.*; 034import org.dllearner.utilities.Files; 035import org.dllearner.utilities.Helper; 036import org.dllearner.utilities.JamonMonitorLogger; 037import org.dllearner.utilities.TreeUtils; 038import org.dllearner.utilities.datastructures.SearchTreeNonWeak; 039import org.dllearner.utilities.datastructures.SearchTreeNonWeakPartialSet; 040import org.dllearner.utilities.owl.ConceptTransformation; 041import org.dllearner.utilities.owl.EvaluatedDescriptionPosNegComparator; 042import org.dllearner.utilities.owl.OWLClassExpressionLengthMetric; 043import 
org.dllearner.utilities.owl.OWLClassExpressionUtils; 044import org.semanticweb.owlapi.model.*; 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047import org.springframework.beans.factory.annotation.Autowired; 048 049import java.io.File; 050import java.text.DecimalFormat; 051import java.util.*; 052 053/** 054 * The DL-Learner learning algorithm component for the example 055 * based refinement operator approach. It handles all 056 * configuration options, creates the corresponding objects and 057 * passes them to the actual refinement operator, heuristic, and 058 * learning algorithm implementations. 059 * 060 * Note: The options supported by the ROLearner component and this 061 * one are not equal. Options that have been dropped for now: 062 * - horizontal expansion factor: The goal of the algorithm will 063 * be to (hopefully) be able to learn long and complex concepts 064 * more efficiently. 065 * A horizontal expansion factor has its benefits, but limits 066 * the length of concepts learnable in reasonable time to 067 * about 15 with its default value of 0.6 and a small sized 068 * background knowledge base. We hope to get more fine-grained 069 * control of whether it makes sense to extend a node with 070 * more sophisticated heuristics. 071 * Dropping the horizontal expansion factor means that the 072 * completeness of the algorithm depends on the heuristic. 
 *
 * @author Jens Lehmann
 *
 */
@ComponentAnn(name = "OWL Class Expression Learner", shortName = "ocel", version = 1.2)
public class OCEL extends AbstractCELA {

    // actual algorithm
    private static Logger logger = LoggerFactory.getLogger(OCEL.class);
    private String logLevel = CommonConfigOptions.logLevelDefault;

    // often the learning problems needn't be accessed directly; instead
    // use the example sets below and the posonly variable
    private OWLClassExpression startDescription;
    private int nrOfExamples;
    private int nrOfPositiveExamples;
    private Set<OWLIndividual> positiveExamples;
    private int nrOfNegativeExamples;
    private Set<OWLIndividual> negativeExamples;

    // quality threshold: how many examples may be misclassified by a solution
    // (derived from noisePercentage in start())
    private int allowedMisclassifications = 0;

    // search tree options
    @ConfigOption(defaultValue = "false", description = "specifies whether to write a search tree")
    private boolean writeSearchTree;
    @ConfigOption(defaultValue="log/searchTree.txt", description="file to use for the search tree")
    private File searchTreeFile;
    @ConfigOption(defaultValue="false", description="specifies whether to replace the search tree in the log file after each run or append the new search tree")
    private boolean replaceSearchTree = false;

    // constructs to improve performance
    @ConfigOption(defaultValue = "true", description = "exclude too weak concepts when they occur as sub concept")
    private boolean useTooWeakList = true;
    @ConfigOption(defaultValue = "true")
    private boolean useOverlyGeneralList = true;
    @ConfigOption(defaultValue = "true", description = "whether to shorten concepts to ignore identical refinement. " +
            "e.g. male AND male is shortened to male. ")
    private boolean useShortConceptConstruction = true;

    // extended Options
    private boolean maxExecutionTimeAlreadyReached = false;
    @ConfigOption(defaultValue = "0", description = "Minimum time the algorithm has to run before termination (even if solution " +
            "already found")
    private int minExecutionTimeInSeconds = CommonConfigOptions.minExecutionTimeInSecondsDefault;
    private boolean minExecutionTimeAlreadyReached = false;
    @ConfigOption(defaultValue = "1", description = "how many sufficient solutions must be found before termination, if " +
            "terminateOnNoiseReached is enabled")
    private int guaranteeXgoodDescriptions = CommonConfigOptions.guaranteeXgoodDescriptionsDefault;
    private boolean guaranteeXgoodAlreadyReached = false;
    @ConfigOption(defaultValue = "0", description = "The maximum number of candidate hypothesis the algorithm is " +
            "allowed to test (0 = no limit). The algorithm will stop afterwards")
    private int maxClassDescriptionTests = CommonConfigOptions.maxClassDescriptionTestsDefault;

    @ConfigOption(defaultValue = "false", description = "if set to false we do not test properness; this may seem wrong " +
            "but the disadvantage of properness testing are additional reasoner queries and a search bias towards " +
            "ALL r.something because ALL r.TOP is improper and automatically expanded further")
    private boolean usePropernessChecks = false;

    @ConfigOption(defaultValue = "false", description = "tree traversal means to run through the most promising " +
            "concepts and connect them in an intersection to find a solution (this is called irregularly e.g. " +
            "every 100 seconds)")
    private boolean useTreeTraversal = false;

    @ConfigOption(defaultValue = "true",
            description = "if this variable is set to true, then the refinement operator is applied until all " +
                    "concept of equal length have been found e.g. TOP -> A1 -> A2 -> A3 is found in one loop; " +
                    "the disadvantage are potentially more method calls, but the advantage is that the algorithm " +
                    "is better in locating relevant concept in the subsumption hierarchy (otherwise, if the most " +
                    "general concept is not promising, it may never get expanded)")
    private boolean forceRefinementLengthIncrease = true;

    @ConfigOption(defaultValue = "true",
            description = "candidate reduction: using this mechanism we can simulate the divide&conquer approach " +
                    "in many ILP programs using a clause by clause search; after a period of time the candidate " +
                    "set is reduced to focus CPU time on the most promising concepts")
    private boolean useCandidateReduction = true;
    @ConfigOption(defaultValue = "30", description = "maximum number of candidates to retain")
    private int candidatePostReductionSize = 30;

    // solution protocol
    private List<ExampleBasedNode> solutions = new LinkedList<>();

    @ConfigOption(defaultValue = "false", description = "specifies whether to compute and log benchmark information")
    private boolean computeBenchmarkInformation = false;

    // comparator used to maintain a stable ordering of nodes, i.e.
    // an ordering which does not change during the run of the algorithm
    private NodeComparatorStable nodeComparatorStable = new NodeComparatorStable();
    // node from which algorithm has started
    private SearchTreeNonWeak<ExampleBasedNode> searchTreeStable;
    private SearchTreeNonWeakPartialSet<ExampleBasedNode> searchTree;

    // evaluated descriptions

    // comparator for evaluated descriptions
    private EvaluatedDescriptionPosNegComparator edComparator = new EvaluatedDescriptionPosNegComparator();

    // utility variables
    private DecimalFormat df = new DecimalFormat();

    // all concepts which have been evaluated as being proper refinements
    private SortedSet<OWLClassExpression> properRefinements = new TreeSet<>();

    // blacklists
    private SortedSet<OWLClassExpression> tooWeakList = new TreeSet<>();
    private SortedSet<OWLClassExpression> overlyGeneralList = new TreeSet<>();

    // set of expanded nodes (TODO: better explanation)
    TreeSet<ExampleBasedNode> expandedNodes = new TreeSet<>(nodeComparatorStable);

    // statistic variables
    private int maxRecDepth = 0;
    private int maxNrOfRefinements = 0;
    private int maxNrOfChildren = 0;
    private int redundantConcepts = 0;
    private int propernessTestsReasoner = 0;
    private int propernessTestsAvoidedByShortConceptConstruction = 0;
    private int propernessTestsAvoidedByTooWeakList = 0;
    private int conceptTestsTooWeakList = 0;
    private int conceptTestsOverlyGeneralList = 0;
    private int conceptTestsReasoner = 0;

    // time variables (all in nanoseconds unless noted otherwise)
    private long runtime;
    private long algorithmStartTime;
    private long propernessCalcTimeNs = 0;
    private long propernessCalcReasoningTimeNs = 0;
    private long childConceptsDeletionTimeNs = 0;
    private long refinementCalcTimeNs = 0;
    private long redundancyCheckTimeNs = 0;
    private long evaluateSetCreationTimeNs = 0;
    private long improperConceptsRemovalTimeNs = 0;

    @ConfigOption(defaultValue = "true", description = "specifies whether to terminate when noise criterion is met")
    private boolean terminateOnNoiseReached = true;

    @ConfigOption(defaultValue = "1.0", description = "(for the ExampleBasedNode.) weighting factor on the number of " +
            "true negatives (true positives are weigthed with 1)")
    private double negativeWeight = 1.0;

    @ConfigOption(defaultValue = "0.1", description = "(for the ExampleBasedNode.) the score value for the start node")
    private double startNodeBonus = 0.1;

    @ConfigOption(description = "For the MultiHeuristic: how much accuracy gain is worth an increase of horizontal " +
            "expansion by one (typical value: 0.01)",
            defaultValue = "0.02")
    private double expansionPenaltyFactor = 0.02;

    @ConfigOption(defaultValue = "0", description = "(for the ExampleBasedNode.) penalty value to deduce for using a " +
            "negated class expression (complementOf)")
    private int negationPenalty = 0;

    @ConfigOption(description = "adjust the weights of class expression length in refinement",
            defaultValue = "OCEL default metric")
    private OWLClassExpressionLengthMetric lengthMetric;

    // dependencies
    @ConfigOption(defaultValue = "RhoDRDown", description = "the refinement operator instance to use")
    private LengthLimitedRefinementOperator operator;
    @ConfigOption(description = "the heuristic to guide the search", defaultValue = "MultiHeuristic")
    private ExampleBasedHeuristic heuristic;

    // configuration options
    private static String defaultSearchTreeFile = "log/searchTree.txt";

    @ConfigOption(defaultValue = "true", description = "if enabled, modifies the subsumption hierarchy such that for " +
            "each class, there is only a single path to reach it via upward and downward refinement respectively.")
    private boolean improveSubsumptionHierarchy = true;

    private static double noisePercentageDefault = 0.0;
    @ConfigOption(defaultValue = "0.0", description = "noise regulates how many positives can be misclassified and when " +
            "the algorithm terminates")
    private double noisePercentage = noisePercentageDefault;
    @ConfigOption(
            defaultValue = "owl:Thing",
            description = "You can specify a start class for the algorithm",
            exampleValue = "ex:Male or http://example.org/ontology/Female")
    private OWLClassExpression startClass = null;

    // variables for configuring the logging
    @ConfigOption(defaultValue = "false", description = "show additional timing info for benchmark purposes")
    boolean showBenchmarkInformation = false;

    public OCEL() {
    }

    // should later accept an operator and a heuristic as arguments
    public OCEL(PosNegLP learningProblem, AbstractReasonerComponent reasoningService) {
        super(learningProblem, reasoningService);
    }

    public OCEL(PosOnlyLP learningProblem, AbstractReasonerComponent reasoningService) {
        super(learningProblem, reasoningService);
    }

    /* (non-Javadoc)
     * @see org.dllearner.core.Component#init()
     */
    @Override
    public void init() throws ComponentInitException {
        // exit with a ComponentInitException if the reasoner is unsupported for this learning algorithm
        if (getReasoner().getReasonerType() == ReasonerType.DIG) {
            throw new ComponentInitException("DIG does not support the inferences needed in the selected learning algorithm component: " + AnnComponentManager.getName(this));
        }

        // set log level if the option has been set
        if (!logLevel.equals(CommonConfigOptions.logLevelDefault))
            org.apache.log4j.Logger.getLogger(OCEL.class).setLevel(Level.toLevel(logLevel, Level.toLevel(CommonConfigOptions.logLevelDefault)));

        if (searchTreeFile == null)
            searchTreeFile = new File(defaultSearchTreeFile);

        if (writeSearchTree)
            Files.clearFile(searchTreeFile);

        // adjust heuristic

        if (heuristic == null) {
            if (getLearningProblem() instanceof PosOnlyLP) {
                throw new RuntimeException("does not work with positive examples only yet");
            } else {
                heuristic = new MultiHeuristic(((PosNegLP) getLearningProblem()).getPositiveExamples().size(), ((PosNegLP) getLearningProblem()).getNegativeExamples().size(), negativeWeight, startNodeBonus, expansionPenaltyFactor, negationPenalty);
            }
        } else {
            // we need to set some variables to make the heuristic work
            if (heuristic instanceof MultiHeuristic) {
                MultiHeuristic mh = ((MultiHeuristic) heuristic);
                if (mh.getNrOfNegativeExamples() == 0) {
                    mh.setNrOfNegativeExamples(((PosNegLP) getLearningProblem()).getNegativeExamples().size());
                }
                int nrPosEx = ((PosNegLP) getLearningProblem()).getPositiveExamples().size();
                int nrNegEx = ((PosNegLP) getLearningProblem()).getNegativeExamples().size();
                if (mh.getNrOfExamples() == 0) {
                    mh.setNrOfExamples(nrPosEx + nrNegEx);
                }
                // NOTE(review): this repeats the getNrOfNegativeExamples() == 0 check from
                // above; after the first assignment this branch can no longer fire
                if (mh.getNrOfNegativeExamples() == 0) {
                    mh.setNrOfNegativeExamples(nrNegEx);
                }
            } else if (heuristic instanceof FlexibleHeuristic) {
                FlexibleHeuristic h2 = (FlexibleHeuristic) heuristic;
                if (h2.getNrOfNegativeExamples() == 0) {
                    h2.setNrOfNegativeExamples(((PosNegLP) getLearningProblem()).getNegativeExamples().size());
                }
            }

        }

        // compute used concepts/roles from allowed/ignored
        // concepts/roles

        // prepare subsumption and role hierarchies, because they are needed
        // during the run of the algorithm;
        // in contrast to before, the learning algorithms have to maintain their
        // own view on the class hierarchy
        ClassHierarchy classHierarchy = initClassHierarchy();
        ObjectPropertyHierarchy objectPropertyHierarchy = initObjectPropertyHierarchy();
        DatatypePropertyHierarchy datatypePropertyHierarchy = initDataPropertyHierarchy();
        if (improveSubsumptionHierarchy) {
            classHierarchy.thinOutSubsumptionHierarchy();
            objectPropertyHierarchy.thinOutSubsumptionHierarchy();
            datatypePropertyHierarchy.thinOutSubsumptionHierarchy();
        }

        // create a refinement operator and pass all configuration
        // variables to it
        if (operator == null) {
            // we use a default operator and inject the class hierarchy for now
            operator = new RhoDRDown();
            if (operator instanceof CustomStartRefinementOperator) {
                ((CustomStartRefinementOperator) operator).setStartClass(startClass);
            }
            if (operator instanceof ReasoningBasedRefinementOperator) {
                ((ReasoningBasedRefinementOperator) operator).setReasoner(reasoner);
            }
            operator.init();
        }
        // TODO: find a better solution as this is quite difficult to debug
        if (operator instanceof CustomHierarchyRefinementOperator) {
            ((CustomHierarchyRefinementOperator) operator).setClassHierarchy(classHierarchy);
            ((CustomHierarchyRefinementOperator) operator).setObjectPropertyHierarchy(objectPropertyHierarchy);
            ((CustomHierarchyRefinementOperator) operator).setDataPropertyHierarchy(datatypePropertyHierarchy);
        }

        if (lengthMetric == null) {
            lengthMetric = OWLClassExpressionLengthMetric.getOCELMetric();
        }
        operator.setLengthMetric(lengthMetric);

        // create an algorithm object and pass all configuration
        // options to it

        positiveExamples = ((PosNegLP) learningProblem).getPositiveExamples();
        negativeExamples = ((PosNegLP) learningProblem).getNegativeExamples();
        nrOfPositiveExamples = positiveExamples.size();
        nrOfNegativeExamples = negativeExamples.size();

        nrOfExamples = nrOfPositiveExamples + nrOfNegativeExamples;
        baseURI = reasoner.getBaseURI();
        prefixes = reasoner.getPrefixes();
        // note: used concepts and roles do not need to be passed
        // as argument, because it is sufficient to prepare the
        // concept and role hierarchy accordingly

        initialized = true;
    }

    /**
     * Runs the top-down refinement search: repeatedly expands the
     * heuristically best node until a termination criterion is reached.
     */
    @Override
    public void start() {
        stop = false;
        isRunning = true;
        runtime = System.currentTimeMillis();

        // reset values (algorithms may be started several times)
        searchTree = new SearchTreeNonWeakPartialSet<>(heuristic);
        searchTreeStable = new SearchTreeNonWeak<>(nodeComparatorStable);
        solutions.clear();
        maxExecutionTimeAlreadyReached = false;
        minExecutionTimeAlreadyReached = false;
        guaranteeXgoodAlreadyReached = false;
        propernessTestsReasoner = 0;
        propernessTestsAvoidedByShortConceptConstruction = 0;
        propernessTestsAvoidedByTooWeakList = 0;
        conceptTestsTooWeakList = 0;
        conceptTestsOverlyGeneralList = 0;
        propernessCalcTimeNs = 0;
        propernessCalcReasoningTimeNs = 0;
        childConceptsDeletionTimeNs = 0;
        refinementCalcTimeNs = 0;
        redundancyCheckTimeNs = 0;
        evaluateSetCreationTimeNs = 0;
        improperConceptsRemovalTimeNs = 0;

        Monitor totalLearningTime = JamonMonitorLogger.getTimeMonitor(OCEL.class, "totalLearningTime").start();
        // TODO: write a JUnit test for this problem (long-lasting or infinite loops because
        // redundant children of a node are called recursively when a node is extended twice)
        /*
        */

        // calculate quality threshold required for a solution
        allowedMisclassifications = (int) Math.round(noisePercentage * nrOfExamples / 100);

        // start search with start class
        ExampleBasedNode startNode;
        if (startDescription == null) {
            // default start: owl:Thing covers all examples by definition
            startNode = new ExampleBasedNode(dataFactory.getOWLThing(), this);
            startNode.setCoveredExamples(positiveExamples, negativeExamples);
        } else {
            // custom start class: ask the reasoner which examples it covers
            startNode = new ExampleBasedNode(startDescription, this);
            Set<OWLIndividual> coveredNegatives = reasoner.hasType(startDescription, negativeExamples);
            Set<OWLIndividual> coveredPositives = reasoner.hasType(startDescription, positiveExamples);
            startNode.setCoveredExamples(coveredPositives, coveredNegatives);
        }

        searchTree.addNode(null, startNode);
        searchTreeStable.addNode(null, startNode);

        ExampleBasedNode previousBestNode = startNode;
        ExampleBasedNode bestNode = startNode;
        ExampleBasedNode bestNodeStable = startNode;

        logger.info("starting top down refinement with: " + renderer.render(startNode.getConcept()) + " (" + df.format(100 * startNode.getAccuracy()) + "% accuracy)");

        int loop = 0;

        algorithmStartTime = System.nanoTime();
        long lastPrintTime = 0;
        long lastTreeTraversalTime = System.nanoTime();
        long lastReductionTime = System.nanoTime();
        // try a traversal after x seconds
        long traversalInterval = 300L * 1000000000L;
        long reductionInterval = 300L * 1000000000L;
        long currentTime;

        while (!isTerminationCriteriaReached()) {
            // print statistics at most every three seconds (3e9 ns)
            currentTime = System.nanoTime();
            if (currentTime - lastPrintTime > 3000000000L) {
                printStatistics(false);
                lastPrintTime = currentTime;
                logger.debug("--- loop " + loop + " started ---");
            }

            // traverse the current search tree to find a solution
            if (useTreeTraversal && (currentTime - lastTreeTraversalTime > traversalInterval)) {
                traverseTree();
                lastTreeTraversalTime = System.nanoTime();
            }

            // reduce candidates to focus on promising concepts
            if (useCandidateReduction && (currentTime - lastReductionTime > reductionInterval)) {
                reduceCandidates();
                lastReductionTime = System.nanoTime();
                // Logger.getRootLogger().setLevel(Level.TRACE);
            }

            // we record when a more accurate node is found and log it
            if (bestNodeStable.getCovPosMinusCovNeg() < searchTreeStable.best()
                    .getCovPosMinusCovNeg()) {
                String acc = new DecimalFormat(".00%").format((searchTreeStable.best().getAccuracy()));
                // no handling needed, it will just look ugly in the output
                logger.info("more accurate (" + acc + ") class expression found: " + renderer.render(searchTreeStable.best().getConcept()));
                if (logger.isTraceEnabled()) {
logger.trace(Sets.difference(positiveExamples, bestNodeStable.getCoveredNegatives()).toString()); 476 logger.trace(Sets.difference(negativeExamples, bestNodeStable.getCoveredNegatives()).toString()); 477 } 478 printBestSolutions(5); 479 printStatistics(false); 480 bestNodeStable = searchTreeStable.best(); 481 } 482 483 // chose best node according to heuristics 484 bestNode = searchTree.best(); 485 // best node is removed temporarily, because extending it can 486 // change its evaluation 487 searchTree.updatePrepare(bestNode); 488 extendNodeProper(bestNode, bestNode.getHorizontalExpansion() + 1); 489 searchTree.updateDone(bestNode); 490 previousBestNode = bestNode; 491 492 if (writeSearchTree) { 493 // String treeString = ""; 494 StringBuilder treeString = new StringBuilder("best node: " + bestNode + "\n"); 495 if (expandedNodes.size() > 1) { 496 treeString.append("all expanded nodes:\n"); 497 for (ExampleBasedNode n : expandedNodes) { 498 treeString.append(" ").append(n).append("\n"); 499 } 500 } 501 expandedNodes.clear(); 502 treeString.append(TreeUtils.toTreeString(startNode, heuristic)); 503 treeString.append("\n"); 504 505 if (replaceSearchTree) 506 Files.createFile(searchTreeFile, treeString.toString()); 507 else 508 Files.appendToFile(searchTreeFile, treeString.toString()); 509 } 510 511 // Anzahl Schleifendurchläufe 512 loop++; 513 }// end while 514 515 if (solutions.size() > 0) { 516 int solutionLimit = 20; 517 // we do not need to print the best node if we display the top 20 solutions below anyway 518 logger.info("solutions (at most " + solutionLimit + " are shown):"); 519 int show = 1; 520 for (ExampleBasedNode c : solutions) { 521 logger.info(show + ": " + renderer.render(c.getConcept()) 522 + " (accuracy " + df.format(100 * c.getAccuracy()) + "%, length " 523 + OWLClassExpressionUtils.getLength(c.getConcept()) 524 + ", depth " + OWLClassExpressionUtils.getDepth(c.getConcept()) + ")"); 525 if (show >= solutionLimit) { 526 break; 527 } 528 show++; 529 } 
        } else {
            logger.info("no appropriate solutions found (try increasing the noisePercentage parameter to what was reported as most accurate expression found above)");
        }

        logger.debug("size of candidate set: " + searchTree.size());
        printBestSolutions(20);

        printStatistics(true);

        int conceptTests = conceptTestsReasoner + conceptTestsTooWeakList + conceptTestsOverlyGeneralList;
        if (stop) {
            logger.info("Algorithm stopped (" + conceptTests + " descriptions tested).\n");
        } else {
            logger.info("Algorithm terminated successfully (" + conceptTests + " descriptions tested).\n");
            logger.info(reasoner.toString());
        }

        totalLearningTime.stop();
        isRunning = false;
    }

    // we apply the operator recursively until all proper refinements up
    // to the maxmimum length are reached
    private void extendNodeProper(ExampleBasedNode node, int maxLength) {
        long propCalcNsStart = System.nanoTime();

        if (writeSearchTree)
            expandedNodes.add(node);

        if (node.getChildren().size() > maxNrOfChildren)
            maxNrOfChildren = node.getChildren().size();

        extendNodeProper(node, node.getConcept(), maxLength, 0);
        node.setHorizontalExpansion(maxLength);

        propernessCalcTimeNs += (System.nanoTime() - propCalcNsStart);
    }

    // for all refinements of concept up to max length, we check whether they are proper
    // and call the method recursively if not
    // recDepth is used only for statistics
    private void extendNodeProper(ExampleBasedNode node, OWLClassExpression concept, int maxLength,
            int recDepth) {

        // do not execute methods if algorithm has been stopped (this means that the algorithm
        // will terminate without further reasoning queries)
        if (stop)
            return;

        if (recDepth > maxRecDepth)
            maxRecDepth = recDepth;

        // compute refinements => we must not delete refinements with low horizontal expansion,
        // because they are used in recursive calls of this method later on
        long refinementCalcTimeNsStart = System.nanoTime();
        Set<OWLClassExpression> refinements = operator.refine(concept, maxLength, null);
        refinementCalcTimeNs += System.nanoTime() - refinementCalcTimeNsStart;

        if (refinements.size() > maxNrOfRefinements)
            maxNrOfRefinements = refinements.size();

        // remove all refinements that are already children of the node
        long childConceptsDeletionTimeNsStart = System.nanoTime();
        refinements.removeAll(node.getChildConcepts());
        childConceptsDeletionTimeNs += System.nanoTime() - childConceptsDeletionTimeNsStart;

        // evaluate all concepts whose length is bigger than the horizontal expansion of the node
        long evaluateSetCreationTimeNsStart = System.nanoTime();
        Set<OWLClassExpression> toEvaluateConcepts = new TreeSet<>();
        Iterator<OWLClassExpression> it = refinements.iterator();

        while (it.hasNext()) {

            OWLClassExpression refinement = it.next();
            if (OWLClassExpressionUtils.getLength(refinement, lengthMetric) > node.getHorizontalExpansion()) {
                // TRUE means that improperness was detected, but FALSE does not mean that the refinement is proper
                boolean impropernessDetected = false;

                // 1. short concept construction
                if (useShortConceptConstruction) {
                    OWLClassExpression shortConcept = ConceptTransformation.getShortConcept(refinement);
                    // compare with original concept
                    int n = shortConcept.compareTo(concept);

                    // concepts are equal, i.e. refinement is improper
                    if (n == 0) {
                        propernessTestsAvoidedByShortConceptConstruction++;
                        impropernessDetected = true;
                    }
                }

                // 2. too weak test
                if (!impropernessDetected && useTooWeakList) {
                    if (refinement instanceof OWLObjectIntersectionOf) {
                        boolean tooWeakElement = containsTooWeakElement((OWLObjectIntersectionOf) refinement);
                        if (tooWeakElement) {
                            propernessTestsAvoidedByTooWeakList++;
                            conceptTestsTooWeakList++;
                            impropernessDetected = true;
                            // tooWeakList.add(refinement);

                            // the node is created directly here (having two places where
                            // nodes are created would be prone to bugs, but this appears
                            // to be the most sensible place)
                            properRefinements.add(refinement);
                            tooWeakList.add(refinement);

                            ExampleBasedNode newNode = new ExampleBasedNode(refinement, this);
                            newNode.setHorizontalExpansion(OWLClassExpressionUtils.getLength(refinement, lengthMetric) - 1);
                            newNode.setTooWeak(true);
                            newNode.setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.TOO_WEAK_LIST);
                            node.addChild(newNode);

                            // the refinement has to be removed, as it is proper
                            it.remove();
                        }
                    }
                }

                // properness could not be determined beforehand
                if (!impropernessDetected) {
                    toEvaluateConcepts.add(refinement);
                }

            }

        }
        evaluateSetCreationTimeNs += System.nanoTime() - evaluateSetCreationTimeNsStart;

        Set<OWLClassExpression> improperConcepts = null;
        if (toEvaluateConcepts.size() > 0) {
            // test all concepts for properness (with DIG in only one query)
            if (usePropernessChecks) {
                long propCalcReasoningStart = System.nanoTime();
                improperConcepts = reasoner.isSuperClassOf(toEvaluateConcepts, concept);

                propernessTestsReasoner += toEvaluateConcepts.size();
                propernessCalcReasoningTimeNs += System.nanoTime() - propCalcReasoningStart;
            }
        }

        long improperConceptsRemovalTimeNsStart = System.nanoTime();
        // the improper concepts are removed from those to be evaluated, i.e.
        // all proper concepts remain (a simple renaming)
        if (improperConcepts != null)
            toEvaluateConcepts.removeAll(improperConcepts);
        Set<OWLClassExpression> properConcepts = toEvaluateConcepts;
        // delete all proper concepts from refinements
        refinements.removeAll(properConcepts);
        improperConceptsRemovalTimeNs += System.nanoTime() - improperConceptsRemovalTimeNsStart;

        for (OWLClassExpression refinement : properConcepts) {
            long redundancyCheckTimeNsStart = System.nanoTime();
            boolean nonRedundant = properRefinements.add(refinement);
            redundancyCheckTimeNs += System.nanoTime() - redundancyCheckTimeNsStart;

            if (!nonRedundant)
                redundantConcepts++;

            // a new node is only created if the concept does not
            // exist already
            if (nonRedundant) {

                // newly created node
                ExampleBasedNode newNode = new ExampleBasedNode(refinement, this);
                // the -1 is important, since otherwise refinements of equal length
                // would not be allowed for the new node, e.g. person => male
                newNode.setHorizontalExpansion(OWLClassExpressionUtils.getLength(refinement, lengthMetric) - 1);

                boolean qualityKnown = false;
                int quality = -2;

                // use the overly general list
                if (useOverlyGeneralList && refinement instanceof OWLObjectUnionOf) {
                    if (containsOverlyGeneralElement((OWLObjectUnionOf) refinement)) {
                        conceptTestsOverlyGeneralList++;
                        quality = nrOfNegativeExamples;
                        qualityKnown = true;
                        newNode.setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.OVERLY_GENERAL_LIST);
                        newNode.setCoveredExamples(positiveExamples, negativeExamples);
                    }

                }

                // evaluate the quality of the node
                if (!qualityKnown) { // -> quality == -2
                    long propCalcReasoningStart2 = System.nanoTime();
                    conceptTestsReasoner++;

                    // determine individuals which have not been covered yet
                    // (more efficient than full retrieval)
                    Set<OWLIndividual> coveredPositives = node.getCoveredPositives();
                    Set<OWLIndividual> newlyCoveredPositives = new HashSet<>();

                    // calculate how many pos. examples are not covered by the
                    // parent node of the refinement
                    int misclassifiedPositives = nrOfPositiveExamples - coveredPositives.size();

                    // iterate through all covered examples (examples which are not
                    // covered do not need to be tested, because they remain uncovered);
                    // DIG will be slow if we send each reasoner request individually
                    // (however if we send everything in one request, too many instance checks
                    // are performed => rely on fast instance checker)
                    for (OWLIndividual i : coveredPositives) {
                        // TODO: move code to a separate function
                        if (quality != -1) {
                            boolean covered = reasoner.hasType(refinement, i);
                            if (!covered)
                                misclassifiedPositives++;
                            else
                                newlyCoveredPositives.add(i);

                            // too many positives lost => concept is too weak (quality -1)
                            if (misclassifiedPositives > allowedMisclassifications)
                                quality = -1;

                        }
                    }

                    Set<OWLIndividual> newlyCoveredNegatives = null;
                    if (quality != -1) {
                        Set<OWLIndividual> coveredNegatives = node.getCoveredNegatives();
                        newlyCoveredNegatives = new HashSet<>();

                        for (OWLIndividual i : coveredNegatives) {
                            boolean covered = reasoner.hasType(refinement, i);
                            if (covered)
                                newlyCoveredNegatives.add(i);
                        }
                    }

                    propernessCalcReasoningTimeNs += System.nanoTime() - propCalcReasoningStart2;
                    newNode.setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.REASONER);
                    // let the configured accuracy method veto the concept as too weak
                    if (quality != -1 && !(((PosNegLP) learningProblem).getAccuracyMethod() instanceof AccMethodNoWeakness) &&
                            ((PosNegLP) learningProblem).getAccuracyMethod().getAccOrTooWeak2(
                                    newlyCoveredPositives.size(), nrOfPositiveExamples - newlyCoveredPositives.size(),
                                    newlyCoveredNegatives.size(), nrOfNegativeExamples - newlyCoveredNegatives.size(),
                                    1) == -1)
                        quality = -1;

                    if (quality != -1) {
                        // quality is the number of misclassifications (if it is
                        // not too weak)
                        quality = (nrOfPositiveExamples - newlyCoveredPositives.size())
                                + newlyCoveredNegatives.size();
                        newNode.setCoveredExamples(newlyCoveredPositives, newlyCoveredNegatives);
                    }

                }

                if (quality == -1) {
                    newNode.setTooWeak(true);
                    // blacklist for too weak concepts
                    tooWeakList.add(refinement);
                } else {
                    // solution found
                    if (quality >= 0 && quality <= allowedMisclassifications) {
                        solutions.add(newNode);
                    }

                    // we need to make sure that all positives are covered
                    // before adding something to the overly general list
                    if ((newNode.getCoveredPositives().size() == nrOfPositiveExamples)
                            && quality == nrOfNegativeExamples)
                        overlyGeneralList.add(refinement);

                }

                node.addChild(newNode);

                // it is often useful to continue expanding until a longer node is
                // reached (to replace atomic concepts with more specific ones)
                if (forceRefinementLengthIncrease && !newNode.isTooWeak()) {
                    // extend node again if its concept has the same length
                    if (OWLClassExpressionUtils.getLength(node.getConcept(), lengthMetric) == OWLClassExpressionUtils.getLength(newNode.getConcept(), lengthMetric)) {
                        extendNodeProper(newNode, refinement, maxLength, recDepth + 1);
                    }
                }

            }
        }

        // all concepts that remain now are improper refinements;
        // the method is invoked again on each of them, since proper
        // refinements could emerge from them
        for (OWLClassExpression refinement : refinements) {
            // check for redundancy (otherwise we may run into very
            // time-intensive loops, see planned JUnit test case $x)

            long redundancyCheckTimeNsStart = System.nanoTime();
            boolean redundant = properRefinements.contains(refinement);
            redundancyCheckTimeNs += System.nanoTime() - redundancyCheckTimeNsStart;

            if (!redundant) {
                extendNodeProper(node, refinement, maxLength, recDepth + 1);
            }
        }
    }

    private void printStatistics(boolean finalStats) {
        // TODO: viele Tests haben
		// ergeben, dass man nie 100% mit der Zeitmessung abdecken
		// kann (zum einen weil Stringausgabe verzögert erfolgt und zum anderen weil
		// Funktionsaufrufe, garbage collection, Zeitmessung selbst auch Zeit benötigt);
		// es empfiehlt sich folgendes Vorgehen:
		// - Messung der Zeit eines Loops im Algorithmus
		// - Messung der Zeit für alle node extensions innerhalb eines Loops
		// => als Normalisierungsbasis empfehlen sich dann die Loopzeit statt
		// Algorithmuslaufzeit
		// ... momentan kann es aber auch erstmal so lassen
		// [translation of the German TODO above] Many tests have shown that the
		// time measurement can never cover 100%: string output is delayed, and
		// function calls, garbage collection and the measurement itself also cost
		// time. Recommended approach: measure the time of one algorithm loop and
		// of all node extensions within it, then normalise against the loop time
		// instead of the total algorithm runtime. For now it can stay as it is.

		long algorithmRuntime = System.nanoTime() - algorithmStartTime;

		// non-final statistics: current best node, next node to expand, reasoner times
		if (!finalStats) {

			ExampleBasedNode bestNode = searchTreeStable.best();
			logger.debug("start node: "
					+ searchTreeStable.getRoot().getShortDescription());
			String bestNodeString = "currently best node: "
					+ bestNode.getShortDescription();

			logger.debug(bestNodeString);
			logger.trace(bestNode.getStats());
			if (bestNode.getCoveredNegatives().size() <= 5)
				logger.trace("covered negs: " + bestNode.getCoveredNegatives());
			String expandedNodeString = "next expanded node: "
					+ searchTree.best().getShortDescription();
			logger.debug(expandedNodeString);
			logger.debug("algorithm runtime " + Helper.prettyPrintNanoSeconds(algorithmRuntime));
			logger.debug("size of candidate set: " + searchTree.size());
			logger.debug("subsumption time: " + Helper.prettyPrintNanoSeconds(reasoner.getSubsumptionReasoningTimeNs()));
			logger.debug("instance check time: " + Helper.prettyPrintNanoSeconds(reasoner.getInstanceCheckReasoningTimeNs()));
			logger.debug("retrieval time: " + Helper.prettyPrintNanoSeconds(reasoner.getRetrievalReasoningTimeNs()));
		}

		// benchmark breakdown: each measured phase as a percentage of total runtime
		if (computeBenchmarkInformation) {

			long reasoningTime = reasoner.getOverallReasoningTimeNs();
			double reasoningPercentage = 100 * reasoningTime / (double) algorithmRuntime;
			long propWithoutReasoning = propernessCalcTimeNs - propernessCalcReasoningTimeNs;
			double propPercentage = 100 * propWithoutReasoning / (double) algorithmRuntime;
			double deletionPercentage = 100 * childConceptsDeletionTimeNs / (double) algorithmRuntime;
			long subTime = reasoner.getSubsumptionReasoningTimeNs();
			double subPercentage = 100 * subTime / (double) algorithmRuntime;
			double refinementPercentage = 100 * refinementCalcTimeNs / (double) algorithmRuntime;
			double redundancyCheckPercentage = 100 * redundancyCheckTimeNs / (double) algorithmRuntime;
			double evaluateSetCreationTimePercentage = 100 * evaluateSetCreationTimeNs / (double) algorithmRuntime;
			double improperConceptsRemovalTimePercentage = 100 * improperConceptsRemovalTimeNs / (double) algorithmRuntime;

			logger.debug("reasoning percentage: " + df.format(reasoningPercentage) + "%");
			logger.debug(" subsumption check time: " + df.format(subPercentage) + "%");
			logger.debug("proper calculation percentage (wo. reasoning): " + df.format(propPercentage) + "%");
			logger.debug(" deletion time percentage: " + df.format(deletionPercentage) + "%");
			logger.debug(" refinement calculation percentage: " + df.format(refinementPercentage) + "%");

			// operator-specific statistics only available for RhoDRDown
			if (operator instanceof RhoDRDown) {
				double mComputationTimePercentage = 100 * ((RhoDRDown) operator).mComputationTimeNs / (double) algorithmRuntime;
				double topComputationTimePercentage = 100 * ((RhoDRDown) operator).topComputationTimeNs / (double) algorithmRuntime;
				logger.debug(" m calculation percentage: " + df.format(mComputationTimePercentage) + "%");
				logger.debug(" top calculation percentage: " + df.format(topComputationTimePercentage) + "%");
			}

			double cleanTimePercentage = 100 * ConceptTransformation.cleaningTimeNs / (double) algorithmRuntime;
			double onnfTimePercentage = 100 * ConceptTransformation.onnfTimeNs / (double) algorithmRuntime;
			double shorteningTimePercentage = 100 * ConceptTransformation.shorteningTimeNs / (double) algorithmRuntime;

			logger.debug(" redundancy check percentage: " + df.format(redundancyCheckPercentage) + "%");
			logger.debug(" evaluate set creation time percentage: " + df.format(evaluateSetCreationTimePercentage) + "%");
			logger.debug(" improper concepts removal time percentage: " + df.format(improperConceptsRemovalTimePercentage) + "%");
			logger.debug("clean time percentage: " + df.format(cleanTimePercentage) + "%");
			logger.debug("onnf time percentage: " + df.format(onnfTimePercentage) + "%");
			logger.debug("shortening time percentage: " + df.format(shorteningTimePercentage) + "%");
		}

		logger.debug("properness tests (reasoner/short concept/too weak list): "
				+ propernessTestsReasoner + "/" + propernessTestsAvoidedByShortConceptConstruction
				+ "/" + propernessTestsAvoidedByTooWeakList);
		logger.debug("concept tests (reasoner/too weak list/overly general list/redundant concepts): "
				+ conceptTestsReasoner + "/" + conceptTestsTooWeakList + "/" + conceptTestsOverlyGeneralList + "/" + redundantConcepts);
	}

	/**
	 * Checks whether any operand of the given intersection is already on the
	 * too-weak blacklist (in which case the whole intersection is too weak).
	 *
	 * @param mc the intersection to check
	 * @return true iff some operand is contained in {@code tooWeakList}
	 */
	private boolean containsTooWeakElement(OWLObjectIntersectionOf mc) {
		for (OWLClassExpression child : mc.getOperands()) {
			if (tooWeakList.contains(child))
				return true;
		}
		return false;
	}

	/**
	 * Checks whether any operand of the given union is already on the
	 * overly-general list (in which case the whole union is overly general).
	 *
	 * @param md the union to check
	 * @return true iff some operand is contained in {@code overlyGeneralList}
	 */
	private boolean containsOverlyGeneralElement(OWLObjectUnionOf md) {
		for (OWLClassExpression child : md.getOperands()) {
			if (overlyGeneralList.contains(child))
				return true;
		}
		return false;
	}

	// Greedily builds an intersection of promising search-tree nodes, starting
	// from the node chosen by findBestTraversalStartNode(). Inspects at most
	// 1000 nodes. NOTE(review): on reaching the noise threshold this calls
	// System.exit(0), terminating the whole JVM — confirm this is intended.
	// TODO: investigate whether it makes sense not to store all individuals
	// in the nodes, but instead perform instance checks in tree traversal
	// (it is only run in large intervals so it shouldn't be too expensive)
	private void traverseTree() {
		ExampleBasedNode startNode = findBestTraversalStartNode();
		OWLClassExpression currentDescription = startNode.getConcept();
		Set<OWLIndividual> currentCoveredPos = startNode.getCoveredPositives();
		Set<OWLIndividual> currentCoveredNeg = startNode.getCoveredNegatives();
		double currentAccuracy = startNode.getAccuracy();
		int currentMisclassifications = nrOfPositiveExamples - currentCoveredPos.size()
				+ currentCoveredNeg.size();
		logger.debug("tree traversal start node "
				+ startNode
						.getShortDescription());
		logger.debug("tree traversal start accuracy: " + currentAccuracy);
		int i = 0;
		// start from the most promising nodes
		SortedSet<ExampleBasedNode> reverseView = searchTreeStable.descendingSet();
		for (ExampleBasedNode currNode : reverseView) {
			// compute covered positives and negatives (intersection with the
			// coverage of the description built so far)
			SortedSet<OWLIndividual> newCoveredPositives = new TreeSet<>(currentCoveredPos);
			newCoveredPositives.retainAll(currNode.getCoveredPositives());
			SortedSet<OWLIndividual> newCoveredNegatives = new TreeSet<>(currentCoveredNeg);
			newCoveredNegatives.retainAll(currNode.getCoveredNegatives());

			// compute the accuracy we would get by adding this node
			double accuracy = (newCoveredPositives.size() + nrOfNegativeExamples - newCoveredNegatives.size())
					/ (double) (nrOfPositiveExamples + nrOfNegativeExamples);
			int misclassifications = nrOfPositiveExamples - newCoveredPositives.size() + newCoveredNegatives.size();
			int misclassifiedPositives = nrOfPositiveExamples - newCoveredPositives.size();

			int lostPositives = currentCoveredPos.size() - newCoveredPositives.size();

			// TODO: maybe we should also consider a minimum improvement when adding something
			// otherwise we could overfit
			// we give double weight to lost positives, i.e. when one positive is
			// lost at least two negatives need to be uncovered
			boolean consider = (misclassifications + lostPositives < currentMisclassifications)
					&& (misclassifiedPositives <= allowedMisclassifications);

			// concept has been chosen, so construct it
			if (consider) {

				// construct a new concept as intersection of both
				OWLClassExpression mc = dataFactory.getOWLObjectIntersectionOf(currentDescription, currNode.getConcept());

				mc = ConceptTransformation.cleanConceptNonRecursive(mc);
				mc = mc.getNNF();

				logger.debug("misclassifications: " + misclassifications);
				logger.debug("misclassified positives: " + misclassifiedPositives);
				logger.debug("accuracy: " + accuracy);

				// update variables
				currentDescription = mc;
				currentCoveredPos = newCoveredPositives;
				currentCoveredNeg = newCoveredNegatives;
				currentMisclassifications = misclassifications;
				//noinspection UnusedAssignment
				currentAccuracy = accuracy;

				// stop as soon as the noise threshold is satisfied
				if (accuracy > 1 - (noisePercentage / 100)) {
					logger.info("traversal found " + mc);
					logger.info("accuracy: " + accuracy);
					System.exit(0);
				}
			}

			i++;
			if (i == 1000)
				break;
		}

	}

	// we look for a node covering many positives and hopefully
	// few negatives; we give a strong penalty on uncovered positives
	/**
	 * Scores up to 10000 of the best stable-tree nodes and returns the one
	 * with the highest score (2 points per covered positive plus 1 point per
	 * uncovered negative), or null if no node scores above 0.
	 */
	private ExampleBasedNode findBestTraversalStartNode() {
		// 2 points for each covered pos + 1 point for each uncovered neg
		int currScore = 0;
		int i = 0;
		ExampleBasedNode currNode = null;
		SortedSet<ExampleBasedNode> reverseView = searchTreeStable.descendingSet();
		for (ExampleBasedNode node : reverseView) {
			int score = 2 * node.getCoveredPositives().size()
					+ (nrOfNegativeExamples - node.getCoveredNegatives().size());
			if (score > currScore) {
				currScore = score;
				currNode = node;
			}
			i++;
			// limit search because stable candidate set can grow very large
			if (i == 10000)
				break;
		}
		return currNode;
	}

	/**
	 * Shrinks the candidate search tree to at most
	 * {@code candidatePostReductionSize} promising nodes, selected from the
	 * best stable-tree nodes by two criteria (accuracy gain over the parent
	 * and remaining refinement potential).
	 */
	private void reduceCandidates() {
		Iterator<ExampleBasedNode> it = searchTreeStable.descendingIterator();
		Set<ExampleBasedNode> promisingNodes = new HashSet<>();
		int i = 0;
		while (it.hasNext() && promisingNodes.size() < candidatePostReductionSize) {
			ExampleBasedNode node = it.next();
			// first criterion: the considered node should have an accuracy gain over its parent
			// (avoids to use only the most promising node + all its refinements with equal accuracy)
			boolean hasAccuracyGain = (node.getParent() == null)
					|| (node.getCoveredPositives().size() != node.getParent().getCoveredPositives().size())
					|| (node.getCoveredNegatives().size() != node.getParent().getCoveredNegatives().size());
			// second criterion: uncovered positives; it does not make much sense to pick nodes with
			// low potential for reaching a solution (already at the limit of misclassified positives)
			int misclassifiedPositives = nrOfPositiveExamples - node.getCoveredPositives().size();
			boolean hasRefinementPotential = (misclassifiedPositives <= Math.floor(0.65d * allowedMisclassifications));
			boolean keep = hasAccuracyGain && hasRefinementPotential;
			if (keep) {
				promisingNodes.add(node);
			}
			i++;
		}
		searchTree.retainAll(promisingNodes);
		logger.debug("searched " + i + " nodes and picked the following promising descriptions:");
		if (logger.isDebugEnabled()) {
			for (ExampleBasedNode node : promisingNodes)
				logger.debug(node.getShortDescription());
		}
	}

	/** @return the concept of the best node in the stable search tree */
	public OWLClassExpression getBestSolution() {
		return searchTreeStable.best().getConcept();
	}

	/**
	 * Returns the currently best class expressions, best first.
	 * NOTE(review): the counter is incremented after the check, so up to 201
	 * elements can be returned rather than 200 — confirm whether intended.
	 */
	public List<OWLClassExpression> getCurrentlyBestDescriptions() {
		List<OWLClassExpression> best = new LinkedList<>();
		int i = 0;
		int nrOfSolutions = 200;
		for (ExampleBasedNode n : searchTreeStable.descendingSet()) {
			best.add(n.getConcept());
			if (i == nrOfSolutions)
				return best;
			i++;
		}
		return best;
	}

	/**
	 * Returns the currently best nodes as evaluated descriptions (scored via
	 * the learning problem), capped at roughly 200 elements because the
	 * candidate set can be very large.
	 */
	public TreeSet<EvaluatedDescriptionPosNeg> getCurrentlyBestEvaluatedDescriptions() {
		Iterator<ExampleBasedNode> it = searchTreeStable.descendingIterator();
		int count = 0;
		TreeSet<EvaluatedDescriptionPosNeg> cbd = new TreeSet<>(edComparator);
		while (it.hasNext()) {
			ExampleBasedNode eb = it.next();
			cbd.add(new EvaluatedDescriptionPosNeg(eb.getConcept(), getScore(eb.getConcept())));
			// return a maximum of 200 elements (we need a maximum, because the
			// candidate set can be very large)
			if (count > 200)
				return cbd;
			count++;
		}
		return cbd;
	}

	/**
	 * Traces the best solutions found so far; only nodes with accuracy 1 are
	 * printed (the loop breaks at the first node below full accuracy).
	 *
	 * @param nrOfSolutions maximum number of solutions to print; 0 means all
	 */
	public void printBestSolutions(int nrOfSolutions) {
		// QUALITY: could be optimized
		if (!logger.isTraceEnabled()) {
			return;
		}
		if (nrOfSolutions == 0)
			nrOfSolutions = searchTreeStable.size();
		int i = 0;
		for (ExampleBasedNode n : searchTreeStable.descendingSet()) {
			if (n.getAccuracy() < 1)
				break;
			logger.trace("best: "
					+ n.getShortDescription());
			if (i == nrOfSolutions)
				break;
			i++;
		}

	}

	/** @return the learning-problem score of the best solution */
	public ScorePosNeg getSolutionScore() {
		return ((PosNegLP) learningProblem).computeScore(getBestSolution());
	}

	// scores an arbitrary class expression via the learning problem
	private ScorePosNeg getScore(OWLClassExpression d) {
		return ((PosNegLP) learningProblem).computeScore(d);
	}

	/** @return the root node of the stable search tree */
	public ExampleBasedNode getStartNode() {
		return searchTreeStable.getRoot();
	}

	/**
	 * Decides whether the algorithm should stop. This does not always depend
	 * on whether an actual solution was found.
	 * The algorithm stops if:
	 * 1. the object attribute stop is set to true (possibly by an outside source)
	 * 2. the maximum execution time is reached
	 * 3. the maximum number of class description tests is reached
	 *
	 * Continuation criteria and result improvement:
	 * the algorithm continues (although it would normally stop) if
	 * 1. the minimum execution time is not reached (default 0)
	 * 2. not enough good solutions are found (default 1)
	 * otherwise it stops
	 *
	 * @return true if the algorithm should stop; this is mostly independent of the question whether a solution was found
	 */
	private boolean isTerminationCriteriaReached() {
		// algorithm was stopped from outside
		if (this.stop) {
			return true;
		}

		long totalTimeNeeded = System.currentTimeMillis() - this.runtime;
		long maxMilliSeconds = maxExecutionTimeInSeconds * 1000;
		long minMilliSeconds = minExecutionTimeInSeconds * 1000;
		int conceptTests = conceptTestsReasoner + conceptTestsTooWeakList + conceptTestsOverlyGeneralList;
		boolean result = false;

		//ignore default
		if (maxExecutionTimeInSeconds == 0)
			result = false;
		//alreadyReached
		else if (maxExecutionTimeAlreadyReached)
			return true;
		//test
		else if (maxMilliSeconds < totalTimeNeeded) {
			this.stop();
			logger.info("Maximum time (" + maxExecutionTimeInSeconds
					+ " seconds) reached, stopping now...");
			maxExecutionTimeAlreadyReached = true;
			return true;
		}

		//ignore default
		if (maxClassDescriptionTests == 0)
			result = false;
		//test
		else if (conceptTests >= maxClassDescriptionTests) {
			logger.info("Maximum Class OWLClassExpression tests (" + maxClassDescriptionTests
					+ " tests [actual: " + conceptTests + "]) reached, stopping now...");
			return true;
		}

		// we stop if sufficiently many solutions (concepts fitting the noise parameter) have been
		// reached - unless this termination criterion is switched off using terminateOnNoiseReached = false
		if (guaranteeXgoodAlreadyReached) {
			result = true;
		} else if (solutions.size() >= guaranteeXgoodDescriptions && terminateOnNoiseReached) {
			// NOTE(review): when guaranteeXgoodDescriptions == 1 (the default),
			// result is never set here even though enough solutions exist —
			// confirm whether this asymmetry is intended
			if (guaranteeXgoodDescriptions != 1) {
				logger.info("Minimum number (" + guaranteeXgoodDescriptions + ") of good descriptions reached.");

				guaranteeXgoodAlreadyReached = true;
				result = true;
			}
		}

		// minimum-execution-time gate: keeps the previous result once the
		// minimum time has passed, otherwise forces continuation
		if (minExecutionTimeAlreadyReached) {
			result = result && true;
		} else if (minMilliSeconds < totalTimeNeeded) {
			if (minExecutionTimeInSeconds != 0) {
				logger.info("Minimum time (" + minExecutionTimeInSeconds + " seconds) reached.");
			}
			minExecutionTimeAlreadyReached = true;
			result = result && true;
		} else {
			result = false;
		}

		return result;

	}

	// ---- configuration option accessors ----

	public boolean isUseTreeTraversal() {
		return useTreeTraversal;
	}

	public void setUseTreeTraversal(boolean useTreeTraversal) {
		this.useTreeTraversal = useTreeTraversal;
	}

	public boolean isUseCandidateReduction() {
		return useCandidateReduction;
	}

	public void setUseCandidateReduction(boolean useCandidateReduction) {
		this.useCandidateReduction = useCandidateReduction;
	}

	public int getCandidatePostReductionSize() {
		return candidatePostReductionSize;
	}

	public void setCandidatePostReductionSize(int candidatePostReductionSize) {
		this.candidatePostReductionSize = candidatePostReductionSize;
	}

	public boolean isComputeBenchmarkInformation() {
		return computeBenchmarkInformation;
	}

	public void setComputeBenchmarkInformation(boolean computeBenchmarkInformation) {
		this.computeBenchmarkInformation = computeBenchmarkInformation;
	}

	@Override
	public OWLClassExpression getCurrentlyBestDescription() {
		return getBestSolution();
	}

	@Override
	public EvaluatedDescriptionPosNeg getCurrentlyBestEvaluatedDescription() {
		return new EvaluatedDescriptionPosNeg(getBestSolution(), getSolutionScore());
	}

	public LengthLimitedRefinementOperator getRefinementOperator() {
		return operator;
	}

	public LengthLimitedRefinementOperator getOperator() {
		return operator;
	}

	@Autowired(required = false)
	public void setOperator(LengthLimitedRefinementOperator operator) {
		this.operator = operator;
	}

	public boolean isWriteSearchTree() {
		return writeSearchTree;
	}

	public void setWriteSearchTree(boolean writeSearchTree) {
		this.writeSearchTree = writeSearchTree;
	}

	public File getSearchTreeFile() {
		return searchTreeFile;
	}

	public void setSearchTreeFile(File searchTreeFile) {
		this.searchTreeFile = searchTreeFile;
	}

	public boolean isReplaceSearchTree() {
		return replaceSearchTree;
	}

	public void setReplaceSearchTree(boolean replaceSearchTree) {
		this.replaceSearchTree = replaceSearchTree;
	}

	public boolean isUseTooWeakList() {
		return useTooWeakList;
	}

	public void setUseTooWeakList(boolean useTooWeakList) {
		this.useTooWeakList = useTooWeakList;
	}

	public boolean isUseOverlyGeneralList() {
		return useOverlyGeneralList;
	}

	public void setUseOverlyGeneralList(boolean useOverlyGeneralList) {
		this.useOverlyGeneralList = useOverlyGeneralList;
	}

	public boolean isUseShortConceptConstruction() {
		return useShortConceptConstruction;
	}

	public void setUseShortConceptConstruction(boolean useShortConceptConstruction) {
		this.useShortConceptConstruction = useShortConceptConstruction;
	}

	public boolean isImproveSubsumptionHierarchy() {
		return improveSubsumptionHierarchy;
	}

	public void setImproveSubsumptionHierarchy(boolean improveSubsumptionHierarchy) {
		this.improveSubsumptionHierarchy = improveSubsumptionHierarchy;
	}

	public double getNoisePercentage() {
		return noisePercentage;
	}

	public void setNoisePercentage(double noisePercentage) {
		this.noisePercentage = noisePercentage;
	}

	public OWLClassExpression getStartClass() {
		return startClass;
	}

	// NOTE(review): the setter accepts an OWLClass while the getter returns an
	// OWLClassExpression — asymmetric but backward-compatible
	public void setStartClass(OWLClass startClass) {
		this.startClass = startClass;
	}

	public boolean isUsePropernessChecks() {
		return usePropernessChecks;
	}

	public void setUsePropernessChecks(boolean usePropernessChecks) {
		this.usePropernessChecks = usePropernessChecks;
	}

	public boolean isForceRefinementLengthIncrease() {
		return forceRefinementLengthIncrease;
	}

	public void setForceRefinementLengthIncrease(boolean forceRefinementLengthIncrease) {
		this.forceRefinementLengthIncrease = forceRefinementLengthIncrease;
	}

	public int getMinExecutionTimeInSeconds() {
		return minExecutionTimeInSeconds;
	}

	public void setMinExecutionTimeInSeconds(int minExecutionTimeInSeconds) {
		this.minExecutionTimeInSeconds = minExecutionTimeInSeconds;
	}

	public int getGuaranteeXgoodDescriptions() {
		return guaranteeXgoodDescriptions;
	}

	public void setGuaranteeXgoodDescriptions(int guaranteeXgoodDescriptions) {
		this.guaranteeXgoodDescriptions = guaranteeXgoodDescriptions;
	}

	public int getMaxClassDescriptionTests() {
		return maxClassDescriptionTests;
	}

	public void setMaxClassDescriptionTests(int maxClassDescriptionTests) {
		this.maxClassDescriptionTests = maxClassDescriptionTests;
	}

	public boolean isShowBenchmarkInformation() {
		return showBenchmarkInformation;
	}

	public void setShowBenchmarkInformation(boolean showBenchmarkInformation) {
		this.showBenchmarkInformation = showBenchmarkInformation;
	}

	public double getNegativeWeight() {
		return negativeWeight;
	}

	public void setNegativeWeight(double negativeWeight) {
		this.negativeWeight = negativeWeight;
	}

	public double getStartNodeBonus() {
		return startNodeBonus;
	}

	public void setStartNodeBonus(double startNodeBonus) {
		this.startNodeBonus = startNodeBonus;
	}

	public double getExpansionPenaltyFactor() {
		return expansionPenaltyFactor;
	}

	public void setExpansionPenaltyFactor(double expansionPenaltyFactor) {
		this.expansionPenaltyFactor = expansionPenaltyFactor;
	}

	public int getNegationPenalty() {
		return negationPenalty;
	}

	public void setNegationPenalty(int negationPenalty) {
		this.negationPenalty = negationPenalty;
	}

	public boolean isTerminateOnNoiseReached() {
		return terminateOnNoiseReached;
	}

	public void setTerminateOnNoiseReached(boolean terminateOnNoiseReached) {
		this.terminateOnNoiseReached = terminateOnNoiseReached;
	}

	public OWLClassExpressionLengthMetric getLengthMetric() {
		return lengthMetric;
	}

	/**
	 * Sets the length metric and propagates it to the refinement operator,
	 * if one has already been set.
	 */
	@Autowired(required = false)
	public void setLengthMetric(OWLClassExpressionLengthMetric lengthMetric) {
		this.lengthMetric = lengthMetric;
		if (operator != null) {
			operator.setLengthMetric(lengthMetric);
		}
	}

	@Autowired(required = false)
	public void setHeuristic(ExampleBasedHeuristic heuristic) {
		this.heuristic = heuristic;
	}

	public ExampleBasedHeuristic getHeuristic() {
		return heuristic;
	}
}