001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.qtl;
020
021import com.google.common.collect.BiMap;
022import com.google.common.collect.HashBiMap;
023import com.google.common.collect.Sets;
024import com.jamonapi.MonitorFactory;
025import gnu.trove.map.TObjectIntMap;
026import gnu.trove.map.hash.TObjectIntHashMap;
027import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
028import org.apache.jena.rdf.model.Model;
029import org.apache.jena.rdf.model.ModelFactory;
030import org.dllearner.algorithms.qtl.datastructures.impl.EvaluatedRDFResourceTree;
031import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeConversionStrategy;
032import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeSubsumptionStrategy;
033import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree;
034import org.dllearner.algorithms.qtl.heuristics.QueryTreeHeuristic;
035import org.dllearner.algorithms.qtl.heuristics.QueryTreeHeuristicSimple;
036import org.dllearner.algorithms.qtl.impl.QueryTreeFactory;
037import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryBase;
038import org.dllearner.algorithms.qtl.operations.lgg.*;
039import org.dllearner.algorithms.qtl.util.Entailment;
040import org.dllearner.algorithms.qtl.util.filters.PredicateExistenceFilterDBpedia;
041import org.dllearner.core.*;
042import org.dllearner.core.StringRenderer.Rendering;
043import org.dllearner.core.config.ConfigOption;
044import org.dllearner.kb.OWLAPIOntology;
045import org.dllearner.kb.OWLFile;
046import org.dllearner.kb.SparqlEndpointKS;
047import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator;
048import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl;
049import org.dllearner.learningproblems.Heuristics;
050import org.dllearner.learningproblems.PosNegLP;
051import org.dllearner.learningproblems.QueryTreeScore;
052import org.semanticweb.owlapi.model.OWLClassExpression;
053import org.semanticweb.owlapi.model.OWLIndividual;
054import org.semanticweb.owlapi.util.SimpleShortFormProvider;
055import org.slf4j.Logger;
056import org.slf4j.LoggerFactory;
057import org.springframework.beans.factory.annotation.Autowired;
058
059import java.io.ByteArrayInputStream;
060import java.io.IOException;
061import java.text.DecimalFormat;
062import java.util.*;
063import java.util.Map.Entry;
064import java.util.concurrent.*;
065import java.util.stream.Collectors;
066
067/**
068 * A tree-based algorithm ... \todo add explanation
069 */
070@ComponentAnn(name="query tree learner with noise (disjunctive) - multi-threaded", shortName="qtl2dismt", version=0.8)
071public class QTL2DisjunctiveMultiThreaded extends AbstractCELA implements Cloneable{
072
073        private static final Logger logger = LoggerFactory.getLogger(QTL2DisjunctiveMultiThreaded.class);
074        private final DecimalFormat dFormat = new DecimalFormat("0.00");
075
076        private SparqlEndpointKS ks;
077
078//      private LGGGenerator2 lggGenerator = new LGGGeneratorSimple();
079        private AbstractLGGGenerator lggGenerator;
080
081        private QueryTreeFactory treeFactory;
082        private ConciseBoundedDescriptionGenerator cbdGen;
083
084        private Queue<EvaluatedRDFResourceTree> todoList;
085        private SortedSet<EvaluatedRDFResourceTree> currentPartialSolutions = new ConcurrentSkipListSet<>();
086
087        private double bestCurrentScore = 0d;
088        private EvaluatedRDFResourceTree bestPartialSolutionTree;
089
090        private List<RDFResourceTree> currentPosExampleTrees = new ArrayList<>();
091        private List<RDFResourceTree> currentNegExampleTrees = new ArrayList<>();
092        private Set<OWLIndividual> currentPosExamples = new HashSet<>();
093        private Set<OWLIndividual> currentNegExamples = new HashSet<>();
094
095        private BiMap<RDFResourceTree, OWLIndividual> tree2Individual = HashBiMap.create();
096
097        private PosNegLP lp;
098
099        private Model model;
100
101        private volatile boolean stop;
102        private boolean isRunning;
103
104        private List<EvaluatedRDFResourceTree> partialSolutions;
105
106        private EvaluatedDescription<? extends Score> currentBestSolution;
107
108        private QueryTreeHeuristic heuristic;
109
110        //Parameters
111        @ConfigOption(defaultValue="0.0", description="the (approximated) percentage of noise within the examples")
112        private double noisePercentage = 0.0;
113
114        private double coverageWeight = 0.8;
115        private double specifityWeight = 0.1;
116
117        private double minCoveredPosExamplesFraction = 0.2;
118
119        // maximum execution time to compute a part of the solution
120        private double maxTreeComputationTimeInSeconds = 10;
121
122        @ConfigOption(defaultValue = "1", description = "how important it is not to cover negatives")
123        private double beta = 1;
124
125        // minimum score a query tree must have to be part of the solution
126        private double minimumTreeScore = 0.3;
127
128        // If TRUE the algorithm tries to cover all positive examples. Note that
129        // while this improves accuracy on the testing set,
130        // it may lead to overfitting
131        private boolean tryFullCoverage;
132
133        // algorithm will terminate immediately when a correct definition is found
134        private boolean stopOnFirstDefinition;
135
136        // the (approximated) value of noise within the examples
137        private double noise = 0.0;
138
139        private long partialSolutionStartTime;
140
141        private double startPosExamplesSize;
142        private int expressionTests = 0;
143
144        // the time needed until the best solution was found
145        private long timeBestSolutionFound = -1;
146
147        LiteralNodeConversionStrategy[] strategies = new LiteralNodeConversionStrategy[]{
148                        LiteralNodeConversionStrategy.MIN,
149                        LiteralNodeConversionStrategy.MAX,
150                        LiteralNodeConversionStrategy.MIN_MAX,
151                        LiteralNodeConversionStrategy.DATATYPE
152        };
153        private QueryExecutionFactory qef;
154
155        private Entailment entailment = Entailment.SIMPLE;
156
157        private int maxTreeDepth = 2;
158
159        private boolean useDisjunction = false;
160
161        private int nrOfThreads = Runtime.getRuntime().availableProcessors();
162
163        public QTL2DisjunctiveMultiThreaded() {}
164
165        public QTL2DisjunctiveMultiThreaded(PosNegLP learningProblem, AbstractReasonerComponent reasoner) {
166                super(learningProblem, reasoner);
167                loadModel();
168        }
169
170        public QTL2DisjunctiveMultiThreaded(PosNegLP lp, QueryExecutionFactory qef) {
171                super.learningProblem = lp;
172                this.lp = lp;
173                this.qef = qef;
174        }
175
176        public QTL2DisjunctiveMultiThreaded(PosNegLP lp, SparqlEndpointKS ks) {
177                this(lp, ks.getQueryExecutionFactory());
178        }
179
180//      public QTL2Disjunctive(PosNegLP lp, Model model) {
181//              this.learningProblem = lp;
182//              this.model = model;
183//      }
184
185        /**
186         * Copy constructor.
187         * @param qtl the QTL2Disjunctive instance
188         */
189        public QTL2DisjunctiveMultiThreaded(QTL2DisjunctiveMultiThreaded qtl) {
190                super(qtl.getLearningProblem(), qtl.getReasoner());
191                this.model = ModelFactory.createDefaultModel();
192                this.model.add(qtl.model);
193                this.beta = qtl.beta;
194                this.maxExecutionTimeInSeconds = qtl.maxExecutionTimeInSeconds;
195                this.maxTreeComputationTimeInSeconds = qtl.maxTreeComputationTimeInSeconds;
196                this.tryFullCoverage = qtl.tryFullCoverage;
197                this.stopOnFirstDefinition = qtl.stopOnFirstDefinition;
198        }
199
200        /* (non-Javadoc)
201         * @see org.dllearner.core.Component#init()
202         */
203        @Override
204        public void init() throws ComponentInitException {
205                logger.info("Initializing...");
206                if(!(learningProblem instanceof PosNegLP)){
207                        throw new IllegalArgumentException("Only PosNeg learning problems are supported");
208                }
209                lp = (PosNegLP) learningProblem;
210
211                // get query execution factory from KS
212                if(qef == null) {
213                        qef = ks.getQueryExecutionFactory();
214                }
215
216                if(treeFactory == null) {
217                        treeFactory = new QueryTreeFactoryBase();
218                }
219                cbdGen = new ConciseBoundedDescriptionGeneratorImpl(qef);
220
221                // set the used heuristic
222                if(heuristic == null){
223                        heuristic = new QueryTreeHeuristicSimple();
224                        heuristic.setPosExamplesWeight(beta);
225                }
226
227                if(entailment == Entailment.SIMPLE) {
228                        lggGenerator = new LGGGeneratorSimple();
229//                      lggGenerator = new LGGGeneratorExt();
230//                      ((LGGGeneratorExt)lggGenerator).setTreeFilters(Sets.newHashSet(new PredicateExistenceFilterDBpedia(null)));
231                } else if(entailment == Entailment.RDFS){
232                        lggGenerator = new LGGGeneratorRDFS(reasoner);
233                }
234
235                // generate the query trees
236                generateQueryTrees();
237
238                startPosExamplesSize = currentPosExampleTrees.size();
239
240                //console rendering of class expressions
241                StringRenderer.setRenderer(Rendering.MANCHESTER_SYNTAX);
242                StringRenderer.setShortFormProvider(new SimpleShortFormProvider());
243
244                //compute the LGG for all examples
245                //this allows us to prune all other trees because we can omit paths in trees which are contained in all positive
246                //as well as negative examples
247//              List<RDFResourceTree> allExamplesTrees = new ArrayList<RDFResourceTree>();
248//              allExamplesTrees.addAll(currentPosExampleTrees);
249//              allExamplesTrees.addAll(currentNegExampleTrees);
250//              RDFResourceTree lgg = lggGenerator.getLGG(allExamplesTrees);
251//              lgg.dump();
252                
253                initialized = true;
254                logger.info("...initialization finished.");
255        }
256
257        /**
258         * @param entailment the entailment to set
259         */
260        public void setEntailment(Entailment entailment) {
261                this.entailment = entailment;
262        }
263
264        public void setNrOfThreads(int nrOfThreads) {
265                this.nrOfThreads = nrOfThreads;
266        }
267
268        private void generateQueryTrees(){
269                logger.info("Generating trees...");
270                RDFResourceTree queryTree;
271
272                // positive examples
273                if(currentPosExampleTrees.isEmpty()){
274                        for (OWLIndividual ind : lp.getPositiveExamples()) {
275                                try {
276                                        Model cbd = cbdGen.getConciseBoundedDescription(ind.toStringID(), maxTreeDepth);
277//                                      cbd.write(new FileOutputStream("/tmp/dbpedia-" + ind.toStringID().substring(ind.toStringID().lastIndexOf('/') + 1) + ".ttl"), "TURTLE", null);
278                                        queryTree = treeFactory.getQueryTree(ind.toStringID(), cbd, maxTreeDepth);
279                                        tree2Individual.put(queryTree, ind);
280                                        currentPosExampleTrees.add(queryTree);
281                                        currentPosExamples.add(ind);
282                                        logger.debug(ind.toStringID());
283                                        logger.debug(queryTree.getStringRepresentation());
284                                } catch (Exception e) {
285                                        logger.error("Failed to generate tree for resource " + ind, e);
286                                        throw new RuntimeException();
287                                }
288                        }
289                }
290
291                // negative examples
292                if(currentNegExampleTrees.isEmpty()){
293                        for (OWLIndividual ind : lp.getNegativeExamples()) {
294                                try {
295                                        Model cbd = cbdGen.getConciseBoundedDescription(ind.toStringID(), maxTreeDepth);
296                                        queryTree = treeFactory.getQueryTree(ind.toStringID(), cbd, maxTreeDepth);
297                                        tree2Individual.put(queryTree, ind);
298                                        currentNegExampleTrees.add(queryTree);
299                                        currentNegExamples.add(ind);
300                                        logger.debug(ind.toStringID());
301                                        logger.debug(queryTree.getStringRepresentation());
302                                } catch (Exception e) {
303                                        logger.error("Failed to generate tree for resource " + ind, e);
304                                        throw new RuntimeException();
305                                }
306                        }
307                }
308                logger.info("...done.");
309        }
310
311        /* (non-Javadoc)
312         * @see org.dllearner.core.LearningAlgorithm#start()
313         */
314        @Override
315        public void start() {
316                if(currentPosExampleTrees.isEmpty()) {
317                        logger.info("No positive examples given!");
318                        return;
319                }
320
321                printSetup();
322
323                reset();
324                logger.info("Running...");
325                nanoStartTime = System.nanoTime();
326
327//              currentPosExampleTrees.stream().map(RDFResourceTree::getStringRepresentation).forEach(System.out::println);
328
329                // if noise=0 and there are no neg. examples, we can simply compute the LGG for all pos. examples
330                if(noise == 0 && currentNegExampleTrees.isEmpty()) {
331                        lggGenerator.setTimeout(getRemainingRuntimeInMilliseconds(), TimeUnit.MILLISECONDS);
332                        RDFResourceTree lgg = lggGenerator.getLGG(currentPosExampleTrees);
333                        Set<EvaluatedRDFResourceTree> solutions = evaluate(lgg, false);
334                        timeBestSolutionFound = getCurrentRuntimeInMilliSeconds();
335                        currentPartialSolutions.addAll(solutions);
336                        partialSolutions.addAll(solutions);
337                        currentBestSolution = solutions.iterator().next().asEvaluatedDescription();
338                } else {
339                        int i = 1;
340                        while(!terminationCriteriaSatisfied() && (useDisjunction || i == 1)){
341                                logger.info(i++ + ". iteration...");
342                                logger.info("#Remaining pos. examples:" + currentPosExampleTrees.size());
343                                logger.info("#Remaining neg. examples:" + currentNegExampleTrees.size());
344
345                                // compute best (partial) solution computed so far
346                                EvaluatedRDFResourceTree bestPartialSolution = computeBestPartialSolution();
347
348                                // add to set of partial solutions if criteria are satisfied
349                                if(bestPartialSolution.getScore() >= minimumTreeScore){
350
351                                        partialSolutions.add(bestPartialSolution);
352
353                                        // remove all examples covered by current partial solution
354                                        RDFResourceTree tree;
355                                        for (Iterator<RDFResourceTree> iterator = currentPosExampleTrees.iterator(); iterator.hasNext();) {
356                                                tree = iterator.next();
357                                                if(!bestPartialSolution.getFalseNegatives().contains(tree)){//a pos tree that is not covered
358                                                        iterator.remove();
359                                                        currentPosExamples.remove(tree2Individual.get(tree));
360                                                }
361                                        }
362                                        for (Iterator<RDFResourceTree> iterator = currentNegExampleTrees.iterator(); iterator.hasNext();) {
363                                                tree = iterator.next();
364                                                if(bestPartialSolution.getFalsePositives().contains(tree)){//a neg example that is covered
365                                                        iterator.remove();
366                                                        currentNegExamples.remove(tree2Individual.get(tree));
367                                                }
368                                        }
369
370                                        // (re)build the current combined solution from all partial solutions
371                                        currentBestSolution = buildCombinedSolution();
372
373                                        logger.info("combined accuracy: " + dFormat.format(currentBestSolution.getAccuracy()));
374                                } else {
375                                        String message = "No partial tree found which satisfies the minimal criteria.";
376                                        if(currentBestSolution != null) {
377                                                message += "- the best was: "
378                                                                + currentBestSolution.getDescription()
379                                                                + " with score " + currentBestSolution.getScore();
380                                        }
381                                        logger.info(message);
382                                }
383
384                        }
385                }
386
387                isRunning = false;
388
389                postProcess();
390
391                logger.info("Finished in {}ms.", getCurrentRuntimeInMilliSeconds());
392                logger.info("{} descriptions tested", expressionTests);
393                if(currentBestSolution != null) {
394                        logger.info("Combined solution:{}", currentBestSolution.getDescription().toString().replace("\n", ""));
395                        logger.info(currentBestSolution.getScore().toString());
396                } else {
397                        logger.info("Could not find a solution in the given time.");
398                }
399        }
400
401        /**
402         * This method can be called for clean up of the solutions and re-ranking.
403         */
404        private void postProcess() {
405                logger.trace("Post processing ...");
406                // pick solutions with same accuracy, i.e. in the pos only case
407                // covering the same number of positive examples
408                SortedSet<EvaluatedRDFResourceTree> solutions = getSolutions();
409                // pick solutions with accuracy above
410                // mas(maximum achievable score) - noise
411                List<EvaluatedRDFResourceTree> solutionsForPostProcessing = new ArrayList<>();
412                for (EvaluatedRDFResourceTree solution : solutions) {
413
414                        double accuracy = solution.getTreeScore().getAccuracy();
415
416                        double mas = heuristic.getMaximumAchievableScore(solution);
417
418                        double epsilon = 0.01;
419
420                        if(accuracy != mas && accuracy >= (mas - noise - epsilon)) {
421                                solutionsForPostProcessing.add(solution);
422                        }
423                }
424
425                logger.trace("Finished post processing.");
426        }
427
428        /**
429         * Compute a (partial) solution that covers as much positive examples as possible.
430         * @return a (partial) solution
431         */
432        private EvaluatedRDFResourceTree computeBestPartialSolution(){
433                logger.info("Computing best partial solution...");
434                bestCurrentScore = Double.NEGATIVE_INFINITY;
435                partialSolutionStartTime = System.currentTimeMillis();
436                initTodoList(currentPosExampleTrees, currentNegExampleTrees);
437
438                // generate id for each pos and neg example tree
439                TObjectIntMap<RDFResourceTree> index = new TObjectIntHashMap<>(this.currentPosExampleTrees.size() + this.currentNegExampleTrees.size());
440                int id = 1;
441                for (RDFResourceTree posTree : currentPosExampleTrees) {
442                        index.put(posTree, id++);
443                }
444                Set<Set<RDFResourceTree>> processedCombinations = new HashSet<>();
445
446                ExecutorService pool = Executors.newFixedThreadPool(nrOfThreads);
447
448                while(!partialSolutionTerminationCriteriaSatisfied()){
449                        logger.trace("ToDo list size: " + todoList.size());
450                        // pick best element from todo list
451                        EvaluatedRDFResourceTree currentElement = todoList.poll();
452                        final RDFResourceTree currentTree = currentElement.getTree();
453
454                        logger.trace("Next tree: {} ({})", currentElement.getBaseQueryTrees(), currentElement.getTreeScore());
455
456                        // generate the LGG between the chosen tree and each false negative resp. uncovered positive example
457                        Collection<RDFResourceTree> falseNegatives = currentElement.getFalseNegatives();
458
459                        if(falseNegatives.isEmpty()) { // if the current solution covers already all pos examples
460//                              addToSolutions(bestPartialSolutionTree);
461//                              bestPartialSolutionTree = currentElement;
462                        }
463
464                        List<CompletableFuture<Void>> list = falseNegatives.stream()
465                                        .filter(fn -> !processedCombinations.contains(Sets.union(currentElement.getBaseQueryTrees(), Sets.newHashSet(fn))))
466                                        .map(fn -> CompletableFuture.supplyAsync(() -> computePartialSolution(currentTree, fn, Sets.newTreeSet(Sets.union(currentElement.getBaseQueryTrees(), Sets.newHashSet(fn)))), pool))
467                                        .map(solutionFuture -> solutionFuture.thenAccept(solutions -> {
468                                                for (EvaluatedRDFResourceTree solution : solutions) {
469                                                        logger.trace("solution: {} ({})", solution.getBaseQueryTrees(), solution.getTreeScore());
470                                                        processedCombinations.add(solution.getBaseQueryTrees());
471                                                        expressionTests++;
472                                                        double score = solution.getScore();
473                                                        double mas = heuristic.getMaximumAchievableScore(solution);
474
475                                                        if (score >= bestCurrentScore) {
476                                                                if (score > bestCurrentScore) {
477                                                                        timeBestSolutionFound = getCurrentRuntimeInMilliSeconds();
478                                                                        logger.info("\tGot better solution after {}ms:" + solution.getTreeScore(), timeBestSolutionFound);
479//                                                                      logger.info("\t" + solutionAsString(solution.asEvaluatedDescription()));
480                                                                        bestCurrentScore = score;
481                                                                        bestPartialSolutionTree = solution;
482                                                                }
483                                                                // add to ToDo list, if not already contained in ToDo list or solution list
484                                                                if (bestCurrentScore == 1.0 || mas > score) {
485//                                                      todo(solution);
486                                                                }
487                                                        } else if (bestCurrentScore == 1.0 || mas >= bestCurrentScore) { // add to ToDo list if max. achievable score is higher
488//                                              todo(solution);
489                                                        } else {
490                                                                logger.trace("Too weak: {}", solution.getTreeScore());
491//                                              System.err.println(solution.getEvaluatedDescription());
492//                                              System.out.println("Too general");
493//                                              System.out.println("MAS=" + mas + "\nBest=" + bestCurrentScore);
494//                                              todo(solution);
495                                                        }
496                                                        todo(solution);
497                                                        addToSolutions(solution);
498                                                }
499                                        }))
500                                        .collect(Collectors.toList());
501                        list.forEach(c -> {
502                                try {
503                                        c.get();
504                                } catch (InterruptedException e) {
505                                        e.printStackTrace();
506                                } catch (ExecutionException e) {
507                                        e.printStackTrace();
508                                }
509                        });
510
511
512//                      while (it.hasNext() && !(useDisjunction && isPartialSolutionTimeExpired()) && !isTimeExpired()) {
513//                              RDFResourceTree uncoveredTree = it.next();
514//                              logger.trace("Uncovered tree: " + uncoveredTree);
515//                              // we should avoid the computation of lgg(t2,t1) if we already did lgg(t1,t2)
516//                              Set<RDFResourceTree> baseQueryTrees = Sets.newTreeSet(currentElement.getBaseQueryTrees());
517//                              baseQueryTrees.add(uncoveredTree);
518////                            String s = "";
519////                            for (RDFResourceTree queryTree : baseQueryTrees) {
520////                                    s += index.get(queryTree) + ",";
521////                            }
522////                            System.err.println(s);
523//                              if (!processedCombinations.add(baseQueryTrees)) {
524////                                    System.err.println("skipping");
525////                                    continue;
526//                              }
527//
528//                              PartialSolutionComputationTask callable = new PartialSolutionComputationTask(currentTree, uncoveredTree, baseQueryTrees);
529//                              Future<Set<EvaluatedRDFResourceTree>> future = pool.submit(callable);
530//                              set.add(future);
531//                      }
532
533//                      for (Future<Set<EvaluatedRDFResourceTree>> future : set) {
534//                              try {
535//                                      Set<EvaluatedRDFResourceTree> solutions = future.get();
536//
537//
538//                              } catch (InterruptedException e) {
539//                                      e.printStackTrace();
540//                              } catch (ExecutionException e) {
541//                                      e.printStackTrace();
542//                              }
543//                      }
544//                      addToSolutions(currentElement);
545                }
546
547                pool.shutdown();
548                try {
549                        pool.awaitTermination(1, TimeUnit.SECONDS);
550                } catch (InterruptedException e) {
551                        e.printStackTrace();
552                }
553
554                long endTime = System.currentTimeMillis();
555                logger.info("...finished computing best partial solution in " + (endTime-partialSolutionStartTime) + "ms.");
556                EvaluatedDescription bestPartialSolution = bestPartialSolutionTree.asEvaluatedDescription();
557
558                logger.info("Best partial solution: " + solutionAsString(bestPartialSolution) + "\n(" + bestPartialSolution.getScore() + ")");
559
560                logger.trace("LGG time: " + MonitorFactory.getTimeMonitor("lgg").getTotal() + "ms");
561                logger.trace("Avg. LGG time: " + MonitorFactory.getTimeMonitor("lgg").getAvg() + "ms");
562                logger.info("#LGG computations: " + MonitorFactory.getTimeMonitor("lgg").getHits());
563
564                logger.trace("Subsumption test time: " + MonitorFactory.getTimeMonitor("subsumption").getTotal() + "ms");
565                logger.trace("Avg. subsumption test time: " + MonitorFactory.getTimeMonitor("subsumption").getAvg() + "ms");
566                logger.trace("#Subsumption tests: " + MonitorFactory.getTimeMonitor("subsumption").getHits());
567
568                return bestPartialSolutionTree;
569        }
570
571        private String solutionAsString(EvaluatedDescription ed) {
572                return renderer.render(ed.getDescription()).replace("\n", "").replaceAll("\\\\s{2,}", " ");
573        }
574
575        private boolean addToSolutions(EvaluatedRDFResourceTree solution) {
576                for (EvaluatedRDFResourceTree partialSolution : currentPartialSolutions) {
577                        if(QueryTreeUtils.sameTrees(partialSolution.getTree(), solution.getTree())) {
578                                return false;
579                        }
580                }
581                return currentPartialSolutions.add(solution);
582        }
583
584        /**
585         * Initializes the ToDo list with all distinct trees contained in the given list of positive
586         * example trees {@code posExamples} and negative example trees {@code negExamples}.
587         * First, distinct trees are computed and afterwards, for each tree an initial score will be
588         *  computed.
589         * @param posExamples the positive example trees
590         * @param negExamples the negative example trees
591         */
592        private void initTodoList(List<RDFResourceTree> posExamples, List<RDFResourceTree> negExamples){
593                todoList = new PriorityBlockingQueue<>();
594//              EvaluatedRDFResourceTree dummy = new EvaluatedRDFResourceTree(new QueryTreeImpl<String>((N)"TOP"), trees, 0d);
595//              todoList.add(dummy);
596
597                // compute distinct trees, i.e. check if some of the trees already cover others
598                Collection<RDFResourceTree> distinctTrees = new ArrayList<>();
599                for (RDFResourceTree queryTree : posExamples) {
600                        boolean distinct = true;
601                        for (RDFResourceTree otherTree : distinctTrees) {
602                                if(!queryTree.equals(otherTree)){
603                                        if(QueryTreeUtils.sameTrees(queryTree, otherTree)){
604                                                distinct = false;
605                                                break;
606                                        }
607                                }
608                        }
609                        if(distinct){
610                                distinctTrees.add(queryTree);
611                        }
612                }
613
614                // compute an initial score
615                for (RDFResourceTree queryTree : distinctTrees) {
616                        EvaluatedRDFResourceTree evaluatedQueryTree = evaluateSimple(queryTree, false);
617                        evaluatedQueryTree.setBaseQueryTrees(Collections.singleton(queryTree));
618                        todoList.add(evaluatedQueryTree);
619                }
620        }
621
622        /**
623         * @return TRUE if the query tree is already contained in the solutions or
624         * todo list, otherwise FALSE
625         */
626        private boolean isRedundant(RDFResourceTree tree) {
627                //check if not already contained in todo list
628                for (EvaluatedRDFResourceTree evTree : todoList) {
629                        if(QueryTreeUtils.sameTrees(tree, evTree.getTree())){
630                                logger.trace("Not added to TODO list: Already contained in.");
631//                              logger.trace(evTree.getBaseQueryTrees().toString());
632                                return true;
633                        }
634                }
635
636                //check if not already contained in solutions
637                for (EvaluatedRDFResourceTree evTree : currentPartialSolutions) {
638                        if(QueryTreeUtils.sameTrees(tree, evTree.getTree())){
639                                logger.trace("Not added to partial solutions list: Already contained in.");
640                                return true;
641                        }
642                }
643                return false;
644        }
645
646        /**
647         * Add tree to ToDo list if not already contained in that list or the solutions.
648         * @param solution the solution
649         */
650        private void todo(EvaluatedRDFResourceTree solution){
651                logger.trace("Added to TODO list.");
652                todoList.add(solution);
653        }
654
655        private EvaluatedRDFResourceTree evaluateSimple(RDFResourceTree tree, boolean useSpecifity){
656                //1. get a score for the coverage = recall oriented
657                //compute positive examples which are not covered by LGG
658                List<RDFResourceTree> uncoveredPositiveExampleTrees = getUncoveredTrees(tree, currentPosExampleTrees);
659                Set<OWLIndividual> uncoveredPosExamples = new TreeSet<>();
660                for (RDFResourceTree queryTree : uncoveredPositiveExampleTrees) {
661                        uncoveredPosExamples.add(tree2Individual.get(queryTree));
662                }
663                //compute negative examples which are covered by LGG
664                Collection<RDFResourceTree> coveredNegativeExampleTrees = getCoveredTrees(tree, currentNegExampleTrees);
665                Set<OWLIndividual> coveredNegExamples = new TreeSet<>();
666                for (RDFResourceTree queryTree : coveredNegativeExampleTrees) {
667                        coveredNegExamples.add(tree2Individual.get(queryTree));
668                }
669                //compute score
670                int coveredPositiveExamples = currentPosExampleTrees.size() - uncoveredPositiveExampleTrees.size();
671                double recall = coveredPositiveExamples / (double)currentPosExampleTrees.size();
672                double precision = (coveredNegativeExampleTrees.size() + coveredPositiveExamples == 0)
673                                                ? 0
674                                                : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExampleTrees.size());
675
676                double coverageScore = Heuristics.getFScore(recall, precision, beta);
677
678                //2. get a score for the specifity of the query, i.e. how many edges/nodes = precision oriented
679                int nrOfSpecificNodes = 0;
680                for (RDFResourceTree childNode : QueryTreeUtils.getNodes(tree)) {
681                        if(!childNode.isVarNode()){
682                                nrOfSpecificNodes++;
683                        }
684                }
685                double specifityScore = 0d;
686                if(useSpecifity){
687                        specifityScore = Math.log(nrOfSpecificNodes);
688                } else {
689                        specifityScore = 1 / (double) nrOfSpecificNodes;
690                }
691
692                //3.compute the total score
693                double score = coverageWeight * coverageScore + specifityWeight * specifityScore;
694
695                QueryTreeScore queryTreeScore = new QueryTreeScore(score, coverageScore,
696                                new TreeSet<>(Sets.difference(currentPosExamples, uncoveredPosExamples)), uncoveredPosExamples,
697                                coveredNegExamples, new TreeSet<>(Sets.difference(currentNegExamples, coveredNegExamples)),
698                                specifityScore, nrOfSpecificNodes);
699
700//              QueryTreeScore queryTreeScore = new QueryTreeScore(score, coverageScore,
701//                              null,null,null,null,
702//                              specifityScore, nrOfSpecificNodes);
703
704                EvaluatedRDFResourceTree evaluatedTree = new EvaluatedRDFResourceTree(tree, uncoveredPositiveExampleTrees, coveredNegativeExampleTrees, queryTreeScore);
705
706                //TODO use only the heuristic to compute the score
707                score = heuristic.getScore(evaluatedTree);
708                queryTreeScore.setScore(score);
709                queryTreeScore.setAccuracy(score);
710
711                return evaluatedTree;
712        }
713
714        /**
715         * Evaluated a query tree such that it returns a set of evaluated query trees.
716         * A set is returned because there are several ways how to convert literal nodes.
717         * @param tree the query tree
718         * @param useSpecifity whether to use SPECIFITY as measure
719         * @return a set of evaluated query trees
720         */
721        private Set<EvaluatedRDFResourceTree> evaluate(RDFResourceTree tree, boolean useSpecifity){
722                Set<EvaluatedRDFResourceTree> evaluatedTrees = new TreeSet<>();
723
724                LiteralNodeSubsumptionStrategy[] strategies = LiteralNodeSubsumptionStrategy.values();
725                strategies = new LiteralNodeSubsumptionStrategy[]{
726                                LiteralNodeSubsumptionStrategy.DATATYPE,
727//                              LiteralNodeSubsumptionStrategy.INTERVAL,
728//                              LiteralNodeSubsumptionStrategy.MIN,
729//                              LiteralNodeSubsumptionStrategy.MAX,
730                                };
731                for (LiteralNodeSubsumptionStrategy strategy : strategies) {
732                        // 1. get a score for the coverage = recall oriented
733                        List<RDFResourceTree> uncoveredPositiveExampleTrees = new ArrayList<>();
734                        List<RDFResourceTree> coveredNegativeExampleTrees = new ArrayList<>();
735
736                        // compute positive examples which are not covered by LGG
737                        for (RDFResourceTree posTree : currentPosExampleTrees) {
738//                              System.out.print(currentPosExampleTrees.indexOf(posTree) + ":");
739                                if(!QueryTreeUtils.isSubsumedBy(posTree, tree, entailment, reasoner)){
740//                                      System.err.println(posTree.getStringRepresentation(true));System.err.println(tree.getStringRepresentation(true));
741//                                      System.out.println("FALSE");
742                                        uncoveredPositiveExampleTrees.add(posTree);
743                                } else {
744//                                      System.out.println("TRUE");
745                                }
746                        }
747
748                        // compute negative examples which are covered by LGG
749                        for (RDFResourceTree negTree : currentNegExampleTrees) {
750                                if(QueryTreeUtils.isSubsumedBy(negTree, tree, entailment, reasoner)){
751                                        coveredNegativeExampleTrees.add(negTree);
752                                }
753                        }
754
755                        // convert to individuals
756                        Set<OWLIndividual> uncoveredPosExamples = asIndividuals(uncoveredPositiveExampleTrees);
757                        Set<OWLIndividual> coveredNegExamples = asIndividuals(coveredNegativeExampleTrees);
758
759                        // compute score
760                        int coveredPositiveExamples = currentPosExampleTrees.size() - uncoveredPositiveExampleTrees.size();
761                        double recall = coveredPositiveExamples / (double)currentPosExampleTrees.size();
762                        double precision = (coveredNegativeExampleTrees.size() + coveredPositiveExamples == 0)
763                                                        ? 0
764                                                        : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExampleTrees.size());
765
766                        double coverageScore = Heuristics.getFScore(recall, precision, beta);
767
768                        // 2. get a score for the specifity of the query, i.e. how many edges/nodes = precision oriented
769                        int nrOfSpecificNodes = 0;
770                        for (RDFResourceTree childNode : QueryTreeUtils.getNodes(tree)) {
771                                if(!childNode.isVarNode()){
772                                        nrOfSpecificNodes++;
773                                }
774                        }
775                        double specifityScore = 0d;
776                        if(useSpecifity){
777                                specifityScore = Math.log(nrOfSpecificNodes);
778                        }
779
780                        // 3.compute the total score
781                        double score = coverageWeight * coverageScore + specifityWeight * specifityScore;
782
783                        QueryTreeScore queryTreeScore = new QueryTreeScore(score, coverageScore,
784                                        new TreeSet<>(Sets.difference(currentPosExamples, uncoveredPosExamples)), uncoveredPosExamples,
785                                        coveredNegExamples, new TreeSet<>(Sets.difference(currentNegExamples, coveredNegExamples)),
786                                        specifityScore, nrOfSpecificNodes);
787
788                        EvaluatedRDFResourceTree evaluatedTree = new EvaluatedRDFResourceTree(tree, uncoveredPositiveExampleTrees, coveredNegativeExampleTrees, queryTreeScore);
789
790                        //TODO use only the heuristic to compute the score
791                        score = heuristic.getScore(evaluatedTree);
792                        queryTreeScore.setScore(score);
793                        queryTreeScore.setAccuracy(score);
794
795                        evaluatedTrees.add(evaluatedTree);
796                }
797
798                return evaluatedTrees;
799        }
800
801        /**
802         * Evaluated a query tree such that it returns a set of evaluated query trees.
803         * A set is returned because there are several ways how to convert literal nodes.
804         * @param tree the query tree
805         * @param useSpecifity whether to use SPECIFITY as measure
806         * @return a set of evaluated query trees
807         */
808        private Set<EvaluatedRDFResourceTree> evaluate2(RDFResourceTree tree, boolean useSpecifity){
809                Set<EvaluatedRDFResourceTree> evaluatedTrees = new TreeSet<>();
810
811                //test different strategies on the conversion of literal nodes
812                Set<OWLClassExpression> combinations = new HashSet<>();
813
814                for (LiteralNodeConversionStrategy strategy : strategies) {
815                        OWLClassExpression ce = QueryTreeUtils.toOWLClassExpression(tree);
816                        combinations.add(ce);
817                }
818                //compute all combinations of different types of facets
819//              OWLClassExpression ce = tree.asOWLClassExpression(LiteralNodeConversionStrategy.FACET_RESTRICTION);
820//              combinations = ce.accept(new ClassExpressionLiteralCombination());
821                for (OWLClassExpression c : combinations) {
822                        //convert to individuals
823                        SortedSet<OWLIndividual> coveredExamples = reasoner.getIndividuals(c);
824                        Set<OWLIndividual> coveredPosExamples = new TreeSet<>(Sets.intersection(currentPosExamples, coveredExamples));
825                        Set<OWLIndividual> uncoveredPosExamples = new TreeSet<>(Sets.difference(currentPosExamples, coveredExamples));
826                        Set<OWLIndividual> coveredNegExamples = new TreeSet<>(Sets.intersection(currentNegExamples, coveredExamples));
827                        Set<OWLIndividual> uncoveredNegExamples = new TreeSet<>(Sets.difference(currentNegExamples, coveredExamples));
828
829                        //compute score
830                        double recall = coveredPosExamples.size() / (double)currentPosExamples.size();
831                        double precision = (coveredNegExamples.size() + coveredPosExamples.size() == 0)
832                                                        ? 0
833                                                        : coveredPosExamples.size() / (double)(coveredPosExamples.size() + coveredNegExamples.size());
834
835                        double coverageScore = Heuristics.getFScore(recall, precision, beta);
836
837                        //2. get a score for the specificity of the query, i.e. how many edges/nodes = precision oriented
838                        int nrOfSpecificNodes = 0;
839                        for (RDFResourceTree childNode : QueryTreeUtils.getNodes(tree)){
840                                if(!childNode.isVarNode()){
841                                        nrOfSpecificNodes++;
842                                }
843                        }
844                        double specifityScore = 0d;
845                        if(useSpecifity){
846                                specifityScore = Math.log(nrOfSpecificNodes);
847                        }
848
849                        //3.compute the total score
850                        double score = coverageWeight * coverageScore + specifityWeight * specifityScore;
851
852                        QueryTreeScore queryTreeScore = new QueryTreeScore(
853                                        score, coverageScore,
854                                        coveredPosExamples, uncoveredPosExamples,
855                                        coveredNegExamples, uncoveredNegExamples,
856                                        specifityScore, nrOfSpecificNodes);
857
858                        //TODO use only the heuristic to compute the score
859                        EvaluatedRDFResourceTree evaluatedTree = new EvaluatedRDFResourceTree(tree,
860                                        asQueryTrees(uncoveredPosExamples), asQueryTrees(coveredNegExamples), queryTreeScore);
861                        score = heuristic.getScore(evaluatedTree);
862                        queryTreeScore.setScore(score);
863                        queryTreeScore.setAccuracy(score);
864
865
866                        EvaluatedDescription evaluatedDescription = new EvaluatedDescription(c, queryTreeScore);
867
868                        evaluatedTree.setDescription(evaluatedDescription);
869
870                        evaluatedTrees.add(evaluatedTree);
871                }
872                return evaluatedTrees;
873        }
874
875        private EvaluatedDescription<? extends Score> buildCombinedSolution(){
876                EvaluatedDescription<? extends Score> bestCombinedSolution = null;
877                double bestScore = Double.NEGATIVE_INFINITY;
878                LiteralNodeConversionStrategy[] strategies = LiteralNodeConversionStrategy.values();
879                strategies = new LiteralNodeConversionStrategy[]{LiteralNodeConversionStrategy.DATATYPE};
880                for (LiteralNodeConversionStrategy strategy : strategies) {
881                        EvaluatedDescription<? extends Score> combinedSolution;
882                        if(partialSolutions.size() == 1){
883                                combinedSolution = partialSolutions.get(0).asEvaluatedDescription();
884                        } else {
885                                Set<OWLClassExpression> disjuncts = new TreeSet<>();
886
887                                Set<OWLIndividual> posCovered = new HashSet<>();
888                                Set<OWLIndividual> negCovered = new HashSet<>();
889
890                                //build the union of all class expressions
891                                OWLClassExpression partialDescription;
892                                for (EvaluatedRDFResourceTree partialSolution : partialSolutions) {
893                                        partialDescription = partialSolution.asEvaluatedDescription().getDescription();
894                                        disjuncts.add(partialDescription);
895                                        posCovered.addAll(partialSolution.getTreeScore().getCoveredPositives());
896                                        negCovered.addAll(partialSolution.getTreeScore().getCoveredNegatives());
897                                }
898                                OWLClassExpression unionDescription = dataFactory.getOWLObjectUnionOf(disjuncts);
899
900                                Set<OWLIndividual> posNotCovered = Sets.difference(lp.getPositiveExamples(), posCovered);
901                                Set<OWLIndividual> negNotCovered = Sets.difference(lp.getNegativeExamples(), negCovered);
902
903                                //compute the coverage
904                                double recall = posCovered.size() / (double)lp.getPositiveExamples().size();
905                                double precision = (posCovered.size() + negCovered.size() == 0)
906                                                                ? 0
907                                                                : posCovered.size() / (double)(posCovered.size() + negCovered.size());
908
909                                double coverageScore = Heuristics.getFScore(recall, precision, beta);
910
911//                              ScoreTwoValued score = new ScoreTwoValued(posCovered, posNotCovered, negCovered, negNotCovered);
912//                              score.setAccuracy(coverageScore);
913                                QueryTreeScore score = new QueryTreeScore(coverageScore, coverageScore, posCovered, posNotCovered, negCovered, negNotCovered, -1, -1);
914
915                                combinedSolution = new EvaluatedDescription(unionDescription, score);
916                        }
917                        if(combinedSolution.getAccuracy() > bestScore){
918                                bestCombinedSolution = combinedSolution;
919                                bestCurrentScore = combinedSolution.getAccuracy();
920                        }
921                }
922                return bestCombinedSolution;
923        }
924
925        private void reset(){
926                stop = false;
927                isRunning = true;
928
929                currentBestSolution = null;
930                partialSolutions = new ArrayList<>();
931
932                bestCurrentScore = minimumTreeScore;
933
934                MonitorFactory.getTimeMonitor("lgg").reset();
935                nanoStartTime = System.nanoTime();
936        }
937
938        /* (non-Javadoc)
939         * @see org.dllearner.core.StoppableLearningAlgorithm#stop()
940         */
941        @Override
942        public void stop() {
943                stop = true;
944        }
945
946        /* (non-Javadoc)
947         * @see org.dllearner.core.AbstractCELA#getCurrentlyBestDescription()
948         */
949        @Override
950        public OWLClassExpression getCurrentlyBestDescription() {
951                return currentBestSolution.getDescription();
952        }
953
954        /* (non-Javadoc)
955         * @see org.dllearner.core.AbstractCELA#getCurrentlyBestEvaluatedDescription()
956         */
957        @Override
958        public EvaluatedDescription getCurrentlyBestEvaluatedDescription() {
959                return currentBestSolution;
960        }
961
962        /* (non-Javadoc)
963         * @see org.dllearner.core.StoppableLearningAlgorithm#isRunning()
964         */
965        @Override
966        public boolean isRunning() {
967                return isRunning;
968        }
969
970//      @Autowired
971//      public void setLearningProblem(PosNegLP learningProblem) {
972//              this.lp = learningProblem;
973//      }
974
975//      @Autowired
976        @Override
977        public void setReasoner(AbstractReasonerComponent reasoner){
978                super.setReasoner(reasoner);
979//              loadModel();
980        }
981
982        private void loadModel(){
983                model = ModelFactory.createDefaultModel();
984                for (KnowledgeSource ks : reasoner.getSources()) {
985                        if(ks instanceof OWLFile){
986                                try {
987                                        model.read(((OWLFile) ks).getURL().openStream(), null);
988                                } catch (IOException e) {
989                                        e.printStackTrace();
990                                }
991                        } else if(ks instanceof OWLAPIOntology){
992                                ByteArrayInputStream bais = new ByteArrayInputStream(((OWLAPIOntology) ks).getConverter().convert(((OWLAPIOntology) ks).getOntology()));
993                                model.read(bais, null);
994                                try {
995                                        bais.close();
996                                } catch (IOException e) {
997                                        e.printStackTrace();
998                                }
999                        }
1000                }
1001        }
1002
1003        private Set<OWLIndividual> asIndividuals(Collection<RDFResourceTree> trees){
1004                Set<OWLIndividual> individuals = new HashSet<>(trees.size());
1005                for (RDFResourceTree queryTree : trees) {
1006                        individuals.add(tree2Individual.get(queryTree));
1007                }
1008                return individuals;
1009        }
1010
1011        private Set<RDFResourceTree> asQueryTrees(Collection<OWLIndividual> individuals){
1012                Set<RDFResourceTree> trees = new HashSet<>(individuals.size());
1013                for (OWLIndividual ind : individuals) {
1014                        trees.add(tree2Individual.inverse().get(ind));
1015                }
1016                return trees;
1017        }
1018
1019        /**
1020         * Computes all trees from the given list {@code allTrees} which are subsumed by {@code tree}.
1021         * @param tree the tree
1022         * @param trees all trees
1023         * @return all trees from the given list {@code allTrees} which are subsumed by {@code tree}
1024         */
1025        private List<RDFResourceTree> getCoveredTrees(RDFResourceTree tree, List<RDFResourceTree> trees){
1026                List<RDFResourceTree> coveredTrees = new ArrayList<>();
1027                for (RDFResourceTree queryTree : trees) {
1028                        if(QueryTreeUtils.isSubsumedBy(queryTree, tree)){
1029                                coveredTrees.add(queryTree);
1030                        }
1031                }
1032                return coveredTrees;
1033        }
1034
1035        /**
1036         * Computes all trees from the given list {@code trees} which are not subsumed by {@code tree}.
1037         * @param tree the tree
1038         * @param trees the trees
1039         * @return all trees from the given list {@code trees} which are not subsumed by {@code tree}.
1040         */
1041        private List<RDFResourceTree> getUncoveredTrees(RDFResourceTree tree, List<RDFResourceTree> trees){
1042                List<RDFResourceTree> uncoveredTrees = new ArrayList<>();
1043                for (RDFResourceTree queryTree : trees) {
1044                        if(!QueryTreeUtils.isSubsumedBy(queryTree, tree)){
1045                                uncoveredTrees.add(queryTree);
1046                        }
1047                }
1048                return uncoveredTrees;
1049        }
1050
1051        private boolean terminationCriteriaSatisfied() {
1052                //stop was called or time expired
1053                if(stop || isTimeExpired()){
1054                        return true;
1055                }
1056
1057                // stop if there are no more positive examples to cover
1058                if (stopOnFirstDefinition && currentPosExamples.isEmpty()) {
1059                        return true;
1060                }
1061
1062                // we stop when the score of the last tree added is too low
1063                // (indicating that the algorithm could not find anything appropriate
1064                // in the timeframe set)
1065                if (bestCurrentScore < minimumTreeScore) {
1066                        return true;
1067                }
1068
1069                // stop when almost all positive examples have been covered
1070                if (tryFullCoverage) {
1071                        return false;
1072                } else {
1073                        int maxPosRemaining = (int) Math.ceil(startPosExamplesSize * 0.05d);
1074                        return (currentPosExamples.size() <= maxPosRemaining);
1075                }
1076        }
1077
1078        private boolean partialSolutionTerminationCriteriaSatisfied(){
1079                return stop || todoList.isEmpty() || currentPosExampleTrees.isEmpty() || (useDisjunction && isPartialSolutionTimeExpired()) || isTimeExpired();
1080        }
1081
1082        private boolean isPartialSolutionTimeExpired(){
1083                return maxTreeComputationTimeInSeconds > 0 && getRemainingPartialSolutionTime() > 0;
1084        }
1085
1086        private long getRemainingPartialSolutionTime() {
1087                return (long) (maxTreeComputationTimeInSeconds  - (System.currentTimeMillis() - partialSolutionStartTime) / 1000);
1088        }
1089
1090        /**
1091         * Shows the current setup of the algorithm.
1092         */
1093        private void printSetup(){
1094                String setup = "Setup:";
1095                setup += "\n#Pos. examples:" + currentPosExampleTrees.size();
1096                setup += "\n#Neg. examples:" + currentNegExampleTrees.size();
1097                setup += "\nHeuristic:" + heuristic.getHeuristicType().name();
1098                setup += "\nNoise value=" + noise;
1099                setup += "\nbeta=" + beta;
1100                logger.info(setup);
1101        }
1102
1103        /**
1104         * @param noisePercentage the noisePercentage to set
1105         */
1106        public void setNoisePercentage(double noisePercentage) {
1107                this.noisePercentage = noisePercentage;
1108        }
1109
1110        /**
1111         * @param noise the noise to set
1112         */
1113        public void setNoise(double noise) {
1114                this.noise = noise;
1115        }
1116
1117        /**
1118         * Default value is 1. Lower values force importance of covering positive examples.
1119         * @param beta the beta to set
1120         */
1121        public void setBeta(double beta) {
1122                this.beta = beta;
1123        }
1124
1125        /**
1126         * Set the max. execution time for the computation of a partial solution. If this value isn't set, the
1127         * max. algorithm runtime will be used, thus, in worst case only one partial solution was computed.
1128         *
1129         * @param maxTreeComputationTimeInSeconds the max. computation for a partial solution tree
1130         */
1131        public void setMaxTreeComputationTimeInSeconds(double maxTreeComputationTimeInSeconds) {
1132                this.maxTreeComputationTimeInSeconds = maxTreeComputationTimeInSeconds;
1133        }
1134
1135        /**
1136         * @return the heuristic
1137         */
1138        public QueryTreeHeuristic getHeuristic() {
1139                return heuristic;
1140        }
1141
1142        /**
1143         * @param heuristic the heuristic to set
1144         */
1145        public void setHeuristic(QueryTreeHeuristic heuristic) {
1146                this.heuristic = heuristic;
1147        }
1148
1149        /**
1150         * @param treeFactory the treeFactory to set
1151         */
1152        public void setTreeFactory(QueryTreeFactory treeFactory) {
1153                this.treeFactory = treeFactory;
1154        }
1155        
1156        public EvaluatedRDFResourceTree getBestSolution(){
1157                return currentPartialSolutions.last();
1158        }
1159        
1160        public SortedSet<EvaluatedRDFResourceTree> getSolutions(){
1161                return currentPartialSolutions;
1162        }
1163        
1164        public List<EvaluatedRDFResourceTree> getSolutionsAsList(){
1165                //              Collections.sort(list, Collections.reverseOrder());
1166                return new ArrayList<>(currentPartialSolutions);
1167        }
1168        
1169        /**
1170         * @param positiveExampleTrees the positive example trees to set
1171         */
1172        public void setPositiveExampleTrees(Map<OWLIndividual,RDFResourceTree> positiveExampleTrees) {
1173                this.currentPosExampleTrees = new ArrayList<>(positiveExampleTrees.values());
1174                this.currentPosExamples = new HashSet<>(positiveExampleTrees.keySet());
1175                
1176                for (Entry<OWLIndividual, RDFResourceTree> entry : positiveExampleTrees.entrySet()) {
1177                        OWLIndividual ind = entry.getKey();
1178                        RDFResourceTree tree = entry.getValue();
1179                        tree2Individual.put(tree, ind);
1180                }
1181        }
1182        
1183        /**
1184         * @param negativeExampleTrees the negative example trees to set
1185         */
1186        public void setNegativeExampleTrees(Map<OWLIndividual,RDFResourceTree> negativeExampleTrees) {
1187                this.currentNegExampleTrees = new ArrayList<>(negativeExampleTrees.values());
1188                this.currentNegExamples = new HashSet<>(negativeExampleTrees.keySet());
1189                
1190                for (Entry<OWLIndividual, RDFResourceTree> entry : negativeExampleTrees.entrySet()) {
1191                        OWLIndividual ind = entry.getKey();
1192                        RDFResourceTree tree = entry.getValue();
1193                        tree2Individual.put(tree, ind);
1194                }
1195        }
1196        
1197        /**
1198         * @param ks the ks to set
1199         */
1200        @Autowired
1201        public void setKs(SparqlEndpointKS ks) {
1202                this.ks = ks;
1203        }
1204        
1205        /**
1206         * @param maxTreeDepth the maximum depth of the trees, if those have to be generated
1207         * first. The default depth is 2.
1208         */
1209        public void setMaxTreeDepth(int maxTreeDepth) {
1210                this.maxTreeDepth = maxTreeDepth;
1211        }
1212        
1213        /**
1214         * @return the runtime in ms until the best solution was found
1215         */
1216        public long getTimeBestSolutionFound() {
1217                return timeBestSolutionFound;
1218        }
1219        
1220        /* (non-Javadoc)
1221         * @see java.lang.Object#clone()
1222         */
1223        @Override
1224        public Object clone() throws CloneNotSupportedException {
1225                super.clone();
1226                return new QTL2DisjunctiveMultiThreaded(this);
1227        }
1228
1229        private Set<EvaluatedRDFResourceTree> computePartialSolution(RDFResourceTree tree1, RDFResourceTree tree2, Set<RDFResourceTree> baseQueryTrees) {
1230                try {
1231//                      System.err.println(baseQueryTrees);
1232
1233                        LGGGeneratorSimple lggGenerator = new LGGGeneratorSimple();
1234                        // compute the LGG
1235                        MonitorFactory.getTimeMonitor("lgg").start();
1236                        lggGenerator.setTimeout(getRemainingPartialSolutionTime(), TimeUnit.SECONDS);
1237                        RDFResourceTree lgg = lggGenerator.getLGG(tree1, tree2);
1238                        MonitorFactory.getTimeMonitor("lgg").stop();
1239//                      System.out.println("COMPLETE:" + ((LGGGeneratorSimple)lggGenerator).isComplete());
1240//                              logger.info("LGG: "  + lgg.getStringRepresentation());
1241
1242                        // redundancy check
1243                        boolean redundant = isRedundant(lgg);
1244                        if (redundant) {
1245                logger.trace("redundant");
1246                return Collections.emptySet();
1247            }
1248
1249                        // evaluate the LGG
1250                        Set<EvaluatedRDFResourceTree> solutions = evaluate(lgg, true);
1251                        solutions.forEach(s -> s.setBaseQueryTrees(baseQueryTrees));
1252
1253                        return solutions;
1254                } catch (Exception e) {
1255                        e.printStackTrace();
1256                }
1257                return null;
1258        }
1259}