001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.qtl; 020 021import com.google.common.collect.BiMap; 022import com.google.common.collect.HashBiMap; 023import com.google.common.collect.Sets; 024import com.jamonapi.MonitorFactory; 025import gnu.trove.map.TObjectIntMap; 026import gnu.trove.map.hash.TObjectIntHashMap; 027import org.aksw.jena_sparql_api.core.QueryExecutionFactory; 028import org.apache.jena.rdf.model.Model; 029import org.apache.jena.rdf.model.ModelFactory; 030import org.dllearner.algorithms.qtl.datastructures.impl.EvaluatedRDFResourceTree; 031import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeConversionStrategy; 032import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeSubsumptionStrategy; 033import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree; 034import org.dllearner.algorithms.qtl.heuristics.QueryTreeHeuristic; 035import org.dllearner.algorithms.qtl.heuristics.QueryTreeHeuristicSimple; 036import org.dllearner.algorithms.qtl.impl.QueryTreeFactory; 037import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryBase; 038import org.dllearner.algorithms.qtl.operations.lgg.*; 039import org.dllearner.algorithms.qtl.util.Entailment; 040import org.dllearner.algorithms.qtl.util.filters.PredicateExistenceFilterDBpedia; 041import org.dllearner.core.*; 042import org.dllearner.core.StringRenderer.Rendering; 043import org.dllearner.core.config.ConfigOption; 044import org.dllearner.kb.OWLAPIOntology; 045import org.dllearner.kb.OWLFile; 046import org.dllearner.kb.SparqlEndpointKS; 047import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator; 048import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl; 049import org.dllearner.learningproblems.Heuristics; 050import org.dllearner.learningproblems.PosNegLP; 051import org.dllearner.learningproblems.QueryTreeScore; 052import org.semanticweb.owlapi.model.OWLClassExpression; 053import org.semanticweb.owlapi.model.OWLIndividual; 054import org.semanticweb.owlapi.util.SimpleShortFormProvider; 055import org.slf4j.Logger; 056import org.slf4j.LoggerFactory; 057import org.springframework.beans.factory.annotation.Autowired; 058 059import java.io.ByteArrayInputStream; 060import java.io.IOException; 061import java.text.DecimalFormat; 062import java.util.*; 063import java.util.Map.Entry; 064import java.util.concurrent.*; 065import java.util.stream.Collectors; 066 067/** 068 * A tree-based algorithm ... \todo add explanation 069 */ 070@ComponentAnn(name="query tree learner with noise (disjunctive) - multi-threaded", shortName="qtl2dismt", version=0.8) 071public class QTL2DisjunctiveMultiThreaded extends AbstractCELA implements Cloneable{ 072 073 private static final Logger logger = LoggerFactory.getLogger(QTL2DisjunctiveMultiThreaded.class); 074 private final DecimalFormat dFormat = new DecimalFormat("0.00"); 075 076 private SparqlEndpointKS ks; 077 078// private LGGGenerator2 lggGenerator = new LGGGeneratorSimple(); 079 private AbstractLGGGenerator lggGenerator; 080 081 private QueryTreeFactory treeFactory; 082 private ConciseBoundedDescriptionGenerator cbdGen; 083 084 private Queue<EvaluatedRDFResourceTree> todoList; 085 private SortedSet<EvaluatedRDFResourceTree> currentPartialSolutions = new ConcurrentSkipListSet<>(); 086 087 private double bestCurrentScore = 0d; 088 private EvaluatedRDFResourceTree bestPartialSolutionTree; 089 090 private List<RDFResourceTree> currentPosExampleTrees = new ArrayList<>(); 091 private List<RDFResourceTree> currentNegExampleTrees = new ArrayList<>(); 092 private Set<OWLIndividual> currentPosExamples = new HashSet<>(); 093 private Set<OWLIndividual> currentNegExamples = new HashSet<>(); 094 095 private BiMap<RDFResourceTree, OWLIndividual> tree2Individual = HashBiMap.create(); 096 097 private PosNegLP lp; 098 099 private Model model; 100 101 private volatile boolean stop; 102 private boolean isRunning; 103 104 private List<EvaluatedRDFResourceTree> partialSolutions; 105 106 private EvaluatedDescription<? extends Score> currentBestSolution; 107 108 private QueryTreeHeuristic heuristic; 109 110 //Parameters 111 @ConfigOption(defaultValue="0.0", description="the (approximated) percentage of noise within the examples") 112 private double noisePercentage = 0.0; 113 114 private double coverageWeight = 0.8; 115 private double specifityWeight = 0.1; 116 117 private double minCoveredPosExamplesFraction = 0.2; 118 119 // maximum execution time to compute a part of the solution 120 private double maxTreeComputationTimeInSeconds = 10; 121 122 @ConfigOption(defaultValue = "1", description = "how important it is not to cover negatives") 123 private double beta = 1; 124 125 // minimum score a query tree must have to be part of the solution 126 private double minimumTreeScore = 0.3; 127 128 // If TRUE the algorithm tries to cover all positive examples. Note that 129 // while this improves accuracy on the testing set, 130 // it may lead to overfitting 131 private boolean tryFullCoverage; 132 133 // algorithm will terminate immediately when a correct definition is found 134 private boolean stopOnFirstDefinition; 135 136 // the (approximated) value of noise within the examples 137 private double noise = 0.0; 138 139 private long partialSolutionStartTime; 140 141 private double startPosExamplesSize; 142 private int expressionTests = 0; 143 144 // the time needed until the best solution was found 145 private long timeBestSolutionFound = -1; 146 147 LiteralNodeConversionStrategy[] strategies = new LiteralNodeConversionStrategy[]{ 148 LiteralNodeConversionStrategy.MIN, 149 LiteralNodeConversionStrategy.MAX, 150 LiteralNodeConversionStrategy.MIN_MAX, 151 LiteralNodeConversionStrategy.DATATYPE 152 }; 153 private QueryExecutionFactory qef; 154 155 private Entailment entailment = Entailment.SIMPLE; 156 157 private int maxTreeDepth = 2; 158 159 private boolean useDisjunction = false; 160 161 private int nrOfThreads = Runtime.getRuntime().availableProcessors(); 162 163 public QTL2DisjunctiveMultiThreaded() {} 164 165 public QTL2DisjunctiveMultiThreaded(PosNegLP learningProblem, AbstractReasonerComponent reasoner) { 166 super(learningProblem, reasoner); 167 loadModel(); 168 } 169 170 public QTL2DisjunctiveMultiThreaded(PosNegLP lp, QueryExecutionFactory qef) { 171 super.learningProblem = lp; 172 this.lp = lp; 173 this.qef = qef; 174 } 175 176 public QTL2DisjunctiveMultiThreaded(PosNegLP lp, SparqlEndpointKS ks) { 177 this(lp, ks.getQueryExecutionFactory()); 178 } 179 180// public QTL2Disjunctive(PosNegLP lp, Model model) { 181// this.learningProblem = lp; 182// this.model = model; 183// } 184 185 /** 186 * Copy constructor. 187 * @param qtl the QTL2Disjunctive instance 188 */ 189 public QTL2DisjunctiveMultiThreaded(QTL2DisjunctiveMultiThreaded qtl) { 190 super(qtl.getLearningProblem(), qtl.getReasoner()); 191 this.model = ModelFactory.createDefaultModel(); 192 this.model.add(qtl.model); 193 this.beta = qtl.beta; 194 this.maxExecutionTimeInSeconds = qtl.maxExecutionTimeInSeconds; 195 this.maxTreeComputationTimeInSeconds = qtl.maxTreeComputationTimeInSeconds; 196 this.tryFullCoverage = qtl.tryFullCoverage; 197 this.stopOnFirstDefinition = qtl.stopOnFirstDefinition; 198 } 199 200 /* (non-Javadoc) 201 * @see org.dllearner.core.Component#init() 202 */ 203 @Override 204 public void init() throws ComponentInitException { 205 logger.info("Initializing..."); 206 if(!(learningProblem instanceof PosNegLP)){ 207 throw new IllegalArgumentException("Only PosNeg learning problems are supported"); 208 } 209 lp = (PosNegLP) learningProblem; 210 211 // get query execution factory from KS 212 if(qef == null) { 213 qef = ks.getQueryExecutionFactory(); 214 } 215 216 if(treeFactory == null) { 217 treeFactory = new QueryTreeFactoryBase(); 218 } 219 cbdGen = new ConciseBoundedDescriptionGeneratorImpl(qef); 220 221 // set the used heuristic 222 if(heuristic == null){ 223 heuristic = new QueryTreeHeuristicSimple(); 224 heuristic.setPosExamplesWeight(beta); 225 } 226 227 if(entailment == Entailment.SIMPLE) { 228 lggGenerator = new LGGGeneratorSimple(); 229// lggGenerator = new LGGGeneratorExt(); 230// ((LGGGeneratorExt)lggGenerator).setTreeFilters(Sets.newHashSet(new PredicateExistenceFilterDBpedia(null))); 231 } else if(entailment == Entailment.RDFS){ 232 lggGenerator = new LGGGeneratorRDFS(reasoner); 233 } 234 235 // generate the query trees 236 generateQueryTrees(); 237 238 startPosExamplesSize = currentPosExampleTrees.size(); 239 240 //console rendering of class expressions 241 StringRenderer.setRenderer(Rendering.MANCHESTER_SYNTAX); 242 StringRenderer.setShortFormProvider(new SimpleShortFormProvider()); 243 244 //compute the LGG for all examples 245 //this allows us to prune all other trees because we can omit paths in trees which are contained in all positive 246 //as well as negative examples 247// List<RDFResourceTree> allExamplesTrees = new ArrayList<RDFResourceTree>(); 248// allExamplesTrees.addAll(currentPosExampleTrees); 249// allExamplesTrees.addAll(currentNegExampleTrees); 250// RDFResourceTree lgg = lggGenerator.getLGG(allExamplesTrees); 251// lgg.dump(); 252 253 initialized = true; 254 logger.info("...initialization finished."); 255 } 256 257 /** 258 * @param entailment the entailment to set 259 */ 260 public void setEntailment(Entailment entailment) { 261 this.entailment = entailment; 262 } 263 264 public void setNrOfThreads(int nrOfThreads) { 265 this.nrOfThreads = nrOfThreads; 266 } 267 268 private void generateQueryTrees(){ 269 logger.info("Generating trees..."); 270 RDFResourceTree queryTree; 271 272 // positive examples 273 if(currentPosExampleTrees.isEmpty()){ 274 for (OWLIndividual ind : lp.getPositiveExamples()) { 275 try { 276 Model cbd = cbdGen.getConciseBoundedDescription(ind.toStringID(), maxTreeDepth); 277// cbd.write(new FileOutputStream("/tmp/dbpedia-" + ind.toStringID().substring(ind.toStringID().lastIndexOf('/') + 1) + ".ttl"), "TURTLE", null); 278 queryTree = treeFactory.getQueryTree(ind.toStringID(), cbd, maxTreeDepth); 279 tree2Individual.put(queryTree, ind); 280 currentPosExampleTrees.add(queryTree); 281 currentPosExamples.add(ind); 282 logger.debug(ind.toStringID()); 283 logger.debug(queryTree.getStringRepresentation()); 284 } catch (Exception e) { 285 logger.error("Failed to generate tree for resource " + ind, e); 286 throw new RuntimeException(); 287 } 288 } 289 } 290 291 // negative examples 292 if(currentNegExampleTrees.isEmpty()){ 293 for (OWLIndividual ind : lp.getNegativeExamples()) { 294 try { 295 Model cbd = cbdGen.getConciseBoundedDescription(ind.toStringID(), maxTreeDepth); 296 queryTree = treeFactory.getQueryTree(ind.toStringID(), cbd, maxTreeDepth); 297 tree2Individual.put(queryTree, ind); 298 currentNegExampleTrees.add(queryTree); 299 currentNegExamples.add(ind); 300 logger.debug(ind.toStringID()); 301 logger.debug(queryTree.getStringRepresentation()); 302 } catch (Exception e) { 303 logger.error("Failed to generate tree for resource " + ind, e); 304 throw new RuntimeException(); 305 } 306 } 307 } 308 logger.info("...done."); 309 } 310 311 /* (non-Javadoc) 312 * @see org.dllearner.core.LearningAlgorithm#start() 313 */ 314 @Override 315 public void start() { 316 if(currentPosExampleTrees.isEmpty()) { 317 logger.info("No positive examples given!"); 318 return; 319 } 320 321 printSetup(); 322 323 reset(); 324 logger.info("Running..."); 325 nanoStartTime = System.nanoTime(); 326 327// currentPosExampleTrees.stream().map(RDFResourceTree::getStringRepresentation).forEach(System.out::println); 328 329 // if noise=0 and there are no neg. examples, we can simply compute the LGG for all pos. examples 330 if(noise == 0 && currentNegExampleTrees.isEmpty()) { 331 lggGenerator.setTimeout(getRemainingRuntimeInMilliseconds(), TimeUnit.MILLISECONDS); 332 RDFResourceTree lgg = lggGenerator.getLGG(currentPosExampleTrees); 333 Set<EvaluatedRDFResourceTree> solutions = evaluate(lgg, false); 334 timeBestSolutionFound = getCurrentRuntimeInMilliSeconds(); 335 currentPartialSolutions.addAll(solutions); 336 partialSolutions.addAll(solutions); 337 currentBestSolution = solutions.iterator().next().asEvaluatedDescription(); 338 } else { 339 int i = 1; 340 while(!terminationCriteriaSatisfied() && (useDisjunction || i == 1)){ 341 logger.info(i++ + ". iteration..."); 342 logger.info("#Remaining pos. examples:" + currentPosExampleTrees.size()); 343 logger.info("#Remaining neg. examples:" + currentNegExampleTrees.size()); 344 345 // compute best (partial) solution computed so far 346 EvaluatedRDFResourceTree bestPartialSolution = computeBestPartialSolution(); 347 348 // add to set of partial solutions if criteria are satisfied 349 if(bestPartialSolution.getScore() >= minimumTreeScore){ 350 351 partialSolutions.add(bestPartialSolution); 352 353 // remove all examples covered by current partial solution 354 RDFResourceTree tree; 355 for (Iterator<RDFResourceTree> iterator = currentPosExampleTrees.iterator(); iterator.hasNext();) { 356 tree = iterator.next(); 357 if(!bestPartialSolution.getFalseNegatives().contains(tree)){//a pos tree that is not covered 358 iterator.remove(); 359 currentPosExamples.remove(tree2Individual.get(tree)); 360 } 361 } 362 for (Iterator<RDFResourceTree> iterator = currentNegExampleTrees.iterator(); iterator.hasNext();) { 363 tree = iterator.next(); 364 if(bestPartialSolution.getFalsePositives().contains(tree)){//a neg example that is covered 365 iterator.remove(); 366 currentNegExamples.remove(tree2Individual.get(tree)); 367 } 368 } 369 370 // (re)build the current combined solution from all partial solutions 371 currentBestSolution = buildCombinedSolution(); 372 373 logger.info("combined accuracy: " + dFormat.format(currentBestSolution.getAccuracy())); 374 } else { 375 String message = "No partial tree found which satisfies the minimal criteria."; 376 if(currentBestSolution != null) { 377 message += "- the best was: " 378 + currentBestSolution.getDescription() 379 + " with score " + currentBestSolution.getScore(); 380 } 381 logger.info(message); 382 } 383 384 } 385 } 386 387 isRunning = false; 388 389 postProcess(); 390 391 logger.info("Finished in {}ms.", getCurrentRuntimeInMilliSeconds()); 392 logger.info("{} descriptions tested", expressionTests); 393 if(currentBestSolution != null) { 394 logger.info("Combined solution:{}", currentBestSolution.getDescription().toString().replace("\n", "")); 395 logger.info(currentBestSolution.getScore().toString()); 396 } else { 397 logger.info("Could not find a solution in the given time."); 398 } 399 } 400 401 /** 402 * This method can be called for clean up of the solutions and re-ranking. 403 */ 404 private void postProcess() { 405 logger.trace("Post processing ..."); 406 // pick solutions with same accuracy, i.e. in the pos only case 407 // covering the same number of positive examples 408 SortedSet<EvaluatedRDFResourceTree> solutions = getSolutions(); 409 // pick solutions with accuracy above 410 // mas(maximum achievable score) - noise 411 List<EvaluatedRDFResourceTree> solutionsForPostProcessing = new ArrayList<>(); 412 for (EvaluatedRDFResourceTree solution : solutions) { 413 414 double accuracy = solution.getTreeScore().getAccuracy(); 415 416 double mas = heuristic.getMaximumAchievableScore(solution); 417 418 double epsilon = 0.01; 419 420 if(accuracy != mas && accuracy >= (mas - noise - epsilon)) { 421 solutionsForPostProcessing.add(solution); 422 } 423 } 424 425 logger.trace("Finished post processing."); 426 } 427 428 /** 429 * Compute a (partial) solution that covers as much positive examples as possible. 430 * @return a (partial) solution 431 */ 432 private EvaluatedRDFResourceTree computeBestPartialSolution(){ 433 logger.info("Computing best partial solution..."); 434 bestCurrentScore = Double.NEGATIVE_INFINITY; 435 partialSolutionStartTime = System.currentTimeMillis(); 436 initTodoList(currentPosExampleTrees, currentNegExampleTrees); 437 438 // generate id for each pos and neg example tree 439 TObjectIntMap<RDFResourceTree> index = new TObjectIntHashMap<>(this.currentPosExampleTrees.size() + this.currentNegExampleTrees.size()); 440 int id = 1; 441 for (RDFResourceTree posTree : currentPosExampleTrees) { 442 index.put(posTree, id++); 443 } 444 Set<Set<RDFResourceTree>> processedCombinations = new HashSet<>(); 445 446 ExecutorService pool = Executors.newFixedThreadPool(nrOfThreads); 447 448 while(!partialSolutionTerminationCriteriaSatisfied()){ 449 logger.trace("ToDo list size: " + todoList.size()); 450 // pick best element from todo list 451 EvaluatedRDFResourceTree currentElement = todoList.poll(); 452 final RDFResourceTree currentTree = currentElement.getTree(); 453 454 logger.trace("Next tree: {} ({})", currentElement.getBaseQueryTrees(), currentElement.getTreeScore()); 455 456 // generate the LGG between the chosen tree and each false negative resp. uncovered positive example 457 Collection<RDFResourceTree> falseNegatives = currentElement.getFalseNegatives(); 458 459 if(falseNegatives.isEmpty()) { // if the current solution covers already all pos examples 460// addToSolutions(bestPartialSolutionTree); 461// bestPartialSolutionTree = currentElement; 462 } 463 464 List<CompletableFuture<Void>> list = falseNegatives.stream() 465 .filter(fn -> !processedCombinations.contains(Sets.union(currentElement.getBaseQueryTrees(), Sets.newHashSet(fn)))) 466 .map(fn -> CompletableFuture.supplyAsync(() -> computePartialSolution(currentTree, fn, Sets.newTreeSet(Sets.union(currentElement.getBaseQueryTrees(), Sets.newHashSet(fn)))), pool)) 467 .map(solutionFuture -> solutionFuture.thenAccept(solutions -> { 468 for (EvaluatedRDFResourceTree solution : solutions) { 469 logger.trace("solution: {} ({})", solution.getBaseQueryTrees(), solution.getTreeScore()); 470 processedCombinations.add(solution.getBaseQueryTrees()); 471 expressionTests++; 472 double score = solution.getScore(); 473 double mas = heuristic.getMaximumAchievableScore(solution); 474 475 if (score >= bestCurrentScore) { 476 if (score > bestCurrentScore) { 477 timeBestSolutionFound = getCurrentRuntimeInMilliSeconds(); 478 logger.info("\tGot better solution after {}ms:" + solution.getTreeScore(), timeBestSolutionFound); 479// logger.info("\t" + solutionAsString(solution.asEvaluatedDescription())); 480 bestCurrentScore = score; 481 bestPartialSolutionTree = solution; 482 } 483 // add to ToDo list, if not already contained in ToDo list or solution list 484 if (bestCurrentScore == 1.0 || mas > score) { 485// todo(solution); 486 } 487 } else if (bestCurrentScore == 1.0 || mas >= bestCurrentScore) { // add to ToDo list if max. achievable score is higher 488// todo(solution); 489 } else { 490 logger.trace("Too weak: {}", solution.getTreeScore()); 491// System.err.println(solution.getEvaluatedDescription()); 492// System.out.println("Too general"); 493// System.out.println("MAS=" + mas + "\nBest=" + bestCurrentScore); 494// todo(solution); 495 } 496 todo(solution); 497 addToSolutions(solution); 498 } 499 })) 500 .collect(Collectors.toList()); 501 list.forEach(c -> { 502 try { 503 c.get(); 504 } catch (InterruptedException e) { 505 e.printStackTrace(); 506 } catch (ExecutionException e) { 507 e.printStackTrace(); 508 } 509 }); 510 511 512// while (it.hasNext() && !(useDisjunction && isPartialSolutionTimeExpired()) && !isTimeExpired()) { 513// RDFResourceTree uncoveredTree = it.next(); 514// logger.trace("Uncovered tree: " + uncoveredTree); 515// // we should avoid the computation of lgg(t2,t1) if we already did lgg(t1,t2) 516// Set<RDFResourceTree> baseQueryTrees = Sets.newTreeSet(currentElement.getBaseQueryTrees()); 517// baseQueryTrees.add(uncoveredTree); 518//// String s = ""; 519//// for (RDFResourceTree queryTree : baseQueryTrees) { 520//// s += index.get(queryTree) + ","; 521//// } 522//// System.err.println(s); 523// if (!processedCombinations.add(baseQueryTrees)) { 524//// System.err.println("skipping"); 525//// continue; 526// } 527// 528// PartialSolutionComputationTask callable = new PartialSolutionComputationTask(currentTree, uncoveredTree, baseQueryTrees); 529// Future<Set<EvaluatedRDFResourceTree>> future = pool.submit(callable); 530// set.add(future); 531// } 532 533// for (Future<Set<EvaluatedRDFResourceTree>> future : set) { 534// try { 535// Set<EvaluatedRDFResourceTree> solutions = future.get(); 536// 537// 538// } catch (InterruptedException e) { 539// e.printStackTrace(); 540// } catch (ExecutionException e) { 541// e.printStackTrace(); 542// } 543// } 544// addToSolutions(currentElement); 545 } 546 547 pool.shutdown(); 548 try { 549 pool.awaitTermination(1, TimeUnit.SECONDS); 550 } catch (InterruptedException e) { 551 e.printStackTrace(); 552 } 553 554 long endTime = System.currentTimeMillis(); 555 logger.info("...finished computing best partial solution in " + (endTime-partialSolutionStartTime) + "ms."); 556 EvaluatedDescription bestPartialSolution = bestPartialSolutionTree.asEvaluatedDescription(); 557 558 logger.info("Best partial solution: " + solutionAsString(bestPartialSolution) + "\n(" + bestPartialSolution.getScore() + ")"); 559 560 logger.trace("LGG time: " + MonitorFactory.getTimeMonitor("lgg").getTotal() + "ms"); 561 logger.trace("Avg. LGG time: " + MonitorFactory.getTimeMonitor("lgg").getAvg() + "ms"); 562 logger.info("#LGG computations: " + MonitorFactory.getTimeMonitor("lgg").getHits()); 563 564 logger.trace("Subsumption test time: " + MonitorFactory.getTimeMonitor("subsumption").getTotal() + "ms"); 565 logger.trace("Avg. subsumption test time: " + MonitorFactory.getTimeMonitor("subsumption").getAvg() + "ms"); 566 logger.trace("#Subsumption tests: " + MonitorFactory.getTimeMonitor("subsumption").getHits()); 567 568 return bestPartialSolutionTree; 569 } 570 571 private String solutionAsString(EvaluatedDescription ed) { 572 return renderer.render(ed.getDescription()).replace("\n", "").replaceAll("\\\\s{2,}", " "); 573 } 574 575 private boolean addToSolutions(EvaluatedRDFResourceTree solution) { 576 for (EvaluatedRDFResourceTree partialSolution : currentPartialSolutions) { 577 if(QueryTreeUtils.sameTrees(partialSolution.getTree(), solution.getTree())) { 578 return false; 579 } 580 } 581 return currentPartialSolutions.add(solution); 582 } 583 584 /** 585 * Initializes the ToDo list with all distinct trees contained in the given list of positive 586 * example trees {@code posExamples} and negative example trees {@code negExamples}. 587 * First, distinct trees are computed and afterwards, for each tree an initial score will be 588 * computed. 589 * @param posExamples the positive example trees 590 * @param negExamples the negative example trees 591 */ 592 private void initTodoList(List<RDFResourceTree> posExamples, List<RDFResourceTree> negExamples){ 593 todoList = new PriorityBlockingQueue<>(); 594// EvaluatedRDFResourceTree dummy = new EvaluatedRDFResourceTree(new QueryTreeImpl<String>((N)"TOP"), trees, 0d); 595// todoList.add(dummy); 596 597 // compute distinct trees, i.e. check if some of the trees already cover others 598 Collection<RDFResourceTree> distinctTrees = new ArrayList<>(); 599 for (RDFResourceTree queryTree : posExamples) { 600 boolean distinct = true; 601 for (RDFResourceTree otherTree : distinctTrees) { 602 if(!queryTree.equals(otherTree)){ 603 if(QueryTreeUtils.sameTrees(queryTree, otherTree)){ 604 distinct = false; 605 break; 606 } 607 } 608 } 609 if(distinct){ 610 distinctTrees.add(queryTree); 611 } 612 } 613 614 // compute an initial score 615 for (RDFResourceTree queryTree : distinctTrees) { 616 EvaluatedRDFResourceTree evaluatedQueryTree = evaluateSimple(queryTree, false); 617 evaluatedQueryTree.setBaseQueryTrees(Collections.singleton(queryTree)); 618 todoList.add(evaluatedQueryTree); 619 } 620 } 621 622 /** 623 * @return TRUE if the query tree is already contained in the solutions or 624 * todo list, otherwise FALSE 625 */ 626 private boolean isRedundant(RDFResourceTree tree) { 627 //check if not already contained in todo list 628 for (EvaluatedRDFResourceTree evTree : todoList) { 629 if(QueryTreeUtils.sameTrees(tree, evTree.getTree())){ 630 logger.trace("Not added to TODO list: Already contained in."); 631// logger.trace(evTree.getBaseQueryTrees().toString()); 632 return true; 633 } 634 } 635 636 //check if not already contained in solutions 637 for (EvaluatedRDFResourceTree evTree : currentPartialSolutions) { 638 if(QueryTreeUtils.sameTrees(tree, evTree.getTree())){ 639 logger.trace("Not added to partial solutions list: Already contained in."); 640 return true; 641 } 642 } 643 return false; 644 } 645 646 /** 647 * Add tree to ToDo list if not already contained in that list or the solutions. 648 * @param solution the solution 649 */ 650 private void todo(EvaluatedRDFResourceTree solution){ 651 logger.trace("Added to TODO list."); 652 todoList.add(solution); 653 } 654 655 private EvaluatedRDFResourceTree evaluateSimple(RDFResourceTree tree, boolean useSpecifity){ 656 //1. get a score for the coverage = recall oriented 657 //compute positive examples which are not covered by LGG 658 List<RDFResourceTree> uncoveredPositiveExampleTrees = getUncoveredTrees(tree, currentPosExampleTrees); 659 Set<OWLIndividual> uncoveredPosExamples = new TreeSet<>(); 660 for (RDFResourceTree queryTree : uncoveredPositiveExampleTrees) { 661 uncoveredPosExamples.add(tree2Individual.get(queryTree)); 662 } 663 //compute negative examples which are covered by LGG 664 Collection<RDFResourceTree> coveredNegativeExampleTrees = getCoveredTrees(tree, currentNegExampleTrees); 665 Set<OWLIndividual> coveredNegExamples = new TreeSet<>(); 666 for (RDFResourceTree queryTree : coveredNegativeExampleTrees) { 667 coveredNegExamples.add(tree2Individual.get(queryTree)); 668 } 669 //compute score 670 int coveredPositiveExamples = currentPosExampleTrees.size() - uncoveredPositiveExampleTrees.size(); 671 double recall = coveredPositiveExamples / (double)currentPosExampleTrees.size(); 672 double precision = (coveredNegativeExampleTrees.size() + coveredPositiveExamples == 0) 673 ? 0 674 : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExampleTrees.size()); 675 676 double coverageScore = Heuristics.getFScore(recall, precision, beta); 677 678 //2. get a score for the specifity of the query, i.e. how many edges/nodes = precision oriented 679 int nrOfSpecificNodes = 0; 680 for (RDFResourceTree childNode : QueryTreeUtils.getNodes(tree)) { 681 if(!childNode.isVarNode()){ 682 nrOfSpecificNodes++; 683 } 684 } 685 double specifityScore = 0d; 686 if(useSpecifity){ 687 specifityScore = Math.log(nrOfSpecificNodes); 688 } else { 689 specifityScore = 1 / (double) nrOfSpecificNodes; 690 } 691 692 //3.compute the total score 693 double score = coverageWeight * coverageScore + specifityWeight * specifityScore; 694 695 QueryTreeScore queryTreeScore = new QueryTreeScore(score, coverageScore, 696 new TreeSet<>(Sets.difference(currentPosExamples, uncoveredPosExamples)), uncoveredPosExamples, 697 coveredNegExamples, new TreeSet<>(Sets.difference(currentNegExamples, coveredNegExamples)), 698 specifityScore, nrOfSpecificNodes); 699 700// QueryTreeScore queryTreeScore = new QueryTreeScore(score, coverageScore, 701// null,null,null,null, 702// specifityScore, nrOfSpecificNodes); 703 704 EvaluatedRDFResourceTree evaluatedTree = new EvaluatedRDFResourceTree(tree, uncoveredPositiveExampleTrees, coveredNegativeExampleTrees, queryTreeScore); 705 706 //TODO use only the heuristic to compute the score 707 score = heuristic.getScore(evaluatedTree); 708 queryTreeScore.setScore(score); 709 queryTreeScore.setAccuracy(score); 710 711 return evaluatedTree; 712 } 713 714 /** 715 * Evaluated a query tree such that it returns a set of evaluated query trees. 716 * A set is returned because there are several ways how to convert literal nodes. 717 * @param tree the query tree 718 * @param useSpecifity whether to use SPECIFITY as measure 719 * @return a set of evaluated query trees 720 */ 721 private Set<EvaluatedRDFResourceTree> evaluate(RDFResourceTree tree, boolean useSpecifity){ 722 Set<EvaluatedRDFResourceTree> evaluatedTrees = new TreeSet<>(); 723 724 LiteralNodeSubsumptionStrategy[] strategies = LiteralNodeSubsumptionStrategy.values(); 725 strategies = new LiteralNodeSubsumptionStrategy[]{ 726 LiteralNodeSubsumptionStrategy.DATATYPE, 727// LiteralNodeSubsumptionStrategy.INTERVAL, 728// LiteralNodeSubsumptionStrategy.MIN, 729// LiteralNodeSubsumptionStrategy.MAX, 730 }; 731 for (LiteralNodeSubsumptionStrategy strategy : strategies) { 732 // 1. get a score for the coverage = recall oriented 733 List<RDFResourceTree> uncoveredPositiveExampleTrees = new ArrayList<>(); 734 List<RDFResourceTree> coveredNegativeExampleTrees = new ArrayList<>(); 735 736 // compute positive examples which are not covered by LGG 737 for (RDFResourceTree posTree : currentPosExampleTrees) { 738// System.out.print(currentPosExampleTrees.indexOf(posTree) + ":"); 739 if(!QueryTreeUtils.isSubsumedBy(posTree, tree, entailment, reasoner)){ 740// System.err.println(posTree.getStringRepresentation(true));System.err.println(tree.getStringRepresentation(true)); 741// System.out.println("FALSE"); 742 uncoveredPositiveExampleTrees.add(posTree); 743 } else { 744// System.out.println("TRUE"); 745 } 746 } 747 748 // compute negative examples which are covered by LGG 749 for (RDFResourceTree negTree : currentNegExampleTrees) { 750 if(QueryTreeUtils.isSubsumedBy(negTree, tree, entailment, reasoner)){ 751 coveredNegativeExampleTrees.add(negTree); 752 } 753 } 754 755 // convert to individuals 756 Set<OWLIndividual> uncoveredPosExamples = asIndividuals(uncoveredPositiveExampleTrees); 757 Set<OWLIndividual> coveredNegExamples = asIndividuals(coveredNegativeExampleTrees); 758 759 // compute score 760 int coveredPositiveExamples = currentPosExampleTrees.size() - uncoveredPositiveExampleTrees.size(); 761 double recall = coveredPositiveExamples / (double)currentPosExampleTrees.size(); 762 double precision = (coveredNegativeExampleTrees.size() + coveredPositiveExamples == 0) 763 ? 0 764 : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExampleTrees.size()); 765 766 double coverageScore = Heuristics.getFScore(recall, precision, beta); 767 768 // 2. get a score for the specifity of the query, i.e. how many edges/nodes = precision oriented 769 int nrOfSpecificNodes = 0; 770 for (RDFResourceTree childNode : QueryTreeUtils.getNodes(tree)) { 771 if(!childNode.isVarNode()){ 772 nrOfSpecificNodes++; 773 } 774 } 775 double specifityScore = 0d; 776 if(useSpecifity){ 777 specifityScore = Math.log(nrOfSpecificNodes); 778 } 779 780 // 3.compute the total score 781 double score = coverageWeight * coverageScore + specifityWeight * specifityScore; 782 783 QueryTreeScore queryTreeScore = new QueryTreeScore(score, coverageScore, 784 new TreeSet<>(Sets.difference(currentPosExamples, uncoveredPosExamples)), uncoveredPosExamples, 785 coveredNegExamples, new TreeSet<>(Sets.difference(currentNegExamples, coveredNegExamples)), 786 specifityScore, nrOfSpecificNodes); 787 788 EvaluatedRDFResourceTree evaluatedTree = new EvaluatedRDFResourceTree(tree, uncoveredPositiveExampleTrees, coveredNegativeExampleTrees, queryTreeScore); 789 790 //TODO use only the heuristic to compute the score 791 score = heuristic.getScore(evaluatedTree); 792 queryTreeScore.setScore(score); 793 queryTreeScore.setAccuracy(score); 794 795 evaluatedTrees.add(evaluatedTree); 796 } 797 798 return evaluatedTrees; 799 } 800 801 /** 802 * Evaluated a query tree such that it returns a set of evaluated query trees. 803 * A set is returned because there are several ways how to convert literal nodes. 804 * @param tree the query tree 805 * @param useSpecifity whether to use SPECIFITY as measure 806 * @return a set of evaluated query trees 807 */ 808 private Set<EvaluatedRDFResourceTree> evaluate2(RDFResourceTree tree, boolean useSpecifity){ 809 Set<EvaluatedRDFResourceTree> evaluatedTrees = new TreeSet<>(); 810 811 //test different strategies on the conversion of literal nodes 812 Set<OWLClassExpression> combinations = new HashSet<>(); 813 814 for (LiteralNodeConversionStrategy strategy : strategies) { 815 OWLClassExpression ce = QueryTreeUtils.toOWLClassExpression(tree); 816 combinations.add(ce); 817 } 818 //compute all combinations of different types of facets 819// OWLClassExpression ce = tree.asOWLClassExpression(LiteralNodeConversionStrategy.FACET_RESTRICTION); 820// combinations = ce.accept(new ClassExpressionLiteralCombination()); 821 for (OWLClassExpression c : combinations) { 822 //convert to individuals 823 SortedSet<OWLIndividual> coveredExamples = reasoner.getIndividuals(c); 824 Set<OWLIndividual> coveredPosExamples = new TreeSet<>(Sets.intersection(currentPosExamples, coveredExamples)); 825 Set<OWLIndividual> uncoveredPosExamples = new TreeSet<>(Sets.difference(currentPosExamples, coveredExamples)); 826 Set<OWLIndividual> coveredNegExamples = new TreeSet<>(Sets.intersection(currentNegExamples, coveredExamples)); 827 Set<OWLIndividual> uncoveredNegExamples = new TreeSet<>(Sets.difference(currentNegExamples, coveredExamples)); 828 829 //compute score 830 double recall = coveredPosExamples.size() / (double)currentPosExamples.size(); 831 double precision = (coveredNegExamples.size() + coveredPosExamples.size() == 0) 832 ? 0 833 : coveredPosExamples.size() / (double)(coveredPosExamples.size() + coveredNegExamples.size()); 834 835 double coverageScore = Heuristics.getFScore(recall, precision, beta); 836 837 //2. get a score for the specificity of the query, i.e. how many edges/nodes = precision oriented 838 int nrOfSpecificNodes = 0; 839 for (RDFResourceTree childNode : QueryTreeUtils.getNodes(tree)){ 840 if(!childNode.isVarNode()){ 841 nrOfSpecificNodes++; 842 } 843 } 844 double specifityScore = 0d; 845 if(useSpecifity){ 846 specifityScore = Math.log(nrOfSpecificNodes); 847 } 848 849 //3.compute the total score 850 double score = coverageWeight * coverageScore + specifityWeight * specifityScore; 851 852 QueryTreeScore queryTreeScore = new QueryTreeScore( 853 score, coverageScore, 854 coveredPosExamples, uncoveredPosExamples, 855 coveredNegExamples, uncoveredNegExamples, 856 specifityScore, nrOfSpecificNodes); 857 858 //TODO use only the heuristic to compute the score 859 EvaluatedRDFResourceTree evaluatedTree = new EvaluatedRDFResourceTree(tree, 860 asQueryTrees(uncoveredPosExamples), asQueryTrees(coveredNegExamples), queryTreeScore); 861 score = heuristic.getScore(evaluatedTree); 862 queryTreeScore.setScore(score); 863 queryTreeScore.setAccuracy(score); 864 865 866 EvaluatedDescription evaluatedDescription = new EvaluatedDescription(c, queryTreeScore); 867 868 evaluatedTree.setDescription(evaluatedDescription); 869 870 evaluatedTrees.add(evaluatedTree); 871 } 872 return evaluatedTrees; 873 } 874 875 private EvaluatedDescription<? extends Score> buildCombinedSolution(){ 876 EvaluatedDescription<? extends Score> bestCombinedSolution = null; 877 double bestScore = Double.NEGATIVE_INFINITY; 878 LiteralNodeConversionStrategy[] strategies = LiteralNodeConversionStrategy.values(); 879 strategies = new LiteralNodeConversionStrategy[]{LiteralNodeConversionStrategy.DATATYPE}; 880 for (LiteralNodeConversionStrategy strategy : strategies) { 881 EvaluatedDescription<? extends Score> combinedSolution; 882 if(partialSolutions.size() == 1){ 883 combinedSolution = partialSolutions.get(0).asEvaluatedDescription(); 884 } else { 885 Set<OWLClassExpression> disjuncts = new TreeSet<>(); 886 887 Set<OWLIndividual> posCovered = new HashSet<>(); 888 Set<OWLIndividual> negCovered = new HashSet<>(); 889 890 //build the union of all class expressions 891 OWLClassExpression partialDescription; 892 for (EvaluatedRDFResourceTree partialSolution : partialSolutions) { 893 partialDescription = partialSolution.asEvaluatedDescription().getDescription(); 894 disjuncts.add(partialDescription); 895 posCovered.addAll(partialSolution.getTreeScore().getCoveredPositives()); 896 negCovered.addAll(partialSolution.getTreeScore().getCoveredNegatives()); 897 } 898 OWLClassExpression unionDescription = dataFactory.getOWLObjectUnionOf(disjuncts); 899 900 Set<OWLIndividual> posNotCovered = Sets.difference(lp.getPositiveExamples(), posCovered); 901 Set<OWLIndividual> negNotCovered = Sets.difference(lp.getNegativeExamples(), negCovered); 902 903 //compute the coverage 904 double recall = posCovered.size() / (double)lp.getPositiveExamples().size(); 905 double precision = (posCovered.size() + negCovered.size() == 0) 906 ? 0 907 : posCovered.size() / (double)(posCovered.size() + negCovered.size()); 908 909 double coverageScore = Heuristics.getFScore(recall, precision, beta); 910 911// ScoreTwoValued score = new ScoreTwoValued(posCovered, posNotCovered, negCovered, negNotCovered); 912// score.setAccuracy(coverageScore); 913 QueryTreeScore score = new QueryTreeScore(coverageScore, coverageScore, posCovered, posNotCovered, negCovered, negNotCovered, -1, -1); 914 915 combinedSolution = new EvaluatedDescription(unionDescription, score); 916 } 917 if(combinedSolution.getAccuracy() > bestScore){ 918 bestCombinedSolution = combinedSolution; 919 bestCurrentScore = combinedSolution.getAccuracy(); 920 } 921 } 922 return bestCombinedSolution; 923 } 924 925 private void reset(){ 926 stop = false; 927 isRunning = true; 928 929 currentBestSolution = null; 930 partialSolutions = new ArrayList<>(); 931 932 bestCurrentScore = minimumTreeScore; 933 934 MonitorFactory.getTimeMonitor("lgg").reset(); 935 nanoStartTime = System.nanoTime(); 936 } 937 938 /* (non-Javadoc) 939 * @see org.dllearner.core.StoppableLearningAlgorithm#stop() 940 */ 941 @Override 942 public void stop() { 943 stop = true; 944 } 945 946 /* (non-Javadoc) 947 * @see org.dllearner.core.AbstractCELA#getCurrentlyBestDescription() 948 */ 949 @Override 950 public OWLClassExpression getCurrentlyBestDescription() { 951 return currentBestSolution.getDescription(); 952 } 953 954 /* (non-Javadoc) 955 * @see org.dllearner.core.AbstractCELA#getCurrentlyBestEvaluatedDescription() 956 */ 957 @Override 958 public EvaluatedDescription getCurrentlyBestEvaluatedDescription() { 959 return currentBestSolution; 960 } 961 962 /* (non-Javadoc) 963 * @see org.dllearner.core.StoppableLearningAlgorithm#isRunning() 964 */ 965 @Override 966 public boolean isRunning() { 967 return isRunning; 968 } 969 970// @Autowired 971// public void setLearningProblem(PosNegLP learningProblem) { 972// this.lp = learningProblem; 973// } 974 975// @Autowired 976 @Override 977 public void setReasoner(AbstractReasonerComponent reasoner){ 978 super.setReasoner(reasoner); 979// loadModel(); 980 } 981 982 private void loadModel(){ 983 model = ModelFactory.createDefaultModel(); 984 for (KnowledgeSource ks : reasoner.getSources()) { 985 if(ks instanceof OWLFile){ 986 try { 987 model.read(((OWLFile) ks).getURL().openStream(), null); 988 } catch (IOException e) { 989 e.printStackTrace(); 990 } 991 } else if(ks instanceof OWLAPIOntology){ 992 ByteArrayInputStream bais = new ByteArrayInputStream(((OWLAPIOntology) ks).getConverter().convert(((OWLAPIOntology) ks).getOntology())); 993 model.read(bais, null); 994 try { 995 bais.close(); 996 } catch (IOException e) { 997 e.printStackTrace(); 998 } 999 } 1000 } 1001 } 1002 1003 private Set<OWLIndividual> asIndividuals(Collection<RDFResourceTree> trees){ 1004 Set<OWLIndividual> individuals = new HashSet<>(trees.size()); 1005 for (RDFResourceTree queryTree : trees) { 1006 individuals.add(tree2Individual.get(queryTree)); 1007 } 1008 return individuals; 1009 } 1010 1011 private Set<RDFResourceTree> asQueryTrees(Collection<OWLIndividual> individuals){ 1012 Set<RDFResourceTree> trees = new HashSet<>(individuals.size()); 1013 for (OWLIndividual ind : individuals) { 1014 trees.add(tree2Individual.inverse().get(ind)); 1015 } 1016 return trees; 1017 } 1018 1019 /** 1020 * Computes all trees from the given list {@code allTrees} which are subsumed by {@code tree}. 1021 * @param tree the tree 1022 * @param trees all trees 1023 * @return all trees from the given list {@code allTrees} which are subsumed by {@code tree} 1024 */ 1025 private List<RDFResourceTree> getCoveredTrees(RDFResourceTree tree, List<RDFResourceTree> trees){ 1026 List<RDFResourceTree> coveredTrees = new ArrayList<>(); 1027 for (RDFResourceTree queryTree : trees) { 1028 if(QueryTreeUtils.isSubsumedBy(queryTree, tree)){ 1029 coveredTrees.add(queryTree); 1030 } 1031 } 1032 return coveredTrees; 1033 } 1034 1035 /** 1036 * Computes all trees from the given list {@code trees} which are not subsumed by {@code tree}. 1037 * @param tree the tree 1038 * @param trees the trees 1039 * @return all trees from the given list {@code trees} which are not subsumed by {@code tree}. 1040 */ 1041 private List<RDFResourceTree> getUncoveredTrees(RDFResourceTree tree, List<RDFResourceTree> trees){ 1042 List<RDFResourceTree> uncoveredTrees = new ArrayList<>(); 1043 for (RDFResourceTree queryTree : trees) { 1044 if(!QueryTreeUtils.isSubsumedBy(queryTree, tree)){ 1045 uncoveredTrees.add(queryTree); 1046 } 1047 } 1048 return uncoveredTrees; 1049 } 1050 1051 private boolean terminationCriteriaSatisfied() { 1052 //stop was called or time expired 1053 if(stop || isTimeExpired()){ 1054 return true; 1055 } 1056 1057 // stop if there are no more positive examples to cover 1058 if (stopOnFirstDefinition && currentPosExamples.isEmpty()) { 1059 return true; 1060 } 1061 1062 // we stop when the score of the last tree added is too low 1063 // (indicating that the algorithm could not find anything appropriate 1064 // in the timeframe set) 1065 if (bestCurrentScore < minimumTreeScore) { 1066 return true; 1067 } 1068 1069 // stop when almost all positive examples have been covered 1070 if (tryFullCoverage) { 1071 return false; 1072 } else { 1073 int maxPosRemaining = (int) Math.ceil(startPosExamplesSize * 0.05d); 1074 return (currentPosExamples.size() <= maxPosRemaining); 1075 } 1076 } 1077 1078 private boolean partialSolutionTerminationCriteriaSatisfied(){ 1079 return stop || todoList.isEmpty() || currentPosExampleTrees.isEmpty() || (useDisjunction && isPartialSolutionTimeExpired()) || isTimeExpired(); 1080 } 1081 1082 private boolean isPartialSolutionTimeExpired(){ 1083 return maxTreeComputationTimeInSeconds > 0 && getRemainingPartialSolutionTime() > 0; 1084 } 1085 1086 private long getRemainingPartialSolutionTime() { 1087 return (long) (maxTreeComputationTimeInSeconds - (System.currentTimeMillis() - partialSolutionStartTime) / 1000); 1088 } 1089 1090 /** 1091 * Shows the current setup of the algorithm. 1092 */ 1093 private void printSetup(){ 1094 String setup = "Setup:"; 1095 setup += "\n#Pos. examples:" + currentPosExampleTrees.size(); 1096 setup += "\n#Neg. examples:" + currentNegExampleTrees.size(); 1097 setup += "\nHeuristic:" + heuristic.getHeuristicType().name(); 1098 setup += "\nNoise value=" + noise; 1099 setup += "\nbeta=" + beta; 1100 logger.info(setup); 1101 } 1102 1103 /** 1104 * @param noisePercentage the noisePercentage to set 1105 */ 1106 public void setNoisePercentage(double noisePercentage) { 1107 this.noisePercentage = noisePercentage; 1108 } 1109 1110 /** 1111 * @param noise the noise to set 1112 */ 1113 public void setNoise(double noise) { 1114 this.noise = noise; 1115 } 1116 1117 /** 1118 * Default value is 1. Lower values force importance of covering positive examples. 1119 * @param beta the beta to set 1120 */ 1121 public void setBeta(double beta) { 1122 this.beta = beta; 1123 } 1124 1125 /** 1126 * Set the max. execution time for the computation of a partial solution. If this value isn't set, the 1127 * max. algorithm runtime will be used, thus, in worst case only one partial solution was computed. 1128 * 1129 * @param maxTreeComputationTimeInSeconds the max. computation for a partial solution tree 1130 */ 1131 public void setMaxTreeComputationTimeInSeconds(double maxTreeComputationTimeInSeconds) { 1132 this.maxTreeComputationTimeInSeconds = maxTreeComputationTimeInSeconds; 1133 } 1134 1135 /** 1136 * @return the heuristic 1137 */ 1138 public QueryTreeHeuristic getHeuristic() { 1139 return heuristic; 1140 } 1141 1142 /** 1143 * @param heuristic the heuristic to set 1144 */ 1145 public void setHeuristic(QueryTreeHeuristic heuristic) { 1146 this.heuristic = heuristic; 1147 } 1148 1149 /** 1150 * @param treeFactory the treeFactory to set 1151 */ 1152 public void setTreeFactory(QueryTreeFactory treeFactory) { 1153 this.treeFactory = treeFactory; 1154 } 1155 1156 public EvaluatedRDFResourceTree getBestSolution(){ 1157 return currentPartialSolutions.last(); 1158 } 1159 1160 public SortedSet<EvaluatedRDFResourceTree> getSolutions(){ 1161 return currentPartialSolutions; 1162 } 1163 1164 public List<EvaluatedRDFResourceTree> getSolutionsAsList(){ 1165 // Collections.sort(list, Collections.reverseOrder()); 1166 return new ArrayList<>(currentPartialSolutions); 1167 } 1168 1169 /** 1170 * @param positiveExampleTrees the positive example trees to set 1171 */ 1172 public void setPositiveExampleTrees(Map<OWLIndividual,RDFResourceTree> positiveExampleTrees) { 1173 this.currentPosExampleTrees = new ArrayList<>(positiveExampleTrees.values()); 1174 this.currentPosExamples = new HashSet<>(positiveExampleTrees.keySet()); 1175 1176 for (Entry<OWLIndividual, RDFResourceTree> entry : positiveExampleTrees.entrySet()) { 1177 OWLIndividual ind = entry.getKey(); 1178 RDFResourceTree tree = entry.getValue(); 1179 tree2Individual.put(tree, ind); 1180 } 1181 } 1182 1183 /** 1184 * @param negativeExampleTrees the negative example trees to set 1185 */ 1186 public void setNegativeExampleTrees(Map<OWLIndividual,RDFResourceTree> negativeExampleTrees) { 1187 this.currentNegExampleTrees = new ArrayList<>(negativeExampleTrees.values()); 1188 this.currentNegExamples = new HashSet<>(negativeExampleTrees.keySet()); 1189 1190 for (Entry<OWLIndividual, RDFResourceTree> entry : negativeExampleTrees.entrySet()) { 1191 OWLIndividual ind = entry.getKey(); 1192 RDFResourceTree tree = entry.getValue(); 1193 tree2Individual.put(tree, ind); 1194 } 1195 } 1196 1197 /** 1198 * @param ks the ks to set 1199 */ 1200 @Autowired 1201 public void setKs(SparqlEndpointKS ks) { 1202 this.ks = ks; 1203 } 1204 1205 /** 1206 * @param maxTreeDepth the maximum depth of the trees, if those have to be generated 1207 * first. The default depth is 2. 1208 */ 1209 public void setMaxTreeDepth(int maxTreeDepth) { 1210 this.maxTreeDepth = maxTreeDepth; 1211 } 1212 1213 /** 1214 * @return the runtime in ms until the best solution was found 1215 */ 1216 public long getTimeBestSolutionFound() { 1217 return timeBestSolutionFound; 1218 } 1219 1220 /* (non-Javadoc) 1221 * @see java.lang.Object#clone() 1222 */ 1223 @Override 1224 public Object clone() throws CloneNotSupportedException { 1225 super.clone(); 1226 return new QTL2DisjunctiveMultiThreaded(this); 1227 } 1228 1229 private Set<EvaluatedRDFResourceTree> computePartialSolution(RDFResourceTree tree1, RDFResourceTree tree2, Set<RDFResourceTree> baseQueryTrees) { 1230 try { 1231// System.err.println(baseQueryTrees); 1232 1233 LGGGeneratorSimple lggGenerator = new LGGGeneratorSimple(); 1234 // compute the LGG 1235 MonitorFactory.getTimeMonitor("lgg").start(); 1236 lggGenerator.setTimeout(getRemainingPartialSolutionTime(), TimeUnit.SECONDS); 1237 RDFResourceTree lgg = lggGenerator.getLGG(tree1, tree2); 1238 MonitorFactory.getTimeMonitor("lgg").stop(); 1239// System.out.println("COMPLETE:" + ((LGGGeneratorSimple)lggGenerator).isComplete()); 1240// logger.info("LGG: " + lgg.getStringRepresentation()); 1241 1242 // redundancy check 1243 boolean redundant = isRedundant(lgg); 1244 if (redundant) { 1245 logger.trace("redundant"); 1246 return Collections.emptySet(); 1247 } 1248 1249 // evaluate the LGG 1250 Set<EvaluatedRDFResourceTree> solutions = evaluate(lgg, true); 1251 solutions.forEach(s -> s.setBaseQueryTrees(baseQueryTrees)); 1252 1253 return solutions; 1254 } catch (Exception e) { 1255 e.printStackTrace(); 1256 } 1257 return null; 1258 } 1259}