001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.kb.sparql; 020 021import com.jamonapi.Monitor; 022import com.jamonapi.MonitorFactory; 023import org.apache.log4j.Logger; 024import org.dllearner.core.AbstractKnowledgeSource; 025import org.dllearner.core.ComponentAnn; 026import org.dllearner.kb.OWLOntologyKnowledgeSource; 027import org.dllearner.kb.aquisitors.SparqlTupleAquisitor; 028import org.dllearner.kb.aquisitors.SparqlTupleAquisitorImproved; 029import org.dllearner.kb.aquisitors.TupleAquisitor; 030import org.dllearner.kb.extraction.Configuration; 031import org.dllearner.kb.extraction.Manager; 032import org.dllearner.kb.extraction.Node; 033import org.dllearner.kb.manipulator.Manipulator; 034import org.dllearner.kb.manipulator.ObjectReplacementRule; 035import org.dllearner.kb.manipulator.PredicateReplacementRule; 036import org.dllearner.kb.manipulator.Rule.Months; 037import org.dllearner.utilities.Files; 038import org.dllearner.utilities.JamonMonitorLogger; 039import org.dllearner.utilities.datastructures.StringTuple; 040import org.dllearner.utilities.owl.OntologyToByteConverter; 041import org.dllearner.utilities.owl.SimpleOntologyToByteConverter; 042import org.dllearner.utilities.statistics.SimpleClock; 043import org.semanticweb.owlapi.model.OWLOntology; 044import org.semanticweb.owlapi.model.OWLOntologyManager; 045 046import javax.swing.*; 047import java.io.File; 048import java.net.URL; 049import java.util.*; 050 051/** 052 * Represents the SPARQL Endpoint Component. 053 * 054 * @author Jens Lehmann 055 * @author Sebastian Knappe 056 * @author Sebastian Hellmann 057 */ 058@ComponentAnn(name = "SPARQL endpoint fragment", shortName = "sparqlfrag", version = 0.5) 059public class SparqlKnowledgeSource extends AbstractKnowledgeSource implements OWLOntologyKnowledgeSource{ 060 061 private ProgressMonitor mon; 062 063 private static final boolean debugExitAfterExtraction = false; // switches 064 065 private byte[] ontologyBytes; 066 private OntologyToByteConverter converter = new SimpleOntologyToByteConverter(); 067 068 public SparqlKnowledgeSource() {} 069 070 public SparqlKnowledgeSource(URL url, Set<String> instances) { 071 this.url = url; 072 this.instances = instances; 073 } 074 075 private SparqlEndpoint endpoint = null; 076 077 //private String format = "N-TRIPLES"; 078 //private String format = "RDF/XML"; 079 080 private URL ontologyFragmentURL; 081 082 private Manipulator manipulator = null; 083 084 // received ontology as array, used if format=Array(an element of the 085 // array consists of the subject, predicate and object separated by '<' 086 //private String[] ontArray; 087 088 // received ontology as KB, the internal format 089 //private KB kb; 090 091 // mainly used for statistic 092 private int nrOfExtractedAxioms = 0; 093 094 //// TODO: turn those into config options /// 095 private URL url; 096 097 private Set<String> instances; 098 099 private int recursionDepth = 1; 100 101 private boolean getAllSuperClasses = true; 102 103 private boolean closeAfterRecursion = true; 104 105 private boolean propertyInformation; 106 107 private int breakSuperClassRetrievalAfter = 1000; 108 109 private boolean dissolveBlankNodes = true; 110 111 private boolean saveExtractedFragment = false; 112 113 private String predefinedEndpoint; 114 115 private Collection<String> defaultGraphURIs = new LinkedList<>(); 116 117 private Collection<String> namedGraphURIs = new LinkedList<>(); 118 119 private boolean useCache = true; 120 121 private String cacheDir = "cache"; 122 123 private boolean useCacheDatabase; 124 125 private String predefinedFilter; 126 127 private Set<String> objList = new TreeSet<>(); 128 129 private Set<String> predList = new TreeSet<>() ; 130 131 private boolean useLits = true; 132 133 private String predefinedManipulator; 134 135 private List<StringTuple> replacePredicate = new LinkedList<>(); 136 137 private boolean useImprovedSparqlTupelAquisitor; 138 139 private List<StringTuple> replaceObject = new LinkedList<>(); 140 141 private static Logger logger = Logger 142 .getLogger(SparqlKnowledgeSource.class); 143 144 /* 145 * (non-Javadoc) 146 * 147 * @see org.dllearner.core.Component#init() 148 */ 149 @Override 150 public void init() { 151 logger.info("SparqlModul: Collecting Ontology"); 152 SimpleClock totalTime = new SimpleClock(); 153 //SimpleClock extractionTime = new SimpleClock(); 154 if(mon != null){ 155 mon.setNote("Collecting Ontology"); 156 } 157 logger.trace(getURL()); 158 logger.trace(getSparqlEndpoint()); 159// logger.trace(configurator.getInstances()); 160 Manager m = new Manager(); 161 m.addProgressMonitor(mon); 162 163 // get Options for Manipulator 164 Manipulator manipulator = getManipulator(); 165 166 TupleAquisitor tupleAquisitor = getTupleAquisitor(); 167 168 Configuration configuration = new Configuration(tupleAquisitor, 169 manipulator, recursionDepth, getAllSuperClasses, 170 closeAfterRecursion, propertyInformation, breakSuperClassRetrievalAfter, 171 dissolveBlankNodes); 172 173 // give everything to the manager 174 m.useConfiguration(configuration); 175 176 //String ont = ""; 177 try { 178 179 // the actual extraction is started here 180 Monitor extractionTime = JamonMonitorLogger.getTimeMonitor(SparqlKnowledgeSource.class, "total extraction time").start(); 181 List<Node> seedNodes= new ArrayList<>(); 182 183 //if(!threaded){ 184 seedNodes = m.extract(instances); 185 /*}else{ 186 int maxPoolSize = configurator.getInstances().size(); 187 ThreadPoolExecutor ex = new ThreadPoolExecutor(5,maxPoolSize,1,TimeUnit.SECONDS,new ArrayBlockingQueue<Runnable>(100)); 188 List<FutureTask<Node>> tasks = new ArrayList<FutureTask<Node>>(); 189 190 for (String uri : configurator.getInstances()) { 191 192 ExtractOneInstance e = new ExtractOneInstance(m,uri); 193 194 FutureTask<Node> ft = new FutureTask<Node>(e); 195 ex.submit(ft); 196 tasks.add(ft); 197 //System.out.println(f.get()); 198 //seedNodes.add(f.get()); 199 //System.out.println("finished FutureTask "+seedNodes.size()); 200 } 201 for(FutureTask<Node> ft : tasks){ 202 //System.out.println(ft.get()); 203 //System.out.println("aaa"); 204 seedNodes.add(ft.get()); 205 206 } 207 }*/ 208 extractionTime.stop(); 209 210 // Do this so that we can support the OWLOntologyKnowledgeSource 211 // and can be thread safe. 212 OWLOntology fragment = m.getOWLAPIOntologyForNodes(seedNodes, saveExtractedFragment); 213 ontologyBytes = getConverter().convert(fragment); 214 215 logger.info("Finished collecting fragment. needed "+extractionTime.getLastValue()+" ms"); 216 217 ontologyFragmentURL = m.getPhysicalOntologyURL(); 218 219 nrOfExtractedAxioms = configuration.getOwlAPIOntologyCollector().getNrOfExtractedAxioms(); 220 221 } catch (Exception e) { 222 e.printStackTrace(); 223 } 224 //nrOfExtractedTriples = m.getNrOfExtractedTriples(); 225 logger.info("SparqlModul: ****Finished " + totalTime.getAndSet("")); 226 if (debugExitAfterExtraction) { 227 228 File jamonlog = new File("log/jamon.html"); 229 Files.createFile(jamonlog, MonitorFactory.getReport()); 230 Files.appendToFile(jamonlog, "<xmp>\n" 231 + JamonMonitorLogger.getStringForAllSortedByLabel()); 232 System.exit(0); 233 } 234 235 initialized = true; 236 } 237 238 @Override 239 public OWLOntology createOWLOntology(OWLOntologyManager manager) { 240 return getConverter().convert(ontologyBytes, manager); 241 } 242 243 public List<Node> extractParallel(){ 244 return null; 245 } 246 247 /*private class ExtractOneInstance implements Callable{ 248 Manager m; 249 Node n; 250 String uri; 251 252 private ExtractOneInstance(Manager m, String uri){ 253 super(); 254 this.m = m; 255 this.uri = uri; 256 } 257 258 public Node call(){ 259 System.out.println("funky"); 260 return m.extractOneURI(uri); 261 } 262 }*/ 263 264 /** 265 * @return the URL of the used sparql endpoint 266 */ 267 public URL getURL() { 268 if(endpoint == null){ 269 if(getUrl()==null){ 270 if(predefinedEndpoint == null){ 271 setUrl(url); 272 return getUrl(); 273 }else{ 274 return getSparqlEndpoint().getURL(); 275 } 276 277 }else{ 278 return getUrl(); 279 } 280 }else { 281 return endpoint.getURL(); 282 } 283 284 } 285 286 public SparqlQuery sparqlQuery(String query) { 287 return new SparqlQuery(query, getSparqlEndpoint()); 288 } 289 290 public SparqlEndpoint getSparqlEndpoint(){ 291 if(endpoint==null) { 292 if (predefinedEndpoint == null) { 293 endpoint = new SparqlEndpoint(getURL(), new LinkedList<>( 294 defaultGraphURIs), 295 new LinkedList<>(namedGraphURIs)); 296 } else { 297 endpoint = SparqlEndpoint.getEndpointByName(predefinedEndpoint); 298 // System.out.println(endpoint); 299 300 } 301 } 302 return endpoint; 303 304 } 305 306 public SPARQLTasks getSPARQLTasks() { 307 308 // get Options for endpoints 309 310 if (useCache){ 311 return new SPARQLTasks(new Cache(cacheDir, useCacheDatabase), 312 getSparqlEndpoint()); 313 }else { 314 return new SPARQLTasks(getSparqlEndpoint()); 315 } 316 } 317 318 public SparqlQueryMaker getSparqlQueryMaker() { 319 // get Options for Filters 320 if (predefinedFilter == null) { 321 return new SparqlQueryMaker("forbid", objList, 322 predList, useLits); 323 324 } else { 325 326 return SparqlQueryMaker.getSparqlQueryMakerByName(predefinedFilter); 327 } 328 329 } 330 331 public Manipulator getManipulator() { 332 333 if(this.manipulator!=null){ 334 return this.manipulator; 335 } 336 337 // get Options for Filters 338 if (predefinedManipulator != null) { 339 return Manipulator.getManipulatorByName(predefinedManipulator); 340 341 } else { 342 Manipulator m = Manipulator.getDefaultManipulator(); 343 for (StringTuple st : replacePredicate) { 344 m.addRule(new PredicateReplacementRule(Months.MAY, st.a, st.b)); 345 } 346 for (StringTuple st : replaceObject) { 347 m.addRule(new ObjectReplacementRule(Months.MAY, st.a, st.b)); 348 } 349 return m; 350 } 351 352 } 353 354 public void setManipulator(Manipulator m ){ 355 this.manipulator = m; 356 357 } 358 359 public TupleAquisitor getTupleAquisitor() { 360 TupleAquisitor ret = null; 361 if (useImprovedSparqlTupelAquisitor) { 362 ret = new SparqlTupleAquisitorImproved(getSparqlQueryMaker(), 363 getSPARQLTasks(), recursionDepth); 364 } else { 365 ret = new SparqlTupleAquisitor(getSparqlQueryMaker(), 366 getSPARQLTasks()); 367 } 368 return ret; 369 370 } 371 372 public URL getOntologyFragmentURL() { 373 return ontologyFragmentURL; 374 } 375 376 public boolean isUseCache() { 377 return useCache; 378 } 379 380 public String getCacheDir() { 381 return cacheDir; 382 } 383 384 public int getNrOfExtractedAxioms() { 385 return nrOfExtractedAxioms; 386 } 387 388 public void addProgressMonitor(ProgressMonitor mon){ 389 this.mon = mon; 390 } 391 392 public void setUrl(URL url) { 393 this.url = url; 394 } 395 396 public URL getUrl() { 397 return url; 398 } 399 400 public Set<String> getInstances() { 401 return instances; 402 } 403 404 public void setInstances(Set<String> instances) { 405 this.instances = instances; 406 } 407 408 public int getRecursionDepth() { 409 return recursionDepth; 410 } 411 412 public void setRecursionDepth(int recursionDepth) { 413 this.recursionDepth = recursionDepth; 414 } 415 416 public boolean isGetAllSuperClasses() { 417 return getAllSuperClasses; 418 } 419 420 public void setGetAllSuperClasses(boolean getAllSuperClasses) { 421 this.getAllSuperClasses = getAllSuperClasses; 422 } 423 424 public boolean isCloseAfterRecursion() { 425 return closeAfterRecursion; 426 } 427 428 public void setCloseAfterRecursion(boolean closeAfterRecursion) { 429 this.closeAfterRecursion = closeAfterRecursion; 430 } 431 432 public boolean isPropertyInformation() { 433 return propertyInformation; 434 } 435 436 public void setPropertyInformation(boolean propertyInformation) { 437 this.propertyInformation = propertyInformation; 438 } 439 440 public int getBreakSuperClassRetrievalAfter() { 441 return breakSuperClassRetrievalAfter; 442 } 443 444 public void setBreakSuperClassRetrievalAfter(int breakSuperClassRetrievalAfter) { 445 this.breakSuperClassRetrievalAfter = breakSuperClassRetrievalAfter; 446 } 447 448 public boolean isDissolveBlankNodes() { 449 return dissolveBlankNodes; 450 } 451 452 public void setDissolveBlankNodes(boolean dissolveBlankNodes) { 453 this.dissolveBlankNodes = dissolveBlankNodes; 454 } 455 456 public boolean isSaveExtractedFragment() { 457 return saveExtractedFragment; 458 } 459 460 public void setSaveExtractedFragment(boolean saveExtractedFragment) { 461 this.saveExtractedFragment = saveExtractedFragment; 462 } 463 464 public String getPredefinedEndpoint() { 465 return predefinedEndpoint; 466 } 467 468 public void setPredefinedEndpoint(String predefinedEndpoint) { 469 this.predefinedEndpoint = predefinedEndpoint; 470 } 471 472 public Collection<String> getDefaultGraphURIs() { 473 return defaultGraphURIs; 474 } 475 476 public void setDefaultGraphURIs(Collection<String> defaultGraphURIs) { 477 this.defaultGraphURIs = defaultGraphURIs; 478 } 479 480 public Collection<String> getNamedGraphURIs() { 481 return namedGraphURIs; 482 } 483 484 public void setNamedGraphURIs(Collection<String> namedGraphURIs) { 485 this.namedGraphURIs = namedGraphURIs; 486 } 487 488 public boolean isUseCacheDatabase() { 489 return useCacheDatabase; 490 } 491 492 public void setUseCacheDatabase(boolean useCacheDatabase) { 493 this.useCacheDatabase = useCacheDatabase; 494 } 495 496 public String getPredefinedFilter() { 497 return predefinedFilter; 498 } 499 500 public void setPredefinedFilter(String predefinedFilter) { 501 this.predefinedFilter = predefinedFilter; 502 } 503 504 public Set<String> getObjList() { 505 return objList; 506 } 507 508 public void setObjList(Set<String> objList) { 509 this.objList = objList; 510 } 511 512 public Set<String> getPredList() { 513 return predList; 514 } 515 516 public void setPredList(Set<String> predList) { 517 this.predList = predList; 518 } 519 520 public boolean isUseLits() { 521 return useLits; 522 } 523 524 public void setUseLits(boolean useLits) { 525 this.useLits = useLits; 526 } 527 528 public String getPredefinedManipulator() { 529 return predefinedManipulator; 530 } 531 532 public void setPredefinedManipulator(String predefinedManipulator) { 533 this.predefinedManipulator = predefinedManipulator; 534 } 535 536 public List<StringTuple> getReplacePredicate() { 537 return replacePredicate; 538 } 539 540 public void setReplacePredicate(List<StringTuple> replacePredicate) { 541 this.replacePredicate = replacePredicate; 542 } 543 544 public boolean isUseImprovedSparqlTupelAquisitor() { 545 return useImprovedSparqlTupelAquisitor; 546 } 547 548 public void setUseImprovedSparqlTupelAquisitor(boolean useImprovedSparqlTupelAquisitor) { 549 this.useImprovedSparqlTupelAquisitor = useImprovedSparqlTupelAquisitor; 550 } 551 552 public List<StringTuple> getReplaceObject() { 553 return replaceObject; 554 } 555 556 public void setReplaceObject(List<StringTuple> replaceObject) { 557 this.replaceObject = replaceObject; 558 } 559 560 public void setUseCache(boolean useCache) { 561 this.useCache = useCache; 562 } 563 564 public void setCacheDir(String cacheDir) { 565 this.cacheDir = cacheDir; 566 } 567 568 /** 569 * Get the OntologyToByteConverter associated with this object. 570 * 571 * @return The OntologyToByteConverter associated with this object. 572 */ 573 public OntologyToByteConverter getConverter() { 574 return converter; 575 } 576 577 /** 578 * Set the OntologyToByteConverter associated with this object. 579 * 580 * @param converter the OntologyToByteConverter to associate with this object. 581 */ 582 public void setConverter(OntologyToByteConverter converter) { 583 this.converter = converter; 584 } 585 586 /** 587 * Accessor for getting the Ontology Bytes 588 * 589 * @return Get the underlying ontology bytes. 590 */ 591 byte[] getOntologyBytes() { 592 return ontologyBytes; 593 } 594 595 /** 596 * Set the ontology bytes. 597 * 598 * @param ontologyBytes The byte array representation of the fragment. 599 */ 600 void setOntologyBytes(byte[] ontologyBytes) { 601 this.ontologyBytes = ontologyBytes; 602 } 603}