001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.kb.sparql;
020
021import com.jamonapi.Monitor;
022import com.jamonapi.MonitorFactory;
023import org.apache.log4j.Logger;
024import org.dllearner.core.AbstractKnowledgeSource;
025import org.dllearner.core.ComponentAnn;
026import org.dllearner.kb.OWLOntologyKnowledgeSource;
027import org.dllearner.kb.aquisitors.SparqlTupleAquisitor;
028import org.dllearner.kb.aquisitors.SparqlTupleAquisitorImproved;
029import org.dllearner.kb.aquisitors.TupleAquisitor;
030import org.dllearner.kb.extraction.Configuration;
031import org.dllearner.kb.extraction.Manager;
032import org.dllearner.kb.extraction.Node;
033import org.dllearner.kb.manipulator.Manipulator;
034import org.dllearner.kb.manipulator.ObjectReplacementRule;
035import org.dllearner.kb.manipulator.PredicateReplacementRule;
036import org.dllearner.kb.manipulator.Rule.Months;
037import org.dllearner.utilities.Files;
038import org.dllearner.utilities.JamonMonitorLogger;
039import org.dllearner.utilities.datastructures.StringTuple;
040import org.dllearner.utilities.owl.OntologyToByteConverter;
041import org.dllearner.utilities.owl.SimpleOntologyToByteConverter;
042import org.dllearner.utilities.statistics.SimpleClock;
043import org.semanticweb.owlapi.model.OWLOntology;
044import org.semanticweb.owlapi.model.OWLOntologyManager;
045
046import javax.swing.*;
047import java.io.File;
048import java.net.URL;
049import java.util.*;
050
051/**
052 * Represents the SPARQL Endpoint Component.
053 *
054 * @author Jens Lehmann
055 * @author Sebastian Knappe
056 * @author Sebastian Hellmann
057 */
058@ComponentAnn(name = "SPARQL endpoint fragment", shortName = "sparqlfrag", version = 0.5)
059public class SparqlKnowledgeSource extends AbstractKnowledgeSource implements OWLOntologyKnowledgeSource{
060
061        private ProgressMonitor mon;
062
063        private static final boolean debugExitAfterExtraction = false; // switches
064
065    private byte[] ontologyBytes;
066    private OntologyToByteConverter converter = new SimpleOntologyToByteConverter();
067
068        public SparqlKnowledgeSource() {}
069
070        public SparqlKnowledgeSource(URL url, Set<String> instances) {
071                this.url = url;
072                this.instances = instances;
073        }
074
075        private SparqlEndpoint endpoint = null;
076
077        //private String format = "N-TRIPLES";
078        //private String format = "RDF/XML";
079
080        private URL ontologyFragmentURL;
081
082        private Manipulator manipulator = null;
083
084        // received ontology as array, used if format=Array(an element of the
085        // array consists of the subject, predicate and object separated by '<'
086        //private String[] ontArray;
087
088        // received ontology as KB, the internal format
089        //private KB kb;
090
091        // mainly used for statistic
092        private int nrOfExtractedAxioms = 0;
093
094        //// TODO: turn those into config options ///
095        private URL url;
096
097        private Set<String> instances;
098
099        private int recursionDepth = 1;
100
101        private boolean getAllSuperClasses = true;
102
103        private boolean closeAfterRecursion = true;
104
105        private boolean propertyInformation;
106
107        private int breakSuperClassRetrievalAfter = 1000;
108
109        private boolean dissolveBlankNodes = true;
110
111        private boolean saveExtractedFragment = false;
112
113        private String predefinedEndpoint;
114
115        private Collection<String> defaultGraphURIs = new LinkedList<>();
116
117        private Collection<String> namedGraphURIs = new LinkedList<>();
118
119        private boolean useCache = true;
120
121        private String cacheDir = "cache";
122
123        private boolean useCacheDatabase;
124
125        private String predefinedFilter;
126
127        private Set<String> objList = new TreeSet<>();
128
129        private Set<String> predList = new TreeSet<>() ;
130
131        private boolean useLits = true;
132
133        private String predefinedManipulator;
134
135        private List<StringTuple> replacePredicate  = new LinkedList<>();
136
137        private boolean useImprovedSparqlTupelAquisitor;
138
139        private List<StringTuple> replaceObject  = new LinkedList<>();
140
141        private static Logger logger = Logger
142                        .getLogger(SparqlKnowledgeSource.class);
143
144        /*
145         * (non-Javadoc)
146         *
147         * @see org.dllearner.core.Component#init()
148         */
149        @Override
150        public void init() {
151                logger.info("SparqlModul: Collecting Ontology");
152                SimpleClock totalTime = new SimpleClock();
153                //SimpleClock extractionTime = new SimpleClock();
154                if(mon != null){
155                        mon.setNote("Collecting Ontology");
156                }
157                logger.trace(getURL());
158                logger.trace(getSparqlEndpoint());
159//              logger.trace(configurator.getInstances());
160                Manager m = new Manager();
161                m.addProgressMonitor(mon);
162
163                // get Options for Manipulator
164                Manipulator manipulator = getManipulator();
165
166                TupleAquisitor tupleAquisitor = getTupleAquisitor();
167
168                Configuration configuration = new Configuration(tupleAquisitor,
169                                manipulator, recursionDepth, getAllSuperClasses,
170                                                closeAfterRecursion, propertyInformation, breakSuperClassRetrievalAfter,
171                                                dissolveBlankNodes);
172
173                // give everything to the manager
174                m.useConfiguration(configuration);
175
176                //String ont = "";
177                try {
178
179                        // the actual extraction is started here
180                        Monitor extractionTime = JamonMonitorLogger.getTimeMonitor(SparqlKnowledgeSource.class, "total extraction time").start();
181                        List<Node> seedNodes= new ArrayList<>();
182
183                        //if(!threaded){
184                                seedNodes = m.extract(instances);
185                        /*}else{
186                                int maxPoolSize = configurator.getInstances().size();
187                                ThreadPoolExecutor ex = new ThreadPoolExecutor(5,maxPoolSize,1,TimeUnit.SECONDS,new ArrayBlockingQueue<Runnable>(100));
188                                List<FutureTask<Node>> tasks = new ArrayList<FutureTask<Node>>();
189
190                                for (String uri : configurator.getInstances()) {
191
192                                        ExtractOneInstance e = new ExtractOneInstance(m,uri);
193
194                                        FutureTask<Node> ft = new FutureTask<Node>(e);
195                                        ex.submit(ft);
196                                        tasks.add(ft);
197                                        //System.out.println(f.get());
198                                        //seedNodes.add(f.get());
199                                        //System.out.println("finished FutureTask "+seedNodes.size());
200                                }
201                                for(FutureTask<Node> ft : tasks){
202                                        //System.out.println(ft.get());
203                                        //System.out.println("aaa");
204                                        seedNodes.add(ft.get());
205
206                                }
207                        }*/
208                        extractionTime.stop();
209
210                        // Do this so that we can support the OWLOntologyKnowledgeSource
211            // and can be thread safe.
212                        OWLOntology fragment = m.getOWLAPIOntologyForNodes(seedNodes, saveExtractedFragment);
213            ontologyBytes = getConverter().convert(fragment);
214
215                        logger.info("Finished collecting fragment. needed "+extractionTime.getLastValue()+" ms");
216
217                        ontologyFragmentURL = m.getPhysicalOntologyURL();
218
219                        nrOfExtractedAxioms = configuration.getOwlAPIOntologyCollector().getNrOfExtractedAxioms();
220
221                } catch (Exception e) {
222                        e.printStackTrace();
223                }
224                //nrOfExtractedTriples = m.getNrOfExtractedTriples();
225                logger.info("SparqlModul: ****Finished " + totalTime.getAndSet(""));
226                if (debugExitAfterExtraction) {
227
228                        File jamonlog = new File("log/jamon.html");
229                        Files.createFile(jamonlog, MonitorFactory.getReport());
230                        Files.appendToFile(jamonlog, "<xmp>\n"
231                                        + JamonMonitorLogger.getStringForAllSortedByLabel());
232                        System.exit(0);
233                }
234                
235                initialized = true;
236        }
237
238    @Override
239    public OWLOntology createOWLOntology(OWLOntologyManager manager) {
240        return getConverter().convert(ontologyBytes, manager);
241    }
242
243    public List<Node> extractParallel(){
244                return null;
245        }
246
247        /*private class ExtractOneInstance  implements Callable{
248                Manager m;
249                Node n;
250                String uri;
251
252                private ExtractOneInstance(Manager m, String uri){
253                        super();
254                        this.m = m;
255                        this.uri = uri;
256                }
257
258                public Node call(){
259                        System.out.println("funky");
260                        return m.extractOneURI(uri);
261                }
262        }*/
263
264        /**
265         * @return the URL of the used sparql endpoint
266         */
267        public URL getURL() {
268                if(endpoint == null){
269                        if(getUrl()==null){
270                                if(predefinedEndpoint == null){
271                                                setUrl(url);
272                                        return getUrl();
273                                }else{
274                                        return getSparqlEndpoint().getURL();
275                                }
276
277                        }else{
278                                return getUrl();
279                        }
280                }else {
281                        return endpoint.getURL();
282                }
283
284        }
285
286        public SparqlQuery sparqlQuery(String query) {
287                return new SparqlQuery(query, getSparqlEndpoint());
288        }
289
290        public SparqlEndpoint getSparqlEndpoint(){
291                if(endpoint==null) {
292                        if (predefinedEndpoint == null) {
293                                endpoint = new SparqlEndpoint(getURL(), new LinkedList<>(
294                                                defaultGraphURIs),
295                                                new LinkedList<>(namedGraphURIs));
296                        } else {
297                                endpoint = SparqlEndpoint.getEndpointByName(predefinedEndpoint);
298                                // System.out.println(endpoint);
299
300                        }
301                }
302                return endpoint;
303
304        }
305
306        public SPARQLTasks getSPARQLTasks() {
307
308                // get Options for endpoints
309
310                if (useCache){
311                        return new SPARQLTasks(new Cache(cacheDir, useCacheDatabase),
312                                        getSparqlEndpoint());
313                }else {
314                        return new SPARQLTasks(getSparqlEndpoint());
315                }
316        }
317
318        public SparqlQueryMaker getSparqlQueryMaker() {
319                // get Options for Filters
320                if (predefinedFilter == null) {
321                        return new SparqlQueryMaker("forbid", objList,
322                                        predList, useLits);
323
324                } else {
325
326                        return SparqlQueryMaker.getSparqlQueryMakerByName(predefinedFilter);
327                }
328
329        }
330
331        public Manipulator getManipulator() {
332
333                if(this.manipulator!=null){
334                        return this.manipulator;
335                }
336
337                // get Options for Filters
338                if (predefinedManipulator != null) {
339                        return Manipulator.getManipulatorByName(predefinedManipulator);
340
341                } else {
342                        Manipulator m = Manipulator.getDefaultManipulator();
343                        for (StringTuple st : replacePredicate) {
344                                m.addRule(new PredicateReplacementRule(Months.MAY, st.a, st.b));
345                        }
346                        for (StringTuple st : replaceObject) {
347                                m.addRule(new ObjectReplacementRule(Months.MAY, st.a, st.b));
348                        }
349                        return m;
350                }
351
352        }
353
354        public void setManipulator(Manipulator m ){
355                this.manipulator = m;
356
357        }
358
359        public TupleAquisitor getTupleAquisitor() {
360                TupleAquisitor ret = null;
361                if (useImprovedSparqlTupelAquisitor) {
362                        ret = new SparqlTupleAquisitorImproved(getSparqlQueryMaker(),
363                                        getSPARQLTasks(), recursionDepth);
364                } else {
365                        ret = new SparqlTupleAquisitor(getSparqlQueryMaker(),
366                                        getSPARQLTasks());
367                }
368                return ret;
369
370        }
371
372        public URL getOntologyFragmentURL() {
373                return ontologyFragmentURL;
374        }
375
376        public boolean isUseCache() {
377                return useCache;
378        }
379
380        public String getCacheDir() {
381                return cacheDir;
382        }
383
384        public int getNrOfExtractedAxioms() {
385                return nrOfExtractedAxioms;
386        }
387
388        public void addProgressMonitor(ProgressMonitor mon){
389                this.mon = mon;
390        }
391
392        public void setUrl(URL url) {
393                this.url = url;
394        }
395
396        public URL getUrl() {
397                return url;
398        }
399
400        public Set<String> getInstances() {
401                return instances;
402        }
403
404        public void setInstances(Set<String> instances) {
405                this.instances = instances;
406        }
407
408        public int getRecursionDepth() {
409                return recursionDepth;
410        }
411
412        public void setRecursionDepth(int recursionDepth) {
413                this.recursionDepth = recursionDepth;
414        }
415
416        public boolean isGetAllSuperClasses() {
417                return getAllSuperClasses;
418        }
419
420        public void setGetAllSuperClasses(boolean getAllSuperClasses) {
421                this.getAllSuperClasses = getAllSuperClasses;
422        }
423
424        public boolean isCloseAfterRecursion() {
425                return closeAfterRecursion;
426        }
427
428        public void setCloseAfterRecursion(boolean closeAfterRecursion) {
429                this.closeAfterRecursion = closeAfterRecursion;
430        }
431
432        public boolean isPropertyInformation() {
433                return propertyInformation;
434        }
435
436        public void setPropertyInformation(boolean propertyInformation) {
437                this.propertyInformation = propertyInformation;
438        }
439
440        public int getBreakSuperClassRetrievalAfter() {
441                return breakSuperClassRetrievalAfter;
442        }
443
444        public void setBreakSuperClassRetrievalAfter(int breakSuperClassRetrievalAfter) {
445                this.breakSuperClassRetrievalAfter = breakSuperClassRetrievalAfter;
446        }
447
448        public boolean isDissolveBlankNodes() {
449                return dissolveBlankNodes;
450        }
451
452        public void setDissolveBlankNodes(boolean dissolveBlankNodes) {
453                this.dissolveBlankNodes = dissolveBlankNodes;
454        }
455
456        public boolean isSaveExtractedFragment() {
457                return saveExtractedFragment;
458        }
459
460        public void setSaveExtractedFragment(boolean saveExtractedFragment) {
461                this.saveExtractedFragment = saveExtractedFragment;
462        }
463
464        public String getPredefinedEndpoint() {
465                return predefinedEndpoint;
466        }
467
468        public void setPredefinedEndpoint(String predefinedEndpoint) {
469                this.predefinedEndpoint = predefinedEndpoint;
470        }
471
472        public Collection<String> getDefaultGraphURIs() {
473                return defaultGraphURIs;
474        }
475
476        public void setDefaultGraphURIs(Collection<String> defaultGraphURIs) {
477                this.defaultGraphURIs = defaultGraphURIs;
478        }
479
480        public Collection<String> getNamedGraphURIs() {
481                return namedGraphURIs;
482        }
483
484        public void setNamedGraphURIs(Collection<String> namedGraphURIs) {
485                this.namedGraphURIs = namedGraphURIs;
486        }
487
488        public boolean isUseCacheDatabase() {
489                return useCacheDatabase;
490        }
491
492        public void setUseCacheDatabase(boolean useCacheDatabase) {
493                this.useCacheDatabase = useCacheDatabase;
494        }
495
496        public String getPredefinedFilter() {
497                return predefinedFilter;
498        }
499
500        public void setPredefinedFilter(String predefinedFilter) {
501                this.predefinedFilter = predefinedFilter;
502        }
503
504        public Set<String> getObjList() {
505                return objList;
506        }
507
508        public void setObjList(Set<String> objList) {
509                this.objList = objList;
510        }
511
512        public Set<String> getPredList() {
513                return predList;
514        }
515
516        public void setPredList(Set<String> predList) {
517                this.predList = predList;
518        }
519
520        public boolean isUseLits() {
521                return useLits;
522        }
523
524        public void setUseLits(boolean useLits) {
525                this.useLits = useLits;
526        }
527
528        public String getPredefinedManipulator() {
529                return predefinedManipulator;
530        }
531
532        public void setPredefinedManipulator(String predefinedManipulator) {
533                this.predefinedManipulator = predefinedManipulator;
534        }
535
536        public List<StringTuple> getReplacePredicate() {
537                return replacePredicate;
538        }
539
540        public void setReplacePredicate(List<StringTuple> replacePredicate) {
541                this.replacePredicate = replacePredicate;
542        }
543
544        public boolean isUseImprovedSparqlTupelAquisitor() {
545                return useImprovedSparqlTupelAquisitor;
546        }
547
548        public void setUseImprovedSparqlTupelAquisitor(boolean useImprovedSparqlTupelAquisitor) {
549                this.useImprovedSparqlTupelAquisitor = useImprovedSparqlTupelAquisitor;
550        }
551
552        public List<StringTuple> getReplaceObject() {
553                return replaceObject;
554        }
555
556        public void setReplaceObject(List<StringTuple> replaceObject) {
557                this.replaceObject = replaceObject;
558        }
559
560        public void setUseCache(boolean useCache) {
561                this.useCache = useCache;
562        }
563
564        public void setCacheDir(String cacheDir) {
565                this.cacheDir = cacheDir;
566        }
567
568    /**
569     * Get the OntologyToByteConverter associated with this object.
570     *
571     * @return The OntologyToByteConverter associated with this object.
572     */
573    public OntologyToByteConverter getConverter() {
574        return converter;
575    }
576
577    /**
578     * Set the OntologyToByteConverter associated with this object.
579     *
580     * @param converter the OntologyToByteConverter to associate with this object.
581     */
582    public void setConverter(OntologyToByteConverter converter) {
583        this.converter = converter;
584    }
585
586    /**
587     * Accessor for getting the Ontology Bytes
588     *
589     * @return Get the underlying ontology bytes.
590     */
591    byte[] getOntologyBytes() {
592        return ontologyBytes;
593    }
594
595    /**
596     * Set the ontology bytes.
597     *
598     * @param ontologyBytes The byte array representation of the fragment.
599     */
600    void setOntologyBytes(byte[] ontologyBytes) {
601        this.ontologyBytes = ontologyBytes;
602    }
603}