Source code

001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.properties;
020
021import com.google.common.collect.Maps;
022import com.google.common.collect.Sets;
023import com.google.common.collect.Sets.SetView;
024import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
025import org.apache.jena.query.ParameterizedSparqlString;
026import org.apache.jena.query.Query;
027import org.apache.jena.query.QueryExecution;
028import org.apache.jena.rdf.model.Model;
029import org.apache.jena.rdf.model.ModelFactory;
030import org.apache.jena.riot.system.ErrorHandlerFactory;
031import org.dllearner.algorithms.properties.AxiomAlgorithms.AxiomTypeCluster;
032import org.dllearner.core.*;
033import org.dllearner.kb.LocalModelBasedSparqlEndpointKS;
034import org.dllearner.kb.SparqlEndpointKS;
035import org.dllearner.kb.sparql.SparqlEndpoint;
036import org.dllearner.reasoning.SPARQLReasoner;
037import org.dllearner.utilities.OWLAPIUtils;
038import org.semanticweb.owlapi.model.*;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041import uk.ac.manchester.cs.owl.owlapi.OWLObjectPropertyImpl;
042
043import java.net.URL;
044import java.util.*;
045import java.util.concurrent.ExecutorService;
046import java.util.concurrent.Executors;
047import java.util.concurrent.TimeUnit;
048
049/**
050 * This is a wrapper class to handle more than one property axiom type in a more intelligent way,
051 * e.g. when sampling is activated it might be better to generate a unique sample and apply the 
052 * algorithms on that sample afterwards.
053 * Note that this only works for subsets of axiom types that have the same sample structure.
054 * </br>
055 * Additionally, this class is able to configure and run the algorithms in a parallel way.
056 * @author Lorenz Buehmann
057 *
058 */
059public class MultiPropertyAxiomLearner {
060        
061        private static final Logger logger = LoggerFactory.getLogger(MultiPropertyAxiomLearner.class);
062        
063        private SparqlEndpointKS ks;
064        private SPARQLReasoner reasoner;
065        private QueryExecutionFactory qef;
066
067        private boolean useSampling = false;
068        private long maxSampleGenerationTimeMilliseconds = 10000;
069        private long pageSize = 10000;
070
071        private boolean multiThreaded = false;
072        private int maxNrOfThreads = 1;
073        
074        private long maxExecutionTimeMilliseconds = -1;
075
076        private long startTime;
077
078        private AxiomLearningProgressMonitor progressMonitor = new SilentAxiomLearningProgressMonitor();
079        
080        private Map<AxiomType<? extends OWLAxiom>, List<EvaluatedAxiom<OWLAxiom>>> results;
081
082        private OWLEntity entity;
083
084        private Set<AxiomType<? extends OWLAxiom>> axiomTypes;
085        
086        private Map<AxiomType<? extends OWLAxiom>, AbstractAxiomLearningAlgorithm> algorithms = new HashMap<>();
087        
088        public MultiPropertyAxiomLearner(SparqlEndpointKS ks) {
089                this(ks.getQueryExecutionFactory());
090                this.ks = ks;
091        }
092        
093        public MultiPropertyAxiomLearner(QueryExecutionFactory qef) {
094                this.qef = qef;
095                this.reasoner = new SPARQLReasoner(qef);
096        }
097        
098        public void start(){
099                startTime = System.currentTimeMillis();
100
101                checkConfigOptions();
102                
103                // check if entity is empty
104                int popularity = reasoner.getPopularity(entity);
105                if(popularity == 0){
106                        logger.warn("Cannot make axiom suggestions for empty " + entity.getEntityType().getName() + " " + entity.toStringID());
107                        return;
108                }
109                
110                results = Maps.newConcurrentMap();
111                
112                EntityType<?> entityType = entity.getEntityType();
113                
114                // check for axiom types that are not appropriate for the given entity
115                Set<AxiomType<? extends OWLAxiom>> possibleAxiomTypes = AxiomAlgorithms.getAxiomTypes(entityType);
116                SetView<AxiomType<? extends OWLAxiom>> notAllowed = Sets.difference(axiomTypes, possibleAxiomTypes);
117                if(!notAllowed.isEmpty()){
118                        logger.warn("Not supported axiom types for entity " + entity + " :" + notAllowed);
119                }
120                
121                Set<AxiomType<? extends OWLAxiom>> todo = Sets.intersection(axiomTypes,  possibleAxiomTypes);
122                
123                // compute samples for axiom types
124                Set<AxiomTypeCluster> sampleClusters = AxiomAlgorithms.getSameSampleClusters(entityType);
125                
126                ExecutorService tp = Executors.newFixedThreadPool(maxNrOfThreads);
127                
128                for (final AxiomTypeCluster cluster : sampleClusters) {
129                        final SetView<AxiomType<? extends OWLAxiom>> sampleAxiomTypes = Sets.intersection(cluster.getAxiomTypes(), todo);
130                        
131                        if(!sampleAxiomTypes.isEmpty()){
132                                tp.submit(() -> {
133                                        try {
134                                                SparqlEndpointKS ks1 = MultiPropertyAxiomLearner.this.ks;
135
136                                                // get sample if enabled
137                                                if(useSampling){
138                                                        Model sample = generateSample(entity, cluster);
139
140                                                        // if sampling failed, we skip
141                                                        if(sample == null) {
142                                                                return;
143                                                        }
144
145                                                        // if the sample is empty, we skip and show warning
146                                                        if(sample.isEmpty()) {
147                                                                logger.warn("Empty sample. Skipped learning.");
148                                                                return;
149                                                        }
150
151                                                        ks1 = new LocalModelBasedSparqlEndpointKS(sample);
152                                                }
153
154                                                // process each axiom type
155                                                for (AxiomType<? extends OWLAxiom> axiomType : sampleAxiomTypes) {
156                                                        try {
157                                                                List<EvaluatedAxiom<OWLAxiom>> result = applyAlgorithm(axiomType, ks1);
158                                                                results.put(axiomType, result);
159                                                        } catch (Exception e) {
160                                                                logger.error("An error occurred while generating " + axiomType.getName() +
161                                                                                " axioms for " + OWLAPIUtils.getPrintName(entity.getEntityType()) + " " + entity.toStringID(), e);
162                                                        }
163                                                }
164                                        } catch (Exception e) {
165                                                logger.error("Failed to process " + cluster, e);
166                                        }
167                                });
168                                
169                        }
170                }
171                
172                try {
173                        tp.shutdown();
174                        tp.awaitTermination(1, TimeUnit.HOURS);
175                } catch (InterruptedException e) {
176                        e.printStackTrace();
177                }
178//              
179//              for (AxiomType<? extends OWLAxiom> axiomType : todo) {
180//                      try {
181//                              applyAlgorithm(entity, axiomType, useSampling ? axiomType2Ks.get(axiomType) : ks);
182//                      } catch (Exception e) {
183//                              logger.error("Error occurred while generating " + axiomType.getName() + " for entity " + entity, e);
184//                      }
185//              }
186        }
187        
188        public Map<AxiomType<? extends OWLAxiom>, List<EvaluatedAxiom<OWLAxiom>>> getCurrentlyBestEvaluatedAxioms() {
189                return results;
190        }
191        
192        public List<EvaluatedAxiom<OWLAxiom>> getCurrentlyBestEvaluatedAxioms(AxiomType<? extends OWLAxiom> axiomType) {
193                return new ArrayList<>(results.get(axiomType));
194        }
195
196        public List<EvaluatedAxiom<OWLAxiom>> getCurrentlyBestEvaluatedAxioms(AxiomType<? extends OWLAxiom> axiomType, double accuracyThreshold) {
197                List<EvaluatedAxiom<OWLAxiom>> result = results.get(axiomType);
198                
199                // throw exception if computation failed
200                if(result == null) {
201                        return Collections.emptyList();
202//                      throw new NoResult
203                }
204                
205                // get all axioms above threshold
206                List<EvaluatedAxiom<OWLAxiom>> bestAxioms = new ArrayList<>();
207                for (EvaluatedAxiom<OWLAxiom> axiom : result) {
208                        if(axiom.getScore().getAccuracy() >= accuracyThreshold){
209                                bestAxioms.add(axiom);
210                        }
211                }
212
213                return bestAxioms;
214        }
215
216        public void setProgressMonitor(AxiomLearningProgressMonitor progressMonitor) {
217                this.progressMonitor = progressMonitor;
218        }
219
220        public void setEntityToDescribe(OWLEntity entity){
221                this.entity = entity;
222        }
223
224        /**
225         * Set the axiom types that will be processed. Note, a sanity check is done such that only axiom types
226         * will be processed that correspond to the declared entity.
227         * @param axiomTypes the axiom types
228         */
229        public void setAxiomTypes(Set<AxiomType<? extends OWLAxiom>> axiomTypes){
230                this.axiomTypes = axiomTypes;
231        }
232        
233        public AbstractAxiomLearningAlgorithm getAlgorithm(AxiomType<? extends OWLAxiom> axiomType) {
234                return algorithms.get(axiomType);
235        }
236
237        public Set<OWLObject> getPositives(AxiomType<? extends OWLAxiom> axiomType, EvaluatedAxiom<OWLAxiom> axiom){
238                AbstractAxiomLearningAlgorithm la = algorithms.get(axiomType);
239                return la.getPositiveExamples(axiom);
240        }
241
242        public Set<OWLObject> getNegatives(AxiomType<? extends OWLAxiom> axiomType, EvaluatedAxiom<OWLAxiom> axiom){
243                AbstractAxiomLearningAlgorithm la = algorithms.get(axiomType);
244                return la.getNegativeExamples(axiom);
245        }
246
247        /**
248         * @param useSampling the useSampling to set
249         */
250        public void setUseSampling(boolean useSampling) {
251                this.useSampling = useSampling;
252        }
253
254        /**
255         * @param multiThreaded whether to enable multi-threaded execution (@see setMaxNrOfThreads)
256         */
257        public void setMultiThreaded(boolean multiThreaded) {
258                this.multiThreaded = multiThreaded;
259        }
260
261        /**
262         * @param maxNrOfThreads the max. nr of threads
263         */
264        public void setMaxNrOfThreads(int maxNrOfThreads) {
265                this.maxNrOfThreads = maxNrOfThreads;
266        }
267
268        /**
269         * Set the maximum execution time. Note, this value represents the total computation time of all axiom types that have
270         * been set, thus, it's recommended to increase the runtime.
271         * @param executionTimeDuration the execution time
272         * @param executionTimeUnit the time unit
273         */
274        public void setMaxExecutionTime(long executionTimeDuration, TimeUnit executionTimeUnit) {
275                this.maxExecutionTimeMilliseconds = executionTimeUnit.toMillis(executionTimeDuration);
276        }
277
278        /**
279         * Set the maximum time to generate a sample. Note, this value represents the time to compute a single sample, thus,
280         * this time is spend for each axiom type (resp. cluster of axiom types).
281         * @param sampleGenerationTimeDuration the sample generation time
282         * @param sampleGenerationTimeUnit the time unit
283         */
284        public void setMaxSampleGenerationTime(long sampleGenerationTimeDuration, TimeUnit sampleGenerationTimeUnit) {
285                this.maxSampleGenerationTimeMilliseconds = sampleGenerationTimeUnit.toMillis(sampleGenerationTimeDuration);
286        }
287
288        private List<EvaluatedAxiom<OWLAxiom>> applyAlgorithm(AxiomType<? extends OWLAxiom> axiomType, SparqlEndpointKS ks) throws ComponentInitException{
289                Class<? extends AbstractAxiomLearningAlgorithm<? extends OWLAxiom, ? extends OWLObject, ? extends OWLEntity>> algorithmClass = AxiomAlgorithms.getAlgorithmClass(axiomType);
290                AbstractAxiomLearningAlgorithm learner = null;
291                try {
292                        learner = algorithmClass.getConstructor(SparqlEndpointKS.class).newInstance(ks);
293
294                        learner.setEntityToDescribe(entity);
295                        learner.setUseSampling(false);
296                        learner.setProgressMonitor(progressMonitor);
297                        learner.init();
298                        learner.start();
299
300                        algorithms.put(axiomType, learner);
301
302                        return learner.getCurrentlyBestEvaluatedAxioms();
303                } catch (ComponentInitException e) {
304                        throw e;
305                } catch (Exception e) {
306                        throw new RuntimeException("Failed to apply algorithm for " + axiomType + " axioms on entity " + entity);
307                }
308        }
309
310        private Model generateSample(OWLEntity entity, AxiomTypeCluster cluster){
311                logger.info("Generating sample (" + cluster + ") for " + OWLAPIUtils.getPrintName(entity.getEntityType()) + " " + entity.toStringID() + "...");
312                long startTime = System.currentTimeMillis();
313
314                Model sample = ModelFactory.createDefaultModel();
315                
316                ParameterizedSparqlString sampleQueryTemplate = cluster.getSampleQuery();
317                sampleQueryTemplate.clearParam("entity");
318                sampleQueryTemplate.setIri("entity", entity.toStringID());
319                
320                Query query = sampleQueryTemplate.asQuery();
321                query.setLimit(pageSize);
322                
323                boolean isEmpty = false;
324                boolean samplingTimeout = false;
325                int i = 0;
326                while(!isEmpty && !samplingTimeout && !isTimeout()){
327                        // get next sample
328                        logger.debug("Extending sample...");
329                        query.setOffset(i++ * pageSize);
330                        logger.debug("sending query\n" + query);
331                        try (QueryExecution qe=qef.createQueryExecution(query)) {
332                                // set the remaining runtime
333                                qe.setTimeout(maxSampleGenerationTimeMilliseconds - (System.currentTimeMillis() - startTime));
334
335                                // execute query
336                                Model tmp = qe.execConstruct();
337                                sample.add(tmp);
338
339                                // if last call returned empty model, we can leave loop
340                                isEmpty = tmp.isEmpty();
341                        } catch (Exception e) {
342                                boolean syntaxError = e instanceof RuntimeException &&
343                                                e.getCause() != null &&
344                                                e.getCause() instanceof org.apache.jena.riot.RiotException;
345                                logger.error("Sample generation for " + cluster + " failed. Reason:\n");
346                                if(syntaxError) {
347                                        logger.error("Endpoint returned illegal data with error\n" + e.getCause().getMessage()
348                                                                                 + "\nfor query\n" + query + "\n");
349                                } else {
350                                        logger.error("Query execution failed for query\n" + query, e);
351                                }
352
353                                return null;
354                        }
355                        // checker for sampling timeout
356                        samplingTimeout = (System.currentTimeMillis() - startTime) >= maxSampleGenerationTimeMilliseconds;
357                        if(samplingTimeout) {
358                                logger.info("Sampling timeout.");
359                        }
360                }
361                logger.info("Finished generating sample. Sample size: " + sample.size() + " triples");
362                if(isEmpty) {
363                        logger.info("Sample contains the whole relevant data.");
364                }
365                return sample;
366        }
367        
368        private boolean isTimeout(){
369                return maxExecutionTimeMilliseconds > 0 && getRemainingRuntimeMilliSeconds() <= 0;
370        }
371        
372        private long getRemainingRuntimeMilliSeconds(){
373                long duration = System.currentTimeMillis() - startTime;
374                return Math.max(0, (maxExecutionTimeMilliseconds - duration));
375        }
376
377        private void checkConfigOptions() {
378                if(multiThreaded && maxNrOfThreads == 1) {
379                        logger.warn("You enabled multi-threaded execution but set the number of threads to 1. " +
380                                                                "You probably want to increase this value.");
381                }
382
383                if (useSampling && maxExecutionTimeMilliseconds <= 0) {
384                        logger.warn(
385                                        "You enabled sampling but set no execution timeout. This means that the whole data will be loaded " +
386                                                        "locally which might be time and resource consuming. We suggest to either set a timeout or " +
387                                                        "disable the sampling in that case.");
388                }
389        }
390
391        public static void main(String[] args) throws Exception{
392                ErrorHandlerFactory.setDefaultErrorHandler(ErrorHandlerFactory.errorHandlerStrictNoLogging);
393                SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://dbpedia.org/sparql"), "http://dbpedia.org");
394//              endpoint = SparqlEndpoint.getEndpointDBpedia();
395                SparqlEndpointKS ks = new SparqlEndpointKS(endpoint);
396                ks.init();
397
398                OWLEntity entity = new OWLObjectPropertyImpl(IRI.create("http://dbpedia.org/ontology/author"));
399
400                MultiPropertyAxiomLearner la = new MultiPropertyAxiomLearner(ks);
401                la.setEntityToDescribe(entity);
402
403                la.setUseSampling(true);
404                la.setMaxSampleGenerationTime(10, TimeUnit.SECONDS);
405
406                la.setMaxNrOfThreads(1);
407                la.setAxiomTypes(Sets.newHashSet(
408                                AxiomType.OBJECT_PROPERTY_DOMAIN,
409                                AxiomType.OBJECT_PROPERTY_RANGE,
410                                AxiomType.SUB_OBJECT_PROPERTY, AxiomType.EQUIVALENT_OBJECT_PROPERTIES,
411                                AxiomType.FUNCTIONAL_OBJECT_PROPERTY, AxiomType.ASYMMETRIC_OBJECT_PROPERTY, AxiomType.IRREFLEXIVE_OBJECT_PROPERTY,
412                                AxiomType.TRANSITIVE_OBJECT_PROPERTY
413                )
414                );
415                la.setMaxExecutionTime(1, TimeUnit.MINUTES);
416                la.start();
417                
418        }
419
420}