/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.algorithms.properties;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
import org.apache.jena.query.ParameterizedSparqlString;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.riot.system.ErrorHandlerFactory;
import org.dllearner.algorithms.properties.AxiomAlgorithms.AxiomTypeCluster;
import org.dllearner.core.*;
import org.dllearner.kb.LocalModelBasedSparqlEndpointKS;
import org.dllearner.kb.SparqlEndpointKS;
import org.dllearner.kb.sparql.SparqlEndpoint;
import org.dllearner.reasoning.SPARQLReasoner;
import org.dllearner.utilities.OWLAPIUtils;
import org.semanticweb.owlapi.model.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.manchester.cs.owl.owlapi.OWLObjectPropertyImpl;

import java.net.URL;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * This is a wrapper class to handle more than one property axiom type in a more intelligent way,
 * e.g. when sampling is activated it might be better to generate a unique sample and apply the
 * algorithms on that sample afterwards.
 * Note that this only works for subsets of axiom types that have the same sample structure.
 * <br>
 * Additionally, this class is able to configure and run the algorithms in a parallel way.
 * @author Lorenz Buehmann
 *
 */
public class MultiPropertyAxiomLearner {

    private static final Logger logger = LoggerFactory.getLogger(MultiPropertyAxiomLearner.class);

    private SparqlEndpointKS ks;
    private SPARQLReasoner reasoner;
    private QueryExecutionFactory qef;

    private boolean useSampling = false;
    private long maxSampleGenerationTimeMilliseconds = 10000;
    private long pageSize = 10000;

    // only consulted by checkConfigOptions(); the pool size itself is taken from maxNrOfThreads
    private boolean multiThreaded = false;
    private int maxNrOfThreads = 1;

    // values <= 0 mean "no overall execution timeout" (see isTimeout())
    private long maxExecutionTimeMilliseconds = -1;

    private long startTime;

    private AxiomLearningProgressMonitor progressMonitor = new SilentAxiomLearningProgressMonitor();

    // initialised eagerly so that the getters never dereference null, even before start() was called
    private Map<AxiomType<? extends OWLAxiom>, List<EvaluatedAxiom<OWLAxiom>>> results = Maps.newConcurrentMap();

    private OWLEntity entity;

    private Set<AxiomType<? extends OWLAxiom>> axiomTypes;

    // written by the worker threads of the pool in start(), hence it has to be synchronized
    private Map<AxiomType<? extends OWLAxiom>, AbstractAxiomLearningAlgorithm> algorithms =
            Collections.synchronizedMap(new HashMap<>());

    /**
     * Creates a learner that works on the given knowledge base.
     * @param ks the SPARQL knowledge base that is queried
     */
    public MultiPropertyAxiomLearner(SparqlEndpointKS ks) {
        this(ks.getQueryExecutionFactory());
        this.ks = ks;
    }

    /**
     * Creates a learner that works on the given query execution factory.
     * Note that this constructor does not set a knowledge base, i.e. if sampling is disabled the
     * algorithms are created with a {@code null} knowledge base.
     * @param qef the query execution factory used for the reasoner and for sample generation
     */
    public MultiPropertyAxiomLearner(QueryExecutionFactory qef) {
        this.qef = qef;
        this.reasoner = new SPARQLReasoner(qef);
    }

    /**
     * Runs the learning algorithms for all configured axiom types that are applicable to the entity
     * and blocks until all of them have finished (at most one hour). The entity and the axiom types
     * have to be set before.
     */
    public void start() {
        startTime = System.currentTimeMillis();

        // start with a fresh result map so that the getters never see null or stale results,
        // even if we return early below
        results = Maps.newConcurrentMap();

        checkConfigOptions();

        // check if entity is empty
        int popularity = reasoner.getPopularity(entity);
        if (popularity == 0) {
            logger.warn("Cannot make axiom suggestions for empty " + entity.getEntityType().getName() + " " + entity.toStringID());
            return;
        }

        EntityType<?> entityType = entity.getEntityType();

        // check for axiom types that are not appropriate for the given entity
        Set<AxiomType<? extends OWLAxiom>> possibleAxiomTypes = AxiomAlgorithms.getAxiomTypes(entityType);
        SetView<AxiomType<? extends OWLAxiom>> notAllowed = Sets.difference(axiomTypes, possibleAxiomTypes);
        if (!notAllowed.isEmpty()) {
            logger.warn("Not supported axiom types for entity " + entity + " :" + notAllowed);
        }

        Set<AxiomType<? extends OWLAxiom>> todo = Sets.intersection(axiomTypes, possibleAxiomTypes);

        // compute samples for axiom types
        Set<AxiomTypeCluster> sampleClusters = AxiomAlgorithms.getSameSampleClusters(entityType);

        ExecutorService tp = Executors.newFixedThreadPool(maxNrOfThreads);
        try {
            for (final AxiomTypeCluster cluster : sampleClusters) {
                final SetView<AxiomType<? extends OWLAxiom>> sampleAxiomTypes = Sets.intersection(cluster.getAxiomTypes(), todo);

                if (!sampleAxiomTypes.isEmpty()) {
                    tp.submit(() -> processCluster(cluster, sampleAxiomTypes));
                }
            }
        } finally {
            // always release the pool, even if submitting a task failed
            tp.shutdown();
        }

        try {
            if (!tp.awaitTermination(1, TimeUnit.HOURS)) {
                logger.warn("Axiom learning did not terminate within 1 hour.");
            }
        } catch (InterruptedException e) {
            logger.warn("Interrupted while waiting for the axiom learning algorithms to terminate.", e);
            // stop the workers and keep the interrupt visible to the caller
            tp.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Processes all given axiom types of a single cluster: if sampling is enabled, one sample is
     * generated for the cluster and used as knowledge base for each axiom type. Any failure is
     * logged and never propagated, because this method runs as a task inside the thread pool.
     * @param cluster the cluster of axiom types that share the same sample structure
     * @param sampleAxiomTypes the axiom types of the cluster that have to be processed
     */
    private void processCluster(AxiomTypeCluster cluster, Set<AxiomType<? extends OWLAxiom>> sampleAxiomTypes) {
        try {
            SparqlEndpointKS ks1 = this.ks;

            // get sample if enabled
            if (useSampling) {
                Model sample = generateSample(entity, cluster);

                // if sampling failed, we skip
                if (sample == null) {
                    return;
                }

                // if the sample is empty, we skip and show warning
                if (sample.isEmpty()) {
                    logger.warn("Empty sample. Skipped learning.");
                    return;
                }

                ks1 = new LocalModelBasedSparqlEndpointKS(sample);
            }

            // process each axiom type; a failure for one type must not prevent the others
            for (AxiomType<? extends OWLAxiom> axiomType : sampleAxiomTypes) {
                try {
                    List<EvaluatedAxiom<OWLAxiom>> result = applyAlgorithm(axiomType, ks1);
                    results.put(axiomType, result);
                } catch (Exception e) {
                    logger.error("An error occurred while generating " + axiomType.getName() +
                            " axioms for " + OWLAPIUtils.getPrintName(entity.getEntityType()) + " " + entity.toStringID(), e);
                }
            }
        } catch (Exception e) {
            logger.error("Failed to process " + cluster, e);
        }
    }

    /**
     * @return the results of the last run, i.e. the evaluated axioms for each axiom type for which
     * the computation succeeded
     */
    public Map<AxiomType<? extends OWLAxiom>, List<EvaluatedAxiom<OWLAxiom>>> getCurrentlyBestEvaluatedAxioms() {
        return results;
    }

    /**
     * @param axiomType the axiom type
     * @return a copy of the evaluated axioms for the given axiom type, or an empty list if there is
     * no result for that axiom type
     */
    public List<EvaluatedAxiom<OWLAxiom>> getCurrentlyBestEvaluatedAxioms(AxiomType<? extends OWLAxiom> axiomType) {
        List<EvaluatedAxiom<OWLAxiom>> result = results.get(axiomType);

        // no result, e.g. because the computation failed or the axiom type was not processed
        if (result == null) {
            return new ArrayList<>();
        }

        return new ArrayList<>(result);
    }

    /**
     * @param axiomType the axiom type
     * @param accuracyThreshold the minimum accuracy (inclusive) an axiom must have
     * @return the evaluated axioms for the given axiom type whose accuracy is at least the threshold,
     * or an empty list if there is no result for that axiom type
     */
    public List<EvaluatedAxiom<OWLAxiom>> getCurrentlyBestEvaluatedAxioms(AxiomType<? extends OWLAxiom> axiomType, double accuracyThreshold) {
        List<EvaluatedAxiom<OWLAxiom>> result = results.get(axiomType);

        // no result, e.g. because the computation failed or the axiom type was not processed
        if (result == null) {
            return Collections.emptyList();
        }

        // get all axioms above threshold
        List<EvaluatedAxiom<OWLAxiom>> bestAxioms = new ArrayList<>();
        for (EvaluatedAxiom<OWLAxiom> axiom : result) {
            if (axiom.getScore().getAccuracy() >= accuracyThreshold) {
                bestAxioms.add(axiom);
            }
        }

        return bestAxioms;
    }

    /**
     * @param progressMonitor the progress monitor that is passed to each learning algorithm
     */
    public void setProgressMonitor(AxiomLearningProgressMonitor progressMonitor) {
        this.progressMonitor = progressMonitor;
    }

    /**
     * @param entity the entity for which axioms will be learned
     */
    public void setEntityToDescribe(OWLEntity entity) {
        this.entity = entity;
    }

    /**
     * Set the axiom types that will be processed. Note, a sanity check is done such that only axiom types
     * will be processed that correspond to the declared entity.
     * @param axiomTypes the axiom types
     */
    public void setAxiomTypes(Set<AxiomType<? extends OWLAxiom>> axiomTypes) {
        this.axiomTypes = axiomTypes;
    }

    /**
     * @param axiomType the axiom type
     * @return the algorithm that was run for the given axiom type, or {@code null} if none was run
     */
    public AbstractAxiomLearningAlgorithm getAlgorithm(AxiomType<? extends OWLAxiom> axiomType) {
        return algorithms.get(axiomType);
    }

    /**
     * @param axiomType the axiom type
     * @param axiom the evaluated axiom
     * @return the positive examples for the axiom as returned by the algorithm of the given axiom type
     * @throws NullPointerException if no algorithm was run for the given axiom type
     */
    public Set<OWLObject> getPositives(AxiomType<? extends OWLAxiom> axiomType, EvaluatedAxiom<OWLAxiom> axiom) {
        AbstractAxiomLearningAlgorithm la = algorithms.get(axiomType);
        return la.getPositiveExamples(axiom);
    }

    /**
     * @param axiomType the axiom type
     * @param axiom the evaluated axiom
     * @return the negative examples for the axiom as returned by the algorithm of the given axiom type
     * @throws NullPointerException if no algorithm was run for the given axiom type
     */
    public Set<OWLObject> getNegatives(AxiomType<? extends OWLAxiom> axiomType, EvaluatedAxiom<OWLAxiom> axiom) {
        AbstractAxiomLearningAlgorithm la = algorithms.get(axiomType);
        return la.getNegativeExamples(axiom);
    }

    /**
     * @param useSampling the useSampling to set
     */
    public void setUseSampling(boolean useSampling) {
        this.useSampling = useSampling;
    }

    /**
     * @param multiThreaded whether to enable multi-threaded execution (@see setMaxNrOfThreads)
     */
    public void setMultiThreaded(boolean multiThreaded) {
        this.multiThreaded = multiThreaded;
    }

    /**
     * @param maxNrOfThreads the max. nr of threads
     */
    public void setMaxNrOfThreads(int maxNrOfThreads) {
        this.maxNrOfThreads = maxNrOfThreads;
    }

    /**
     * Set the maximum execution time. Note, this value represents the total computation time of all axiom types that have
     * been set, thus, it's recommended to increase the runtime.
     * @param executionTimeDuration the execution time
     * @param executionTimeUnit the time unit
     */
    public void setMaxExecutionTime(long executionTimeDuration, TimeUnit executionTimeUnit) {
        this.maxExecutionTimeMilliseconds = executionTimeUnit.toMillis(executionTimeDuration);
    }

    /**
     * Set the maximum time to generate a sample. Note, this value represents the time to compute a single sample, thus,
     * this time is spent for each axiom type (resp. cluster of axiom types).
     * @param sampleGenerationTimeDuration the sample generation time
     * @param sampleGenerationTimeUnit the time unit
     */
    public void setMaxSampleGenerationTime(long sampleGenerationTimeDuration, TimeUnit sampleGenerationTimeUnit) {
        this.maxSampleGenerationTimeMilliseconds = sampleGenerationTimeUnit.toMillis(sampleGenerationTimeDuration);
    }

    /**
     * Creates, configures and runs the learning algorithm for the given axiom type on the given
     * knowledge base and registers it such that it can be retrieved via {@link #getAlgorithm(AxiomType)}.
     * @param axiomType the axiom type
     * @param ks the knowledge base the algorithm works on
     * @return the evaluated axioms computed by the algorithm
     * @throws ComponentInitException if the initialization of the algorithm failed
     * @throws RuntimeException if the algorithm could not be created or failed for any other reason
     */
    private List<EvaluatedAxiom<OWLAxiom>> applyAlgorithm(AxiomType<? extends OWLAxiom> axiomType, SparqlEndpointKS ks) throws ComponentInitException {
        Class<? extends AbstractAxiomLearningAlgorithm<? extends OWLAxiom, ? extends OWLObject, ? extends OWLEntity>> algorithmClass = AxiomAlgorithms.getAlgorithmClass(axiomType);
        AbstractAxiomLearningAlgorithm learner = null;
        try {
            learner = algorithmClass.getConstructor(SparqlEndpointKS.class).newInstance(ks);

            learner.setEntityToDescribe(entity);
            // sampling (if enabled) was already done by this class, so the algorithm must not sample again
            learner.setUseSampling(false);
            learner.setProgressMonitor(progressMonitor);
            learner.init();
            learner.start();

            algorithms.put(axiomType, learner);

            return learner.getCurrentlyBestEvaluatedAxioms();
        } catch (ComponentInitException e) {
            throw e;
        } catch (Exception e) {
            // keep the cause, otherwise the real reason of the failure is lost
            throw new RuntimeException("Failed to apply algorithm for " + axiomType + " axioms on entity " + entity, e);
        }
    }

    /**
     * Generates a sample for the given entity by executing the sample query of the cluster page by
     * page until a page is empty, the sample generation time is exceeded or the overall execution
     * time is exceeded.
     * @param entity the entity for which the sample is generated
     * @param cluster the cluster of axiom types which provides the sample query
     * @return the sample, or {@code null} if the execution of a query failed
     */
    private Model generateSample(OWLEntity entity, AxiomTypeCluster cluster) {
        logger.info("Generating sample (" + cluster + ") for " + OWLAPIUtils.getPrintName(entity.getEntityType()) + " " + entity.toStringID() + "...");
        // start of the sampling, not to be confused with the start of the whole run (field startTime)
        long sampleStartTime = System.currentTimeMillis();

        Model sample = ModelFactory.createDefaultModel();

        ParameterizedSparqlString sampleQueryTemplate = cluster.getSampleQuery();
        sampleQueryTemplate.clearParam("entity");
        sampleQueryTemplate.setIri("entity", entity.toStringID());

        Query query = sampleQueryTemplate.asQuery();
        query.setLimit(pageSize);

        boolean isEmpty = false;
        boolean samplingTimeout = false;
        int i = 0;
        while (!isEmpty && !samplingTimeout && !isTimeout()) {
            // get next sample
            logger.debug("Extending sample...");
            query.setOffset(i++ * pageSize);
            logger.debug("sending query\n{}", query);
            try (QueryExecution qe = qef.createQueryExecution(query)) {
                // set the remaining runtime
                qe.setTimeout(maxSampleGenerationTimeMilliseconds - (System.currentTimeMillis() - sampleStartTime));

                // execute query
                Model tmp = qe.execConstruct();
                sample.add(tmp);

                // if last call returned empty model, we can leave loop
                isEmpty = tmp.isEmpty();
            } catch (Exception e) {
                boolean syntaxError = e instanceof RuntimeException &&
                        e.getCause() instanceof org.apache.jena.riot.RiotException;
                logger.error("Sample generation for " + cluster + " failed. Reason:\n");
                if (syntaxError) {
                    logger.error("Endpoint returned illegal data with error\n" + e.getCause().getMessage()
                            + "\nfor query\n" + query + "\n");
                } else {
                    logger.error("Query execution failed for query\n" + query, e);
                }

                return null;
            }
            // checker for sampling timeout
            samplingTimeout = (System.currentTimeMillis() - sampleStartTime) >= maxSampleGenerationTimeMilliseconds;
            if (samplingTimeout) {
                logger.info("Sampling timeout.");
            }
        }
        logger.info("Finished generating sample. Sample size: " + sample.size() + " triples");
        if (isEmpty) {
            logger.info("Sample contains the whole relevant data.");
        }
        return sample;
    }

    /**
     * @return whether an overall execution timeout is set and has been exceeded
     */
    private boolean isTimeout() {
        return maxExecutionTimeMilliseconds > 0 && getRemainingRuntimeMilliSeconds() <= 0;
    }

    /**
     * @return the remaining overall runtime in milliseconds, never negative
     */
    private long getRemainingRuntimeMilliSeconds() {
        long duration = System.currentTimeMillis() - startTime;
        return Math.max(0, (maxExecutionTimeMilliseconds - duration));
    }

    /**
     * Logs warnings for configurations that are probably not intended. Nothing is changed or rejected.
     */
    private void checkConfigOptions() {
        if (multiThreaded && maxNrOfThreads == 1) {
            logger.warn("You enabled multi-threaded execution but set the number of threads to 1. " +
                    "You probably want to increase this value.");
        }

        if (useSampling && maxExecutionTimeMilliseconds <= 0) {
            logger.warn(
                    "You enabled sampling but set no execution timeout. This means that the whole data will be loaded " +
                    "locally which might be time and resource consuming. We suggest to either set a timeout or " +
                    "disable the sampling in that case.");
        }
    }

    /**
     * Small demo that learns several object property axiom types for a DBpedia property.
     */
    public static void main(String[] args) throws Exception {
        ErrorHandlerFactory.setDefaultErrorHandler(ErrorHandlerFactory.errorHandlerStrictNoLogging);
        SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://dbpedia.org/sparql"), "http://dbpedia.org");
        SparqlEndpointKS ks = new SparqlEndpointKS(endpoint);
        ks.init();

        OWLEntity entity = new OWLObjectPropertyImpl(IRI.create("http://dbpedia.org/ontology/author"));

        MultiPropertyAxiomLearner la = new MultiPropertyAxiomLearner(ks);
        la.setEntityToDescribe(entity);

        la.setUseSampling(true);
        la.setMaxSampleGenerationTime(10, TimeUnit.SECONDS);

        la.setMaxNrOfThreads(1);
        la.setAxiomTypes(Sets.newHashSet(
                AxiomType.OBJECT_PROPERTY_DOMAIN,
                AxiomType.OBJECT_PROPERTY_RANGE,
                AxiomType.SUB_OBJECT_PROPERTY, AxiomType.EQUIVALENT_OBJECT_PROPERTIES,
                AxiomType.FUNCTIONAL_OBJECT_PROPERTY, AxiomType.ASYMMETRIC_OBJECT_PROPERTY, AxiomType.IRREFLEXIVE_OBJECT_PROPERTY,
                AxiomType.TRANSITIVE_OBJECT_PROPERTY
                )
        );
        la.setMaxExecutionTime(1, TimeUnit.MINUTES);
        la.start();

    }

}