001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.kb.sparql; 020 021import java.util.*; 022import java.util.function.Predicate; 023import java.util.stream.Collectors; 024 025import org.aksw.jena_sparql_api.core.QueryExecutionFactory; 026import org.apache.jena.datatypes.BaseDatatype; 027import org.apache.jena.vocabulary.RDF; 028import org.dllearner.kb.SparqlEndpointKS; 029import org.dllearner.reasoning.SPARQLReasoner; 030import org.dllearner.utilities.HasProgressMonitor; 031import org.dllearner.utilities.OwlApiJenaUtils; 032import org.dllearner.utilities.ProgressMonitor; 033import org.dllearner.utilities.owl.OWLEntityTypeAdder; 034import org.semanticweb.owlapi.model.OWLIndividual; 035import org.semanticweb.owlapi.model.OWLOntology; 036 037import com.google.common.collect.Sets; 038import org.apache.jena.datatypes.RDFDatatype; 039import org.apache.jena.datatypes.xsd.XSDDatatype; 040import org.apache.jena.query.QueryExecution; 041import org.apache.jena.rdf.model.Literal; 042import org.apache.jena.rdf.model.Model; 043import org.apache.jena.rdf.model.Statement; 044import org.apache.jena.rdf.model.StmtIterator; 045import org.apache.jena.vocabulary.OWL; 046 047/** 048 * @author Lorenz Buehmann 049 * 050 */ 051public abstract class AbstractSampleGenerator implements HasProgressMonitor<AbstractSampleGenerator.SampleGeneratorProgressMonitor> { 052 053 public interface SampleGeneratorProgressMonitor extends ProgressMonitor { 054 void sampleGenerationStarted(); 055 void sampleGenerationFinished(); 056 } 057 private Collection<SampleGeneratorProgressMonitor> progressMonitors = new LinkedHashSet<>(); 058 @Override 059 public Collection<SampleGeneratorProgressMonitor> progressMonitors() { 060 return progressMonitors; 061 } 062 063 private void fireSampleGenerationStarted() { 064 progressMonitors().forEach(SampleGeneratorProgressMonitor::sampleGenerationStarted); 065 } 066 067 private void fireSampleGenerationFinished() { 068 progressMonitors().forEach(SampleGeneratorProgressMonitor::sampleGenerationFinished); 069 } 070 071 private ConciseBoundedDescriptionGenerator cbdGen; 072 073 private int sampleDepth = 2; 074 075 protected QueryExecutionFactory qef; 076 077 protected SPARQLReasoner reasoner; 078 079 private boolean loadRelatedSchema = true; 080 081 082 public AbstractSampleGenerator(SparqlEndpointKS ks) { 083 this(ks.getQueryExecutionFactory()); 084 } 085 086 public AbstractSampleGenerator(QueryExecutionFactory qef) { 087 this.qef = qef; 088 089 cbdGen = new ConciseBoundedDescriptionGeneratorImpl(qef); 090 cbdGen.setIgnoredProperties(Sets.newHashSet(OWL.sameAs.getURI())); 091 092 reasoner = new SPARQLReasoner(qef); 093 } 094 095 public void addAllowedPropertyNamespaces(Set<String> namespaces) { 096 cbdGen.setAllowedPropertyNamespaces(namespaces); 097 } 098 099 public void addAllowedObjectNamespaces(Set<String> namespaces) { 100 cbdGen.setAllowedObjectNamespaces(namespaces); 101 } 102 103 public void addIgnoredProperties(Set<String> ignoredProperties) { 104 cbdGen.setIgnoredProperties(ignoredProperties); 105 } 106 107 public void addIgnoredClasses(Set<String> ignoredProperties) { 108 cbdGen.setIgnoredProperties(ignoredProperties); 109 } 110 111 public void addAllowedClassNamespaces(Set<String> ignoredProperties) { 112 cbdGen.setIgnoredProperties(ignoredProperties); 113 } 114 115 public void setLoadRelatedSchema(boolean loadRelatedSchema) { 116 this.loadRelatedSchema = loadRelatedSchema; 117 } 118 119 /** 120 * Computes a sample of the knowledge base, i.e. it contains only facts 121 * about the positive and negative individuals. 122 * @param individuals the individuals 123 * @return a sample ontology of the knowledge bas 124 */ 125 public OWLOntology getSample(Set<OWLIndividual> individuals) { 126 fireSampleGenerationStarted(); 127 128 // get the sample model 129 Model sampleModel = getSampleModel(individuals); 130 131 // convert to ontology 132 OWLOntology sampleOntology = OwlApiJenaUtils.getOWLOntology(sampleModel); 133 134 fireSampleGenerationFinished(); 135 136 return sampleOntology; 137 } 138 139 /** 140 * @param sampleDepth the maximum sample depth to set 141 */ 142 public void setSampleDepth(int sampleDepth) { 143 this.sampleDepth = sampleDepth; 144 } 145 146 /** 147 * @return the maximum sample depth 148 */ 149 public int getSampleDepth() { 150 return sampleDepth; 151 } 152 153 public void addPostProcessor(Predicate<Statement> postProcessStatementFilter) { 154 155 } 156 157 158 protected Model getSampleModel(Set<OWLIndividual> individuals) { 159 Set<String> resources = individuals.stream().map(OWLIndividual::toStringID).collect(Collectors.toSet()); 160 Model model = cbdGen.getConciseBoundedDescription(resources, sampleDepth); 161 162// Model model = ModelFactory.createDefaultModel(); 163// 164 // load instance data 165// for(OWLIndividual ind : individuals){ 166// Model cbd = cbdGen.getConciseBoundedDescription(ind.toStringID(), sampleDepth); 167// model.add(cbd); 168// } 169 170 StmtIterator iterator = model.listStatements(); 171 List<Statement> toAdd = new ArrayList<>(); 172 while(iterator.hasNext()) { 173 Statement st = iterator.next(); 174 if(st.getObject().isLiteral()) { 175 Literal lit = st.getObject().asLiteral(); 176 RDFDatatype datatype = lit.getDatatype(); 177 178 if(datatype != null) { 179 if(datatype.equals(XSDDatatype.XSDdouble) && lit.getLexicalForm().equals("NAN")) { 180 iterator.remove(); 181 toAdd.add(model.createLiteralStatement(st.getSubject(), st.getPredicate(), Double.NaN)); 182 } else if(datatype.equals(XSDDatatype.XSDgYear) && st.getPredicate().getURI().equals("http://dbpedia.org/ontology/birthDate")) { 183 iterator.remove(); 184 toAdd.add(model.createStatement(st.getSubject(), st.getPredicate(), model.createTypedLiteral("2000-01-01", XSDDatatype.XSDdate))); 185 } else if(datatype.equals(XSDDatatype.XSDdate)) { 186 iterator.remove(); 187 toAdd.add(model.createStatement(st.getSubject(), st.getPredicate(), model.createTypedLiteral("2000-01-01", XSDDatatype.XSDdate))); 188 } else if(datatype.equals(RDF.langString)) { 189 iterator.remove(); 190 toAdd.add(model.createStatement(st.getSubject(), st.getPredicate(), model.createTypedLiteral("2000-01-01", new BaseDatatype(RDF.dtLangString.getURI())))); 191 } 192 } 193 } 194 } 195 model.add(toAdd); 196 197 // infer entity types, e.g. object or data property 198 OWLEntityTypeAdder.addEntityTypes(model); 199 200 // load related schema information 201 if(loadRelatedSchema) { 202// loadRelatedSchema(model); 203 } 204 205 return model; 206 } 207 208 private void loadRelatedSchema(Model model) { 209 String query = 210 "CONSTRUCT {" + 211 "?p a owl:ObjectProperty;" + 212// "a ?type;" + 213 "rdfs:domain ?domain;" + 214 "rdfs:range ?range." + 215 "} WHERE {" + 216 "?p a owl:ObjectProperty." + 217// "?p a ?type. " + 218 "OPTIONAL{?p rdfs:domain ?domain.} " + 219 "OPTIONAL{?p rdfs:range ?range.}" + 220 "}"; 221 222 QueryExecution qe = qef.createQueryExecution(query); 223 qe.execConstruct(model); 224 qe.close(); 225 226 query = 227 "CONSTRUCT {" + 228 "?s a owl:Class ." + 229 "?s rdfs:subClassOf ?sup ." + 230 "} WHERE {\n" + 231 "?s a owl:Class ." + 232 "OPTIONAL{?s rdfs:subClassOf ?sup .} " + 233 "}"; 234 qe = qef.createQueryExecution(query); 235 qe.execConstruct(model); 236 qe.close(); 237 } 238 239}