001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.kb.sparql;
020
021import java.util.*;
022import java.util.function.Predicate;
023import java.util.stream.Collectors;
024
025import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
026import org.apache.jena.datatypes.BaseDatatype;
027import org.apache.jena.vocabulary.RDF;
028import org.dllearner.kb.SparqlEndpointKS;
029import org.dllearner.reasoning.SPARQLReasoner;
030import org.dllearner.utilities.HasProgressMonitor;
031import org.dllearner.utilities.OwlApiJenaUtils;
032import org.dllearner.utilities.ProgressMonitor;
033import org.dllearner.utilities.owl.OWLEntityTypeAdder;
034import org.semanticweb.owlapi.model.OWLIndividual;
035import org.semanticweb.owlapi.model.OWLOntology;
036
037import com.google.common.collect.Sets;
038import org.apache.jena.datatypes.RDFDatatype;
039import org.apache.jena.datatypes.xsd.XSDDatatype;
040import org.apache.jena.query.QueryExecution;
041import org.apache.jena.rdf.model.Literal;
042import org.apache.jena.rdf.model.Model;
043import org.apache.jena.rdf.model.Statement;
044import org.apache.jena.rdf.model.StmtIterator;
045import org.apache.jena.vocabulary.OWL;
046
047/**
048 * @author Lorenz Buehmann
049 *
050 */
051public abstract class AbstractSampleGenerator implements HasProgressMonitor<AbstractSampleGenerator.SampleGeneratorProgressMonitor> {
052
053        public interface SampleGeneratorProgressMonitor extends ProgressMonitor {
054                void sampleGenerationStarted();
055                void sampleGenerationFinished();
056        }
057        private Collection<SampleGeneratorProgressMonitor> progressMonitors = new LinkedHashSet<>();
058        @Override
059        public Collection<SampleGeneratorProgressMonitor> progressMonitors() {
060                return progressMonitors;
061        }
062
063        private void fireSampleGenerationStarted() {
064                progressMonitors().forEach(SampleGeneratorProgressMonitor::sampleGenerationStarted);
065        }
066
067        private void fireSampleGenerationFinished() {
068                progressMonitors().forEach(SampleGeneratorProgressMonitor::sampleGenerationFinished);
069        }
070
071        private ConciseBoundedDescriptionGenerator cbdGen;
072        
073        private int sampleDepth = 2;
074
075        protected QueryExecutionFactory qef;
076        
077        protected SPARQLReasoner reasoner;
078        
079        private boolean loadRelatedSchema = true;
080
081
082        public AbstractSampleGenerator(SparqlEndpointKS ks) {
083                this(ks.getQueryExecutionFactory());
084        }
085        
086        public AbstractSampleGenerator(QueryExecutionFactory qef) {
087                this.qef = qef;
088                
089                cbdGen = new ConciseBoundedDescriptionGeneratorImpl(qef);
090                cbdGen.setIgnoredProperties(Sets.newHashSet(OWL.sameAs.getURI()));
091                
092                reasoner = new SPARQLReasoner(qef);
093        }
094        
095        public void addAllowedPropertyNamespaces(Set<String> namespaces) {
096                cbdGen.setAllowedPropertyNamespaces(namespaces);
097        }
098        
099        public void addAllowedObjectNamespaces(Set<String> namespaces) {
100                cbdGen.setAllowedObjectNamespaces(namespaces);
101        }
102        
103        public void addIgnoredProperties(Set<String> ignoredProperties) {
104                cbdGen.setIgnoredProperties(ignoredProperties);
105        }
106
107        public void addIgnoredClasses(Set<String> ignoredProperties) {
108                cbdGen.setIgnoredProperties(ignoredProperties);
109        }
110
111        public void addAllowedClassNamespaces(Set<String> ignoredProperties) {
112                cbdGen.setIgnoredProperties(ignoredProperties);
113        }
114
115        public void setLoadRelatedSchema(boolean loadRelatedSchema) {
116                this.loadRelatedSchema = loadRelatedSchema;
117        }
118
119        /**
120         * Computes a sample of the knowledge base, i.e. it contains only facts
121         * about the positive and negative individuals.
122         * @param individuals the individuals
123         * @return a sample ontology of the knowledge bas
124         */
125        public OWLOntology getSample(Set<OWLIndividual> individuals) {
126                fireSampleGenerationStarted();
127
128                // get the sample model
129                Model sampleModel = getSampleModel(individuals);
130
131                // convert to ontology
132                OWLOntology sampleOntology = OwlApiJenaUtils.getOWLOntology(sampleModel);
133
134                fireSampleGenerationFinished();
135
136                return sampleOntology;
137        }
138        
139        /**
140         * @param sampleDepth the maximum sample depth to set
141         */
142        public void setSampleDepth(int sampleDepth) {
143                this.sampleDepth = sampleDepth;
144        }
145        
146        /**
147         * @return the maximum sample depth
148         */
149        public int getSampleDepth() {
150                return sampleDepth;
151        }
152
153        public void addPostProcessor(Predicate<Statement> postProcessStatementFilter) {
154
155        }
156
157
158        protected Model getSampleModel(Set<OWLIndividual> individuals) {
159                Set<String> resources = individuals.stream().map(OWLIndividual::toStringID).collect(Collectors.toSet());
160                Model model = cbdGen.getConciseBoundedDescription(resources, sampleDepth);
161
162//              Model model = ModelFactory.createDefaultModel();
163//
164                // load instance data
165//              for(OWLIndividual ind : individuals){
166//                      Model cbd = cbdGen.getConciseBoundedDescription(ind.toStringID(), sampleDepth);
167//                      model.add(cbd);
168//              }
169                
170                StmtIterator iterator = model.listStatements();
171                List<Statement> toAdd = new ArrayList<>();
172                while(iterator.hasNext()) {
173                        Statement st = iterator.next();
174                        if(st.getObject().isLiteral()) {
175                                Literal lit = st.getObject().asLiteral();
176                                RDFDatatype datatype = lit.getDatatype();
177                                
178                                if(datatype != null) {
179                                        if(datatype.equals(XSDDatatype.XSDdouble) && lit.getLexicalForm().equals("NAN")) {
180                                                iterator.remove();
181                                                toAdd.add(model.createLiteralStatement(st.getSubject(), st.getPredicate(), Double.NaN));
182                                        } else if(datatype.equals(XSDDatatype.XSDgYear) && st.getPredicate().getURI().equals("http://dbpedia.org/ontology/birthDate")) {
183                                                iterator.remove();
184                                                toAdd.add(model.createStatement(st.getSubject(), st.getPredicate(), model.createTypedLiteral("2000-01-01", XSDDatatype.XSDdate)));
185                                        } else if(datatype.equals(XSDDatatype.XSDdate)) {
186                                                iterator.remove();
187                                                toAdd.add(model.createStatement(st.getSubject(), st.getPredicate(), model.createTypedLiteral("2000-01-01", XSDDatatype.XSDdate)));
188                                        } else if(datatype.equals(RDF.langString)) {
189                                                iterator.remove();
190                                                toAdd.add(model.createStatement(st.getSubject(), st.getPredicate(), model.createTypedLiteral("2000-01-01", new BaseDatatype(RDF.dtLangString.getURI()))));
191                                        }
192                                }
193                        }
194                }
195                model.add(toAdd);
196                
197                // infer entity types, e.g. object or data property
198                OWLEntityTypeAdder.addEntityTypes(model);
199                
200                // load related schema information
201                if(loadRelatedSchema) {
202//                      loadRelatedSchema(model);
203                }
204                
205                return model;
206        }
207        
208        private void loadRelatedSchema(Model model) {
209                String query = 
210                                "CONSTRUCT {" +
211                                "?p a owl:ObjectProperty;" +
212//                              "a ?type;" +
213                                "rdfs:domain ?domain;" +
214                                "rdfs:range ?range." +
215                                "} WHERE {" +
216                                "?p a owl:ObjectProperty." +
217//                              "?p a ?type. " +
218                                "OPTIONAL{?p rdfs:domain ?domain.} " +
219                                "OPTIONAL{?p rdfs:range ?range.}" +
220                                "}";
221                
222                QueryExecution qe = qef.createQueryExecution(query);
223                qe.execConstruct(model);
224                qe.close();
225                
226                query = 
227                                "CONSTRUCT {" +
228                                "?s a owl:Class ." +
229                                "?s rdfs:subClassOf ?sup ." +
230                                "} WHERE {\n" +
231                                "?s a owl:Class ." +
232                                "OPTIONAL{?s rdfs:subClassOf ?sup .} " +
233                                "}";
234                qe = qef.createQueryExecution(query);
235                qe.execConstruct(model);
236                qe.close();
237        }
238
239}