001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.properties;
020
021import org.apache.jena.query.*;
022import org.dllearner.core.ComponentAnn;
023import org.dllearner.core.ConsoleAxiomLearningProgressMonitor;
024import org.dllearner.core.EvaluatedAxiom;
025import org.dllearner.core.config.ConfigOption;
026import org.dllearner.kb.SparqlEndpointKS;
027import org.dllearner.kb.sparql.SparqlEndpoint;
028import org.dllearner.learningproblems.AxiomScore;
029import org.dllearner.learningproblems.Heuristics;
030import org.dllearner.utilities.OwlApiJenaUtils;
031import org.semanticweb.owlapi.dlsyntax.renderer.DLSyntaxObjectRenderer;
032import org.semanticweb.owlapi.io.ToStringRenderer;
033import org.semanticweb.owlapi.model.*;
034import uk.ac.manchester.cs.owl.owlapi.OWLDataPropertyImpl;
035import uk.ac.manchester.cs.owl.owlapi.OWLObjectPropertyImpl;
036
037import java.util.Set;
038import java.util.TreeSet;
039
040@ComponentAnn(name="data property domain axiom learner", shortName="dpldomain", version=0.1, description="A learning algorithm for data property domain axioms.")
041public class DataPropertyDomainAxiomLearner extends DataPropertyAxiomLearner<OWLDataPropertyDomainAxiom> {
042
043        private static final ParameterizedSparqlString POPULARITY_COUNT_QUERY = new ParameterizedSparqlString(
044                        "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {?s ?p ?o .}");
045
046        private static final ParameterizedSparqlString SUBJECTS_OF_TYPE_COUNT_QUERY = new ParameterizedSparqlString(
047                        "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {?s ?p ?o; a ?type .}");
048
049        private static final ParameterizedSparqlString SUBJECTS_OF_TYPE_WITH_INFERENCE_COUNT_QUERY = new ParameterizedSparqlString(
050                        "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {?s ?p ?o; rdf:type/rdfs:subClassOf* ?type .}");
051        private static final ParameterizedSparqlString SUBJECTS_OF_TYPE_COUNT_BATCHED_QUERY = new ParameterizedSparqlString(
052                        "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT ?type (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {?s ?p ?o; a ?type . ?type a owl:Class .} GROUP BY ?type");
053        private static final ParameterizedSparqlString SUBJECTS_OF_TYPE_WITH_INFERENCE_COUNT_BATCHED_QUERY = new ParameterizedSparqlString(
054                        "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT ?type (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {?s ?p ?o; rdf:type/rdfs:subClassOf* ?type . ?type a owl:Class .} GROUP BY ?type");
055        private static final ParameterizedSparqlString SAMPLE_QUERY = new ParameterizedSparqlString(
056                        "PREFIX owl:<http://www.w3.org/2002/07/owl#> CONSTRUCT {?s ?p ?o; a ?cls . ?cls a owl:Class .} "
057                        + "WHERE {?s ?p ?o . OPTIONAL {?s a ?cls . ?cls a owl:Class .}}");
058
059        @ConfigOption(defaultValue = "false", description = "compute everything in a single SPARQL query")
060        protected boolean batchMode = false;
061
062        public DataPropertyDomainAxiomLearner(){
063                super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s ?o WHERE {?s ?p ?o. ?s a ?type}");
064                super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s ?o WHERE {?s ?p ?o. FILTER NOT EXISTS{?s a ?type}}");
065
066                axiomType = AxiomType.DATA_PROPERTY_DOMAIN;
067        }
068        
069        public DataPropertyDomainAxiomLearner(SparqlEndpointKS ks){
070                this();
071                this.ks = ks;
072        }
073        
074        /* (non-Javadoc)
075         * @see org.dllearner.core.AbstractAxiomLearningAlgorithm#getExistingAxioms()
076         */
077        @Override
078        protected void getExistingAxioms() {
079                OWLClassExpression existingDomain = reasoner.getDomain(entityToDescribe);
080                if(existingDomain != null){
081                        existingAxioms.add(df.getOWLDataPropertyDomainAxiom(entityToDescribe, existingDomain));
082                        if(reasoner.isPrepared()){
083                                if(reasoner.getClassHierarchy().contains(existingDomain)){
084                                        for(OWLClassExpression sup : reasoner.getClassHierarchy().getSuperClasses(existingDomain, false)){
085                                                existingAxioms.add(df.getOWLDataPropertyDomainAxiom(entityToDescribe, existingDomain));
086                                                logger.info("Existing domain(inferred): " + sup);
087                                        }
088                                }
089                                
090                        }
091                }
092        }
093
094        protected int getPopularity() {
095                POPULARITY_COUNT_QUERY.setIri("p", entityToDescribe.toStringID());
096                String query = POPULARITY_COUNT_QUERY.toString();
097                ResultSet rs = executeSelectQuery(query);
098                int popularity = rs.next().getLiteral("cnt").getInt();
099                return popularity;
100        }
101        
102        @Override
103        public void setEntityToDescribe(OWLDataProperty entityToDescribe) {
104                super.setEntityToDescribe(entityToDescribe);
105                
106                DISTINCT_SUBJECTS_COUNT_QUERY.setIri("p", entityToDescribe.toStringID());
107                SUBJECTS_OF_TYPE_COUNT_QUERY.setIri("p", entityToDescribe.toStringID());
108                SUBJECTS_OF_TYPE_WITH_INFERENCE_COUNT_QUERY.setIri("p", entityToDescribe.toStringID());
109                SUBJECTS_OF_TYPE_COUNT_BATCHED_QUERY.setIri("p", entityToDescribe.toStringID());
110                SUBJECTS_OF_TYPE_WITH_INFERENCE_COUNT_BATCHED_QUERY.setIri("p", entityToDescribe.toStringID());
111                SAMPLE_QUERY.setIri("p", entityToDescribe.toStringID());
112        }
113        
114        /* (non-Javadoc)
115         * @see org.dllearner.algorithms.properties.PropertyAxiomLearner#getSampleQuery()
116         */
117        @Override
118        protected ParameterizedSparqlString getSampleQuery() {
119                return SAMPLE_QUERY;
120        }
121
122        @Override
123        protected void run(){
124                if(batchMode) {
125                        runBatched();
126                } else {
127                        runIterative();
128                }
129        }
130
131        /**
132         * We can handle the domain axiom Domain(r, C) as a subclass of axiom \exists r.\top \sqsubseteq C
133         * 
134         * A = \exists r.\top
135         * B = C
136         */
137        private void runIterative(){
138                // get the candidates
139                Set<OWLClass> candidates = reasoner.getNonEmptyOWLClasses();
140                
141                // check for each candidate how often the subject belongs to it
142                int i = 1;
143                for (OWLClass candidate : candidates) {
144                        progressMonitor.learningProgressChanged(axiomType, i++, candidates.size());
145                        
146                        //get total number of instances of B
147                        int cntB = reasoner.getPopularity(candidate);
148                        
149                        if(cntB == 0){// skip empty properties
150                                logger.debug("Cannot compute domain statements for empty candidate class " + candidate);
151                                continue;
152                        }
153                        
154                        //get number of instances of (A AND B)
155                        SUBJECTS_OF_TYPE_COUNT_QUERY.setIri("type", candidate.toStringID());
156                        int cntAB = executeSelectQuery(SUBJECTS_OF_TYPE_COUNT_QUERY.toString()).next().getLiteral("cnt").getInt();
157                        logger.debug("Candidate:" + candidate + "\npopularity:" + cntB + "\noverlap:" + cntAB);
158                        
159                        // compute score
160                        AxiomScore score = computeScore(popularity, cntB, cntAB);
161                        
162                        currentlyBestAxioms.add(
163                                        new EvaluatedAxiom<>(
164                                                        df.getOWLDataPropertyDomainAxiom(entityToDescribe, candidate),
165                                                        score));
166                }
167        }
168
169        /**
170         * We can handle the domain axiom Domain(r, C) as a subclass of axiom \exists r.\top \sqsubseteq C
171         */
172        private void runBatched(){
173                
174                reasoner.precomputeClassPopularity();
175                
176                // get for each subject type the frequency
177                ResultSet rs = executeSelectQuery(SUBJECTS_OF_TYPE_COUNT_BATCHED_QUERY.toString());
178                ResultSetRewindable rsrw = ResultSetFactory.copyResults(rs);
179                int size = rsrw.size();
180                rsrw.reset();
181                int i = 1;
182                while(rsrw.hasNext()){
183                        QuerySolution qs = rsrw.next();
184                        if(qs.getResource("type").isURIResource()){
185                                progressMonitor.learningProgressChanged(axiomType, i++, size);
186                                
187                                OWLClass candidate = df.getOWLClass(IRI.create(qs.getResource("type").getURI()));
188                                
189                                //get total number of instances of B
190                                int cntB = reasoner.getPopularity(candidate);
191                                
192                                //get number of instances of (A AND B)
193                                int cntAB = qs.getLiteral("cnt").getInt();
194                                
195                                //precision (A AND B)/B
196                                double precision = Heuristics.getConfidenceInterval95WaldAverage(cntB, cntAB);
197                                
198                                //recall (A AND B)/A
199                                double recall = Heuristics.getConfidenceInterval95WaldAverage(popularity, cntAB);
200                                
201                                //F score
202                                double score = Heuristics.getFScore(recall, precision, beta);
203                                
204                                currentlyBestAxioms.add(
205                                                new EvaluatedAxiom<>(
206                                                                df.getOWLDataPropertyDomainAxiom(entityToDescribe, candidate),
207                                                                new AxiomScore(score, useSampling)));
208                                
209                        }
210                }
211        }
212
213        @Override
214        protected Set<OWLDataPropertyAssertionAxiom> getExamples(ParameterizedSparqlString queryTemplate,
215                                                                                                                           EvaluatedAxiom<OWLDataPropertyDomainAxiom> evAxiom) {
216                OWLDataPropertyDomainAxiom axiom = evAxiom.getAxiom();
217                queryTemplate.setIri("p", entityToDescribe.toStringID());
218                queryTemplate.setIri("type", axiom.getDomain().asOWLClass().toStringID());
219
220                Set<OWLDataPropertyAssertionAxiom> examples = new TreeSet<>();
221
222                ResultSet rs = executeSelectQuery(queryTemplate.toString());
223
224                while (rs.hasNext()) {
225                        QuerySolution qs = rs.next();
226                        OWLIndividual subject = df.getOWLNamedIndividual(IRI.create(qs.getResource("s").getURI()));
227                        OWLLiteral object = OwlApiJenaUtils.getOWLLiteral(qs.getLiteral("o"));
228                        examples.add(df.getOWLDataPropertyAssertionAxiom(entityToDescribe, subject, object));
229                }
230
231                return examples;
232        }
233
234        public void setBatchMode(boolean batchMode) {
235                this.batchMode = batchMode;
236        }
237
238        public boolean isBatchMode() {
239                return batchMode;
240        }
241        
242        public static void main(String[] args) throws Exception{
243                ToStringRenderer.getInstance().setRenderer(new DLSyntaxObjectRenderer());
244                SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpedia());
245                ks.init();
246
247                DataPropertyDomainAxiomLearner l = new DataPropertyDomainAxiomLearner(ks);
248                l.setEntityToDescribe(new OWLDataPropertyImpl(IRI.create("http://dbpedia.org/ontology/birthDate")));
249                l.setUseSampling(false);
250                l.setBatchMode(true);
251                l.setUsePrecisionOnly(false);
252                l.setProgressMonitor(new ConsoleAxiomLearningProgressMonitor());
253                l.init();
254
255                l.start();
256
257                l.getCurrentlyBestEvaluatedAxioms(0.3).forEach(ax -> {
258                        System.out.println("---------------\n" + ax);
259                        l.getPositiveExamples(ax).stream().limit(5).forEach(System.out::println);
260                });
261        }
262
263        
264        
265
266}