001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms;
020
021import org.apache.jena.ontology.OntClass;
022import org.apache.jena.ontology.OntModel;
023import org.apache.jena.query.*;
024import org.dllearner.core.*;
025import org.dllearner.core.annotations.Unused;
026import org.dllearner.core.config.ConfigOption;
027import org.dllearner.core.owl.ClassHierarchy;
028import org.dllearner.kb.LocalModelBasedSparqlEndpointKS;
029import org.dllearner.kb.SparqlEndpointKS;
030import org.dllearner.kb.sparql.SparqlEndpoint;
031import org.dllearner.learningproblems.AxiomScore;
032import org.dllearner.learningproblems.Heuristics;
033import org.semanticweb.owlapi.model.*;
034import uk.ac.manchester.cs.owl.owlapi.OWLClassImpl;
035
036import java.util.*;
037import java.util.Map.Entry;
038import java.util.stream.Collectors;
039
040/**
041 * Learns disjoint classes using SPARQL queries.
042 * 
043 * @author Lorenz Bühmann
044 * @author Jens Lehmann
045 *
046 */
047@ComponentAnn(name = "disjoint classes learner", shortName = "cldisjoint", version = 0.1)
048public class DisjointClassesLearner extends AbstractAxiomLearningAlgorithm<OWLDisjointClassesAxiom, OWLIndividual, OWLClass>
049                implements ClassExpressionLearningAlgorithm {
050        
051        protected static final ParameterizedSparqlString CLASS_OVERLAP_QUERY = new ParameterizedSparqlString(
052                        "SELECT ?cls_other (COUNT(?s) AS ?overlap) WHERE {"
053                        + "?s a ?cls, ?cls_other . "
054                        + "?cls_other a <http://www.w3.org/2002/07/owl#Class> . FILTER(?cls != ?cls_other)}"
055                        + " GROUP BY ?cls_other");
056
057        protected static final ParameterizedSparqlString GIVEN_CLASS_OVERLAP_QUERY = new ParameterizedSparqlString(
058                                        "SELECT (COUNT(?s) AS ?overlap) WHERE {?s a ?cls, ?cls_other . }");
059        
060        private static final ParameterizedSparqlString SAMPLE_QUERY = new ParameterizedSparqlString(
061                        "CONSTRUCT{?s a ?entity . ?s a ?cls1 .} WHERE {?s a ?entity . OPTIONAL {?s a ?cls1 .} }");
062
063        private List<EvaluatedDescription<? extends Score>> currentlyBestEvaluatedDescriptions;
064        private SortedSet<OWLClassExpression> subClasses;
065
066        @Unused
067        private boolean useWordNetDistance = false;
068        @ConfigOption(description = "only keep most general classes in suggestions", defaultValue = "true")
069        private boolean suggestMostGeneralClasses = true;
070        @ConfigOption(description = "include instance count / popularity when computing scores", defaultValue = "true")
071        private boolean useClassPopularity = true;
072
073        private Set<OWLClass> allClasses;
074
075        private boolean strictOWLMode = true;
076
077        public DisjointClassesLearner(SparqlEndpointKS ks) {
078                this.ks = ks;
079                
080                super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s WHERE {?s a ?cls . FILTER NOT EXISTS {?s a ?cls_dis .}}");
081                super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s WHERE {?s a ?cls ; a ?cls_dis .}");
082                super.existingAxiomsTemplate = new ParameterizedSparqlString("SELECT ?cls_dis WHERE {?cls owl:disjointWith ?cls_dis .}");
083                
084                axiomType = AxiomType.DISJOINT_CLASSES;
085        }
086        
087        /* (non-Javadoc)
088         * @see org.dllearner.core.AbstractAxiomLearningAlgorithm#setEntityToDescribe(org.semanticweb.owlapi.model.OWLEntity)
089         */
090        @Override
091        public void setEntityToDescribe(OWLClass entityToDescribe) {
092                super.setEntityToDescribe(entityToDescribe);
093                
094                posExamplesQueryTemplate.setIri("cls", entityToDescribe.toStringID());
095                negExamplesQueryTemplate.setIri("cls", entityToDescribe.toStringID());
096                existingAxiomsTemplate.setIri("cls", entityToDescribe.toStringID());
097                
098                CLASS_OVERLAP_QUERY.setIri("cls", entityToDescribe.toStringID());
099                GIVEN_CLASS_OVERLAP_QUERY.setIri("cls", entityToDescribe.toStringID());
100        }
101        
102        /*
103         * (non-Javadoc)
104         * 
105         * @see org.dllearner.core.AbstractAxiomLearningAlgorithm#learnAxioms()
106         */
107        @Override
108        protected void learnAxioms() {
109                run();
110//              runBatched();
111        }
112        
113        private void run() {
114                // get the candidates
115                SortedSet<OWLClass> candidates = getCandidates();
116
117                // check for each candidate if an overlap exists
118                int i = 1;
119                for (OWLClass cls : candidates) {
120                        logger.debug("Analyzing candidate class " + cls.toStringID());
121                        progressMonitor.learningProgressChanged(axiomType, i++, candidates.size());
122                        
123                        // get the popularity of the candidate
124                        int candidatePopularity = reasoner.getPopularity(cls);
125                        
126                        if(candidatePopularity == 0){// skip empty classes
127                                logger.warn("Cannot compute disjointness statements for empty candidate class " + cls);
128                                continue;
129                        }
130                        
131                        // get the number of overlapping instances, i.e. instances asserted to both classes
132                        GIVEN_CLASS_OVERLAP_QUERY.setIri("cls_other", cls.toStringID());
133                        ResultSet rs = executeSelectQuery(GIVEN_CLASS_OVERLAP_QUERY.toString());
134                        int overlap = rs.next().getLiteral("overlap").getInt();
135                        
136                        // compute the score
137                        double score = computeScore(candidatePopularity, popularity, overlap);
138                        
139                        int nrOfPosExamples = overlap;
140                        
141                        int nrOfNegExamples = popularity - nrOfPosExamples;
142                        
143                        currentlyBestAxioms.add(
144                                        new EvaluatedAxiom<>(
145                                                        df.getOWLDisjointClassesAxiom(entityToDescribe, cls),
146                                                        new AxiomScore(score, score, nrOfPosExamples, nrOfNegExamples, useSampling)));
147                }
148        }
149        
150        /**
151         * In this method we try to compute the overlap with each class in one single SPARQL query.
152         * This method might be much slower as the query is much more complex.
153         */
154        protected void runBatched() {
155                
156                ResultSet rs = executeSelectQuery(CLASS_OVERLAP_QUERY.toString());
157                ResultSetRewindable rsrw = ResultSetFactory.copyResults(rs);
158            int size = rsrw.size();
159            rs = rsrw;
160                while (rs.hasNext()) {
161                        QuerySolution qs = rsrw.next();
162                        OWLClass candidate = df.getOWLClass(IRI.create(qs.getResource("cls_other").getURI()));
163                        logger.debug("Analyzing candidate class " + candidate.toStringID());
164                        progressMonitor.learningProgressChanged(axiomType, rs.getRowNumber(), size);
165                        
166                        // get the popularity of the candidate
167                        int candidatePopularity = reasoner.getPopularity(candidate);
168                        
169                        // get the number of overlapping triples, i.e. triples with the same subject and object
170                        int overlap = qs.getLiteral("overlap").getInt();
171                        
172                        // compute the score
173                        double score = 1 - computeScore(candidatePopularity, popularity, overlap);
174
175                        int nrOfPosExamples = overlap;
176                        
177                        int nrOfNegExamples = popularity - nrOfPosExamples;
178                        
179                        currentlyBestAxioms.add(
180                                        new EvaluatedAxiom<>(
181                                                        df.getOWLDisjointClassesAxiom(entityToDescribe, candidate),
182                                                        new AxiomScore(score, score, nrOfPosExamples, nrOfNegExamples, useSampling)));
183                }
184        }
185        
186        /**
187         * Returns the candidate classes for comparison.
188         * @return the candidate classes
189         */
190        private SortedSet<OWLClass> getCandidates(){
191                SortedSet<OWLClass> candidates;
192                
193                if (strictOWLMode) {
194                        candidates = reasoner.getOWLClasses();
195                } else {
196                        candidates = reasoner.getClasses();
197                }
198                candidates.remove(entityToDescribe);
199                
200                // we do not have to consider subclasses
201                SortedSet<OWLClassExpression> subClasses = reasoner.getSubClasses(entityToDescribe, false);
202                candidates.removeAll(subClasses);
203                
204                return candidates;
205        }
206        
207        private double computeScore(int candidatePopularity, int popularity, int overlap){
208                // compute the estimated precision
209                double precision = Heuristics.getConfidenceInterval95WaldAverage(candidatePopularity, overlap);
210
211                // compute the estimated recall
212                double recall = Heuristics.getConfidenceInterval95WaldAverage(popularity, overlap);
213
214                // compute the final score
215                double score = Heuristics.getFScore(recall, precision, 1.0);
216                
217                return score;
218        }
219
220        public boolean isUseWordNetDistance() {
221                return useWordNetDistance;
222        }
223
224        public void setUseWordNetDistance(boolean useWordNetDistance) {
225                this.useWordNetDistance = useWordNetDistance;
226        }
227
228        public boolean isSuggestMostGeneralClasses() {
229                return suggestMostGeneralClasses;
230        }
231
232        public void setSuggestMostGeneralClasses(boolean suggestMostGeneralClasses) {
233                this.suggestMostGeneralClasses = suggestMostGeneralClasses;
234        }
235        
236        public boolean isUseClassPopularity() {
237                return useClassPopularity;
238        }
239
240        public void setUseClassPopularity(boolean useClassPopularity) {
241                this.useClassPopularity = useClassPopularity;
242        }
243
244        /* (non-Javadoc)
245         * @see org.dllearner.core.AbstractAxiomLearningAlgorithm#getSampleQuery()
246         */
247        @Override
248        protected ParameterizedSparqlString getSampleQuery() {
249                return SAMPLE_QUERY;
250        }
251
252        /*
253         * (non-Javadoc)
254         * 
255         * @see
256         * org.dllearner.core.AbstractAxiomLearningAlgorithm#getExistingAxioms()
257         */
258        @Override
259        protected void getExistingAxioms() {
260                ResultSet rs = executeSelectQuery(existingAxiomsTemplate.toString());
261                QuerySolution qs;
262                while (rs.hasNext()) {
263                        qs = rs.next();
264                        if(qs.get("cls_dis").isResource()){
265                                OWLClass disjointClass = df.getOWLClass(IRI.create(qs.getResource("cls_dis").getURI()));
266                                existingAxioms.add(df.getOWLDisjointClassesAxiom(entityToDescribe, disjointClass));
267                        } else {
268                                logger.warn("We do not support complex disjoint classes.");
269                        }
270                        
271                }
272        }
273
274        @Override
275        public List<OWLClassExpression> getCurrentlyBestDescriptions(int nrOfDescriptions) {
276                List<OWLClassExpression> bestDescriptions = new ArrayList<>();
277                for (EvaluatedDescription<? extends Score> evDesc : getCurrentlyBestEvaluatedDescriptions(nrOfDescriptions)) {
278                        bestDescriptions.add(evDesc.getDescription());
279                }
280                return bestDescriptions;
281        }
282
283        @Override
284        public List<? extends EvaluatedDescription<? extends Score>> getCurrentlyBestEvaluatedDescriptions(int nrOfDescriptions) {
285                int max = Math.min(currentlyBestEvaluatedDescriptions.size(), nrOfDescriptions);
286                return currentlyBestEvaluatedDescriptions.subList(0, max);
287        }
288
289        private List<EvaluatedDescription<? extends Score>> buildEvaluatedClassDescriptions(Map<OWLClass, Integer> class2Count,
290                        Set<OWLClass> allClasses, int total) {
291                List<EvaluatedDescription<? extends Score>> evalDescs = new ArrayList<>();
292
293                //Remove temporarily entityToDescribe but keep track of their count
294                //                              Integer all = class2Count.get(entityToDescribe);
295                class2Count.remove(entityToDescribe);
296
297                //get complete disjoint classes
298                Set<OWLClass> completeDisjointclasses = new TreeSet<>(allClasses);
299                completeDisjointclasses.removeAll(class2Count.keySet());
300
301                // we remove the asserted subclasses here
302                completeDisjointclasses.removeAll(subClasses);
303                for (OWLClassExpression subClass : subClasses) {
304                        class2Count.remove(subClass);
305                }
306
307                //drop all classes which have a super class in this set
308                if (suggestMostGeneralClasses) {
309                        keepMostGeneralClasses(completeDisjointclasses);
310                }
311
312                EvaluatedDescription<? extends Score> evalDesc;
313                //firstly, create disjoint classexpressions which do not occur and give score of 1
314                for (OWLClass cls : completeDisjointclasses) {
315                        if (useClassPopularity) {
316                                int overlap = 0;
317                                int pop;
318                                if (ks.isRemote()) {
319                                        pop = reasoner.getPopularity(cls);
320                                } else {
321                                        pop = ((LocalModelBasedSparqlEndpointKS) ks).getModel().getOntClass(cls.toStringID())
322                                                        .listInstances().toSet().size();
323                                }
324                                //we skip classes with no instances
325                                if (pop == 0)
326                                        continue;
327
328                                // compute the estimated precision
329                                double precision = Heuristics.getConfidenceInterval95WaldAverage(pop, overlap);
330
331                                // compute the estimated recall
332                                double recall = Heuristics.getConfidenceInterval95WaldAverage(popularity, overlap);
333
334                                // compute the final score
335                                double score = 1 - Heuristics.getFScore(recall, precision);
336
337                                evalDesc = new EvaluatedDescription(cls, new AxiomScore(score));
338                        } else {
339                                evalDesc = new EvaluatedDescription(cls, new AxiomScore(1));
340                        }
341
342                        evalDescs.add(evalDesc);
343                }
344
345                //secondly, create disjoint class expressions with score 1 - (#occurence/#all)
346                OWLClass cls;
347                for (Entry<OWLClass, Integer> entry : sortByValues(class2Count)) {
348                        cls = entry.getKey();
349                        // drop classes from OWL and RDF namespace
350                        if (cls.getIRI().isReservedVocabulary())
351                                continue;
352                        if (useClassPopularity) {
353                                int overlap = entry.getValue();
354                                int pop;
355                                if (ks.isRemote()) {
356                                        pop = reasoner.getPopularity(cls);
357                                } else {
358                                        pop = ((LocalModelBasedSparqlEndpointKS) ks).getModel().getOntClass(cls.toStringID())
359                                                        .listInstances().toSet().size();
360                                }
361                                // we skip classes with no instances
362                                if (pop == 0)
363                                        continue;
364
365                                // compute the estimated precision
366                                double precision = Heuristics.getConfidenceInterval95WaldAverage(pop, overlap);
367
368                                // compute the estimated recall
369                                double recall = Heuristics.getConfidenceInterval95WaldAverage(popularity, overlap);
370
371                                // compute the final score
372                                double score = 1 - Heuristics.getFScore(recall, precision);
373
374                                evalDesc = new EvaluatedDescription(cls, new AxiomScore(score));
375                        } else {
376                                evalDesc = new EvaluatedDescription(cls, new AxiomScore(1));
377                        }
378                        evalDescs.add(evalDesc);
379                }
380
381                class2Count.put(entityToDescribe, total);
382                return evalDescs;
383        }
384
385        private void keepMostGeneralClasses(Set<OWLClass> classes) {
386                if (ks.isRemote()) {
387                        if (reasoner.isPrepared()) {
388                                ClassHierarchy h = reasoner.getClassHierarchy();
389                                for (OWLClass nc : new HashSet<>(classes)) {
390                                        classes.removeAll(h.getSubClasses(nc));
391                                }
392                        }
393                } else {
394                        OntModel model = ((LocalModelBasedSparqlEndpointKS) ks).getModel();
395
396                        //                      Set<OWLClass> topClasses = new HashSet<OWLClass>();
397                        //                      for(OntClass cls : model.listOWLClasses().toSet()){
398                        //                              Set<OntClass> superClasses = cls.listSuperClasses().toSet();
399                        //                              if(superClasses.isEmpty() ||
400                        //                                              (superClasses.size() == 1 && superClasses.contains(model.getOntClass(org.apache.jena.vocabulary.OWL.Thing.getURI())))){
401                        //                                      topClasses.add(df.getOWLClass(IRI.create(cls.getURI()));
402                        //                              }
403                        //
404                        //                      }
405                        //                      classes.retainAll(topClasses);
406                        for (OWLClass nc : new HashSet<>(classes)) {//System.out.print(nc + "::");
407                                for (OntClass cls : model.getOntClass(nc.toStringID()).listSubClasses().toSet()) {//System.out.print(cls + "|");
408                                        classes.remove(df.getOWLClass(IRI.create(cls.getURI())));
409                                }
410                                //                              System.out.println();
411                        }
412
413                }
414        }
415
416        private void computeAllDisjointClassAxiomsOptimized() {
417                //get number of instances of A
418                int instanceCountA = reasoner.getPopularity(entityToDescribe);
419
420                //firstly, we compute the disjointness to all sibling classes
421                Set<EvaluatedDescription> disjointessOfSiblings = computeDisjointessOfSiblings(entityToDescribe);
422                System.out.println(disjointessOfSiblings);
423
424                //we go the hierarchy up
425                SortedSet<OWLClassExpression> superClasses = reasoner.getSuperClasses(entityToDescribe);
426                for (OWLClassExpression sup : superClasses) {
427                        Set<EvaluatedDescription> disjointessOfSuperClass = computeDisjointessOfSiblings(sup.asOWLClass());
428                        System.out.println(disjointessOfSuperClass);
429                }
430        }
431
432        private Set<EvaluatedDescription> computeDisjointessOfSiblings(OWLClass cls) {
433                Set<EvaluatedDescription> evaluatedDescriptions = new HashSet<>();
434
435                //get number of instances of A
436                int instanceCountA = reasoner.getPopularity(cls);
437
438                if (instanceCountA > 0) {
439                        //we compute the disjointness to all sibling classes
440                        Set<OWLClass> siblingClasses = reasoner.getSiblingClasses(cls);
441
442                        for (OWLClass sib : siblingClasses) {
443                                //get number of instances of B
444                                int instanceCountB = reasoner.getPopularity(sib);
445
446                                if (instanceCountB > 0) {
447                                        //get number of instances of (A and B)
448                                        int instanceCountAB = reasoner.getPopularityOf(df.getOWLObjectIntersectionOf(cls, sib));
449
450                                        // compute the estimated precision
451                                        double precision = Heuristics.getConfidenceInterval95WaldAverage(instanceCountB, instanceCountAB);
452
453                                        // compute the estimated recall
454                                        double recall = Heuristics.getConfidenceInterval95WaldAverage(instanceCountA, instanceCountAB);
455
456                                        // compute the final score
457                                        double score = 1 - Heuristics.getFScore(recall, precision);
458
459                                        EvaluatedDescription evalDesc = new EvaluatedDescription(sib, new AxiomScore(score));
460                                        evaluatedDescriptions.add(evalDesc);
461                                }
462                        }
463                }
464
465                return evaluatedDescriptions;
466        }
467
468        public EvaluatedAxiom<OWLDisjointClassesAxiom> computeDisjointess(OWLClass clsA, OWLClass clsB) {
469                logger.debug("Computing disjointness between " + clsA + " and " + clsB + "...");
470
471                //if clsA = clsB
472                if (clsA.equals(clsB)) {
473                        return new EvaluatedAxiom<>(df.getOWLDisjointClassesAxiom(clsA, clsB),
474                                        new AxiomScore(0d, 1d));
475                }
476
477                //if the classes are connected via subsumption we assume that they are not disjoint
478                if (reasoner.isSuperClassOf(clsA, clsB) || reasoner.isSuperClassOf(clsB, clsA)) {
479                        return new EvaluatedAxiom<>(df.getOWLDisjointClassesAxiom(clsA, clsB),
480                                        new AxiomScore(0d, 1d));
481                }
482
483                double scoreValue = 0;
484
485                //get number of instances of A
486                int instanceCountA = reasoner.getPopularity(clsA);
487
488                //get number of instances of B
489                int instanceCountB = reasoner.getPopularity(clsB);
490
491                if (instanceCountA > 0 && instanceCountB > 0) {
492                        //get number of instances of (A and B)
493                        int instanceCountAB = reasoner.getPopularityOf(df.getOWLObjectIntersectionOf(clsA, clsB));
494
495                        // compute the estimated precision
496                        double precision = Heuristics.getConfidenceInterval95WaldAverage(instanceCountB, instanceCountAB);
497
498                        // compute the estimated recall
499                        double recall = Heuristics.getConfidenceInterval95WaldAverage(instanceCountA, instanceCountAB);
500
501                        // compute the final score
502                        scoreValue = 1 - Heuristics.getFScore(recall, precision);
503
504                }
505
506                AxiomScore score = new AxiomScore(scoreValue);
507
508                return new EvaluatedAxiom<>(df.getOWLDisjointClassesAxiom(clsA, clsB), score);
509        }
510
511        public Set<EvaluatedAxiom<OWLDisjointClassesAxiom>> computeSchemaDisjointness() {
512                Set<EvaluatedAxiom<OWLDisjointClassesAxiom>> axioms = new HashSet<>();
513
514                Set<OWLClass> classes = reasoner.getOWLClasses("http://dbpedia.org/ontology/");
515                computeDisjointness(classes);
516
517                //start from the top level classes, i.e. the classes whose direct super class is owl:Thing
518                SortedSet<OWLClassExpression> topLevelClasses = reasoner.getMostGeneralClasses();
519                axioms.addAll(computeDisjointness(asOWLClasses(topLevelClasses)));
520
521                for (OWLClassExpression cls : topLevelClasses) {
522
523                }
524
525                return axioms;
526        }
527
528        public Set<EvaluatedAxiom<OWLDisjointClassesAxiom>> computeDisjointness(Set<OWLClass> classes) {
529                Set<EvaluatedAxiom<OWLDisjointClassesAxiom>> axioms = new HashSet<>();
530
531                for (OWLClass clsA : classes) {
532                        for (OWLClass clsB : classes) {
533                                axioms.add(computeDisjointess(clsA, clsB));
534                        }
535                }
536
537                return axioms;
538        }
539
540        public static Set<OWLClass> asOWLClasses(Set<OWLClassExpression> descriptions) {
541                Set<OWLClass> classes = descriptions.stream()
542                                .filter(description -> !description.isAnonymous())
543                                .map(OWLClassExpression::asOWLClass)
544                                .collect(Collectors.toCollection(TreeSet::new));
545                return classes;
546        }
547        
548        public static void main(String[] args) throws Exception {
549                SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.create("http://sake.informatik.uni-leipzig.de:8890/sparql", "http://dbpedia.org"));
550                ks.init();
551                
552                DisjointClassesLearner la = new DisjointClassesLearner(ks);
553                la.setEntityToDescribe(new OWLClassImpl(IRI.create("http://dbpedia.org/ontology/Actor")));
554                la.setUseSampling(false);
555                la.init();
556                
557                la.start();
558                
559                la.getCurrentlyBestAxioms(10);
560        }
561}