001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.utilities.examples;
020
021import com.google.common.collect.*;
022import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
023import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp;
024import org.apache.jena.query.QueryExecution;
025import org.apache.jena.query.QuerySolution;
026import org.apache.jena.query.ResultSet;
027import org.dllearner.kb.sparql.SparqlEndpoint;
028import org.dllearner.reasoning.SPARQLReasoner;
029import org.dllearner.utilities.datastructures.SetManipulation;
030import org.semanticweb.owlapi.model.*;
031import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
035
036import java.util.*;
037import java.util.Map.Entry;
038
039import static org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2.Strategy.*;
040/**
041 * 
042 * Utility class for automatically retrieving negative examples from a 
043 * SPARQL endpoint given a set of positive examples.
044 * 
045 * @author Jens Lehmann
046 * @author Sebastian Hellmann
047 *
048 */
049public class AutomaticNegativeExampleFinderSPARQL2 {
050        
051        private static final Logger logger = LoggerFactory.getLogger(AutomaticNegativeExampleFinderSPARQL2.class.getSimpleName());
052        
053        private OWLDataFactory df = new OWLDataFactoryImpl();
054        
055        public enum Strategy{
056                SUPERCLASS, SIBLING, RANDOM
057        }
058
059        // for re-using existing queries
060        private SPARQLReasoner sr;
061
062        private String namespace;
063        private QueryExecutionFactory qef;
064        
065        public AutomaticNegativeExampleFinderSPARQL2(SparqlEndpoint se) {
066                this(new QueryExecutionFactoryHttp(se.getURL().toString(), se.getDefaultGraphURIs()));
067        }
068
069        public AutomaticNegativeExampleFinderSPARQL2(QueryExecutionFactory qef) {
070                this(new SPARQLReasoner(qef));
071        }
072
073        public AutomaticNegativeExampleFinderSPARQL2(SPARQLReasoner reasoner) {
074                this.sr = reasoner;
075                this.qef = reasoner.getQueryExecutionFactory();
076        }
077
078        public SortedSet<OWLIndividual> getNegativeExamples(OWLClass classToDescribe, Set<OWLIndividual> positiveExamples, int limit) {
079                return getNegativeExamples(classToDescribe, positiveExamples, Arrays.asList(SUPERCLASS, SIBLING, RANDOM), limit);
080        }
081        
082        public SortedSet<OWLIndividual> getNegativeExamples(OWLClass classToDescribe, Set<OWLIndividual> positiveExamples, Collection<Strategy> strategies, int limit) {
083                Map<Strategy, Double> strategiesWithWeight = Maps.newLinkedHashMap();
084                double weight = 1d/strategies.size();
085                for (Strategy strategy : strategies) {
086                        strategiesWithWeight.put(strategy, weight);
087                }
088                return getNegativeExamples(classToDescribe, positiveExamples, strategiesWithWeight, limit);
089        }
090        
091        public SortedSet<OWLIndividual> getNegativeExamples(OWLClass classToDescribe, Set<OWLIndividual> positiveExamples, Map<Strategy, Double> strategiesWithWeight, int maxNrOfReturnedInstances) {
092                //set class to describe as the type for each instance
093                Multiset<OWLClass> types = HashMultiset.create();
094                types.add(classToDescribe);
095                
096                return computeNegativeExamples(classToDescribe, types, strategiesWithWeight, maxNrOfReturnedInstances);
097        }
098        
099        public SortedSet<OWLIndividual> getNegativeExamples(Set<OWLIndividual> positiveExamples, int limit) {
100                return getNegativeExamples(positiveExamples, Arrays.asList(SUPERCLASS, SIBLING, RANDOM), limit);
101        }
102        
103        public SortedSet<OWLIndividual> getNegativeExamples(Set<OWLIndividual> positiveExamples, Collection<Strategy> strategies, int limit) {
104                Map<Strategy, Double> strategiesWithWeight = new HashMap<>();
105                double weight = 1d/strategies.size();
106                for (Strategy strategy : strategies) {
107                        strategiesWithWeight.put(strategy, weight);
108                }
109                return getNegativeExamples(positiveExamples, strategiesWithWeight, limit);
110        }
111        
112        public SortedSet<OWLIndividual> getNegativeExamples(Set<OWLIndividual> positiveExamples, Map<Strategy, Double> strategiesWithWeight, int maxNrOfReturnedInstances) {
113                //get the types for each instance
114                Multiset<OWLClass> types = HashMultiset.create();
115                for (OWLIndividual ex : positiveExamples) {
116                        types.addAll(sr.getTypes(ex));
117                }
118                
119                //remove types that do not have the given namespace
120                types = filterByNamespace(types);
121                
122                //keep the most specific types
123                keepMostSpecificClasses(types);
124                return computeNegativeExamples(null, types, strategiesWithWeight, maxNrOfReturnedInstances);
125        }
126
127        private SortedSet<OWLIndividual> computeNegativeExamples(OWLClass classToDescribe,
128                                                                                                                         Multiset<OWLClass> positiveExamplesTypes,
129                                                                                                                         Map<Strategy, Double> strategiesWithWeight,
130                                                                                                                         int maxNrOfReturnedInstances) {
131                SortedSet<OWLIndividual> negativeExamples = new TreeSet<>();
132                
133                for (Entry<Strategy, Double> entry : strategiesWithWeight.entrySet()) {
134                        Strategy strategy = entry.getKey();
135                        Double weight = entry.getValue();
136
137                        // the max number of instances returned by the current strategy
138                        int strategyLimit = (int)(weight * maxNrOfReturnedInstances);
139
140                        // the highest frequency value
141                        int maxFrequency = positiveExamplesTypes.entrySet().iterator().next().getCount();
142                        
143                        if(strategy == SIBLING){//get sibling class based examples
144                                negativeExamples.addAll(negativeExamplesBySiblingClasses(positiveExamplesTypes, strategyLimit, maxNrOfReturnedInstances));
145                        } else if(strategy == SUPERCLASS){//get super class based examples
146                                negativeExamples.addAll(negativeExamplesBySuperClasses(positiveExamplesTypes, negativeExamples, strategyLimit, maxNrOfReturnedInstances));
147                        } else if(strategy == RANDOM){//get some random examples
148                                logger.info("Applying random strategy...");
149                                SortedSet<OWLIndividual> randomNegativeExamples = new TreeSet<>();
150                                String query = "SELECT DISTINCT ?s WHERE {?s a ?type. ?type a owl:Class .";
151                                if(classToDescribe != null){
152                                        query += "FILTER NOT EXISTS{?s a <" + classToDescribe.toStringID() + "> }";
153                                } else {
154                                        for (OWLClass nc : positiveExamplesTypes.elementSet()) {
155                                                
156                                        }
157                                        throw new UnsupportedOperationException("Currently it's not possible to get random examples for unknown class to describe.");
158                                }
159                                
160                                query += "} LIMIT " + maxNrOfReturnedInstances;
161                                
162                                try(QueryExecution qe = qef.createQueryExecution(query)) {
163                                        ResultSet rs = qe.execSelect();
164                                        while (rs.hasNext()) {
165                                                QuerySolution qs = rs.next();
166                                                randomNegativeExamples.add(df.getOWLNamedIndividual(IRI.create(qs.getResource("s").getURI())));
167                                        }
168                                }
169                                randomNegativeExamples.removeAll(negativeExamples);
170                                negativeExamples.addAll(new ArrayList<>(randomNegativeExamples).subList(0, Math.min(randomNegativeExamples.size(), maxNrOfReturnedInstances - negativeExamples.size())));
171                                logger.info("Negative examples(" + randomNegativeExamples.size() + "): " + randomNegativeExamples);
172                        }
173                }
174        return negativeExamples;
175        }
176
177        private SortedSet<OWLIndividual> negativeExamplesBySuperClasses(Multiset<OWLClass> positiveExamplesTypes,
178                                                                                                                                        Set<OWLIndividual> negativeExamples,
179                                                                                                                                        int cnt, int totalCnt) {
180                logger.info("Applying super class strategy...");
181                SortedSet<OWLIndividual> negExamples = new TreeSet<>();
182                //for each type of the positive examples
183                for (OWLClass nc : positiveExamplesTypes.elementSet()) {
184                        int frequency = positiveExamplesTypes.count(nc);
185                        //get super classes
186                        Set<OWLClassExpression> superClasses = sr.getSuperClasses(nc);
187                        superClasses.remove(df.getOWLThing());
188//                                      superClasses.remove(Thing.instance);
189                        superClasses.remove(df.getOWLClass(OWLRDFVocabulary.RDFS_RESOURCE.getIRI()));
190                        superClasses = filterByNamespace(superClasses);
191                        logger.info("Super classes: " + superClasses);
192
193                        int limit = (int)Math.ceil(((double)frequency / positiveExamplesTypes.size()) / superClasses.size() * cnt);
194                        //get instances for each super class
195                        for (OWLClassExpression superClass : superClasses) {
196                                SortedSet<OWLIndividual> individuals = sr.getIndividualsExcluding(superClass, nc, totalCnt);
197                                individuals.removeAll(negativeExamples);
198                                individuals.removeAll(negExamples);
199                                SetManipulation.stableShrink(individuals, limit);
200                                negExamples.addAll(individuals);
201                        }
202                }
203                negExamples = SetManipulation.stableShrink(negExamples, cnt);
204                logger.info("Negative examples(" + negExamples.size() + "): " + negExamples);
205                return negExamples;
206        }
207
208        private SortedSet<OWLIndividual> negativeExamplesBySiblingClasses(Multiset<OWLClass> positiveExamplesTypes, int cnt, int totalCnt) {
209                logger.info("Applying sibling classes strategy...");
210                SortedSet<OWLIndividual> negExamples = new TreeSet<>();
211
212                // for each type of the positive examples
213                for (OWLClass nc : positiveExamplesTypes.elementSet()) {
214                        int frequency = positiveExamplesTypes.count(nc);
215
216                        // get sibling classes
217                        Set<OWLClass> siblingClasses = sr.getSiblingClasses(nc);
218                        siblingClasses = filterByNamespace(siblingClasses);
219                        logger.info("Sibling classes: " + siblingClasses);
220
221                        int limit = (int)Math.ceil(((double)frequency / positiveExamplesTypes.size()) / siblingClasses.size() * cnt);
222
223                        // get instances for each sibling class
224                        for (OWLClass siblingClass : siblingClasses) {
225                                SortedSet<OWLIndividual> individuals = sr.getIndividualsExcluding(siblingClass, nc, totalCnt);
226                                individuals.removeAll(negExamples);
227                                SetManipulation.stableShrink(individuals, limit);
228                                negExamples.addAll(individuals);
229                        }
230                }
231                negExamples = SetManipulation.stableShrink(negExamples, cnt);
232                logger.info("Negative examples(" + negExamples.size() + "): " + negExamples);
233                return negExamples;
234        }
235        
236        private <T extends OWLClassExpression> Set<T> filterByNamespace(Set<T> classes){
237                if(namespace != null){
238                        return Sets.filter(classes, input -> input.toString().startsWith(namespace));
239                }
240                return classes;
241        }
242        
243        private Multiset<OWLClass> filterByNamespace(Multiset<OWLClass> classes){
244                if(namespace != null){
245                        return Multisets.filter(classes, input -> input.toStringID().startsWith(namespace));
246                }
247                return classes;
248        }
249        
250        private void keepMostSpecificClasses(Multiset<OWLClass> classes){
251                HashMultiset<OWLClass> copy = HashMultiset.create(classes);
252                for (OWLClass nc1 : copy.elementSet()) {
253                        for (OWLClass nc2 : copy.elementSet()) {
254                                if(!nc1.equals(nc2)){
255                                        //remove class nc1 if it is superclass of another class nc2
256                                        boolean isSubClassOf = false;
257                                        if(sr.getClassHierarchy() != null){
258                                                isSubClassOf = sr.getClassHierarchy().isSubclassOf(nc2, nc1);
259                                        } else {
260                                                isSubClassOf = sr.isSuperClassOf(nc1, nc2);
261                                        }
262                                        if(isSubClassOf){
263                                                classes.remove(nc1, classes.count(nc1));
264                                                break;
265                                        }
266                                }
267                        }
268                }
269        }
270}