001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.utilities.examples;
020
021import java.util.Iterator;
022import java.util.SortedSet;
023import java.util.TreeSet;
024
025import org.apache.log4j.Logger;
026import org.dllearner.kb.sparql.SPARQLTasks;
027import org.dllearner.utilities.datastructures.SetManipulation;
028import org.dllearner.utilities.owl.OWLVocabulary;
029
030public class AutomaticNegativeExampleFinderSPARQL {
031
032        // LOGGER: ComponentManager
033        private static Logger logger = Logger.getLogger(AutomaticNegativeExampleFinderSPARQL.class);
034
035        private SPARQLTasks sparqltasks;
036
037        private SortedSet<String> filterClasses;
038        
039        private SortedSet<String> fullPositiveSet;
040        
041        private SortedSet<String> fromRelated  = new TreeSet<>();
042        private SortedSet<String> fromNearbyClasses  = new TreeSet<>();
043        private SortedSet<String> fromSuperclasses = new TreeSet<>();
044        private SortedSet<String> fromParallelClasses = new TreeSet<>();
045        private SortedSet<String> fromRandom = new TreeSet<>();
046        private SortedSet<String> fromDomain = new TreeSet<>();
047        private SortedSet<String> fromRange = new TreeSet<>();
048
049        static int poslimit = 10;
050        static int neglimit = 20;
051
052        
053        /**
054         * takes as input a full positive set to make sure no negatives are added as positives
055         *  
056         * @param fullPositiveSet
057         * @param st
058         */
059        public AutomaticNegativeExampleFinderSPARQL(
060                        SortedSet<String> fullPositiveSet,
061                        SPARQLTasks st, SortedSet<String> filterClasses) {
062                super();
063                this.fullPositiveSet = new TreeSet<>();
064                this.fullPositiveSet.addAll(fullPositiveSet);
065                this.sparqltasks = st;
066                this.filterClasses=filterClasses;
067        }
068        
069        
070        
071        /**
072         * see <code>  getNegativeExamples(int neglimit, boolean stable )</code>
073         * @param neglimit
074         */
075        public SortedSet<String> getNegativeExamples(int neglimit ) {
076                return getNegativeExamples(neglimit, false);
077        }
078
079        /**
080         * aggregates all collected neg examples
081         * CAVE: it is necessary to call one of the make functions before calling this
082         * OTHERWISE it will choose random examples
083         * 
084         * @param neglimit size of negative Example set, 0 means all, which can be quite large several thousands
085         * @param stable decides whether neg Examples are randomly picked, default false, faster for developing, since the cache can be used
086         */
087        public SortedSet<String> getNegativeExamples(int neglimit, boolean stable ) {
088                SortedSet<String> negatives = new TreeSet<>();
089                negatives.addAll(fromNearbyClasses);
090                negatives.addAll(fromParallelClasses);
091                negatives.addAll(fromRelated);
092                negatives.addAll(fromSuperclasses);
093                if(negatives.isEmpty()) {
094                        negatives.addAll(fromRandom);
095                }
096                if(neglimit<=0){
097                        logger.debug("neg Example size NO shrinking: " + negatives.size());
098                        return negatives;
099                }
100                
101                logger.debug("neg Example size before shrinking: " + negatives.size());
102                if (stable) {
103                        negatives = SetManipulation.stableShrink(negatives,neglimit);
104                }
105                else {
106                        negatives = SetManipulation.fuzzyShrink(negatives,neglimit);
107                }
108                logger.debug("neg Example size after shrinking: " + negatives.size());
109                return negatives;
110        }
111
112        
113        public void makeNegativeExamplesFromRandomInstances() {
114                logger.debug("making random examples ");
115                String variable = "subject";
116                String sparqlQueryString="SELECT ?"+variable+" WHERE {" +
117                                "?"+variable+" <" +OWLVocabulary.RDF_TYPE+">" + " ?o" + 
118                                "}";
119                
120                fromRandom = sparqltasks.queryAsSet(sparqlQueryString, variable);
121                fromRandom.removeAll(fullPositiveSet);
122                logger.debug("|-negExample size from random: " + fromRandom.size());
123        }
124        
125        /**
126         * makes neg ex from related instances, that take part in a role R(pos,neg)
127         * filters all objects, that don't use the given namespace 
128         * @param instances
129         * @param objectNamespace
130         */
131        public void makeNegativeExamplesFromRelatedInstances(SortedSet<String> instances,
132                        String objectNamespace) {
133                logger.debug("making examples from related instances");
134                for (String oneInstance : instances) {
135                        makeNegativeExamplesFromRelatedInstances(oneInstance, objectNamespace);
136                }
137                logger.debug("|-negExample size from related: " + fromRelated.size());
138        }
139
140        private void makeNegativeExamplesFromRelatedInstances(String oneInstance, String objectnamespace) {
141                // SortedSet<String> result = new TreeSet<String>();
142
143                String SPARQLquery = "SELECT * WHERE { \n" + "<" + oneInstance + "> " + "?p ?object. \n"
144                                + "FILTER (REGEX(str(?object), '" + objectnamespace + "')).\n" + "}";
145
146                fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "object"));
147                fromRelated.removeAll(fullPositiveSet);
148
149        }
150
151        // keep a while may still be needed
152        /*public void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) {
153                // SortedSet<String> result = new TreeSet<String>();
154
155                String SPARQLquery = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n"
156                                + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n"
157                                + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" + "}";
158
159                this.fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "o"));
160
161        }*/
162        
163        public void makeNegativeExamplesFromNearbyClasses(SortedSet<String> positiveSet, int sparqlResultLimit){
164                SortedSet<String> classes = new TreeSet<>();
165                Iterator<String> instanceIter = positiveSet.iterator();
166                while(classes.isEmpty() && instanceIter.hasNext()) {
167                        classes.addAll(sparqltasks.getClassesForInstance(instanceIter.next(), 100));
168                }
169                String concept=classes.first();
170                if (filterClasses!=null&&filterClasses.size()>0){
171                        boolean br=false;
172                        for (String oneClass : classes){
173                                for (String filterClass : filterClasses) {
174                                        if (oneClass.startsWith(filterClass)) {
175                                                break;
176                                        } else {
177                                                concept = oneClass;
178                                                br = true;
179                                                break;
180                                        }
181                                }
182                                if (br) break;
183                        }
184                }
185                concept = concept.replaceAll("\"", "");
186                SortedSet<String> superClasses = sparqltasks.getSuperClasses(concept, 1);
187                
188                classes = new TreeSet<>();
189                for (String oneSuperClass : superClasses) {
190                        classes.addAll(sparqltasks.getSubClasses(oneSuperClass, 1));
191                }
192                classes.remove(concept);
193                for (String oneClass : classes) {
194                        try{
195                                fromNearbyClasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\""
196                                                + oneClass + "\"", sparqlResultLimit));
197                        } catch (Exception e){e.printStackTrace();}
198                }
199                
200                this.fromNearbyClasses.removeAll(fullPositiveSet);
201        }
202        
203        /**
204         * makes negEx from classes, the posEx belong to.
205         * Gets all Classes from PosEx, gets Instances from these Classes, returns all
206         * @param positiveSet
207         * @param sparqlResultLimit
208         */
209        public void makeNegativeExamplesFromParallelClasses(SortedSet<String> positiveSet, int sparqlResultLimit){
210                makeNegativeExamplesFromClassesOfInstances(positiveSet, sparqlResultLimit);
211        }
212        
213        private void makeNegativeExamplesFromClassesOfInstances(SortedSet<String> positiveSet,
214                        int sparqlResultLimit) {
215                logger.debug("making neg Examples from parallel classes");
216                SortedSet<String> classes = new TreeSet<>();
217                // superClasses.add(concept.replace("\"", ""));
218                // logger.debug("before"+superClasses);
219                // superClasses = dbpediaGetSuperClasses( superClasses, 4);
220                // logger.debug("getting negExamples from "+superClasses.size()+"
221                // superclasses");
222
223                for (String instance : positiveSet) {
224                        try{
225                        classes.addAll(sparqltasks.getClassesForInstance(instance, sparqlResultLimit));
226                        }catch (Exception e) {
227                                e.printStackTrace();
228                                logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt");
229                        }
230                }
231                logger.debug("getting negExamples from " + classes.size() + " parallel classes");
232                for (String oneClass : classes) {
233                        logger.debug(oneClass);
234                        // rsc = new
235                        // JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",limit));
236                        try{
237                        this.fromParallelClasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\"" + oneClass
238                                        + "\"", sparqlResultLimit));
239                        }catch (Exception e) {
240                                logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt");
241                        }
242                }
243                
244                fromParallelClasses.removeAll(fullPositiveSet);
245                logger.debug("|-neg Example size from parallelclass: " + fromParallelClasses.size());
246
247        }
248
249        
250        
251        /**
252         * it gets the first class of an arbitrary  instance and queries the superclasses of it,
253         * could be more elaborate.
254         * It is better to use makeNegativeExamplesFromSuperClasses
255         * @param positiveSet
256         * @param sparqlResultSetLimit
257         */
258        public void makeNegativeExamplesFromSuperClassesOfInstances(SortedSet<String> positiveSet, 
259                        int sparqlResultSetLimit) {
260                SortedSet<String> classes = new TreeSet<>();
261                Iterator<String> instanceIter = positiveSet.iterator();
262                while(classes.isEmpty() && instanceIter.hasNext()) {
263                        classes.addAll(sparqltasks.getClassesForInstance(instanceIter.next(), sparqlResultSetLimit));
264                
265                }
266                makeNegativeExamplesFromSuperClasses(classes.first(), sparqlResultSetLimit);
267        }
268        
269
270        public void makeNegativeExamplesFromSuperClasses(String concept, int sparqlResultSetLimit) {
271                makeNegativeExamplesFromSuperClasses( concept,  sparqlResultSetLimit, 2);
272        }
273        
274        /**
275         * if pos ex derive from one class, then neg ex are taken from a superclass
276         * @param concept
277         * @param sparqlResultSetLimit
278         */
279        public void makeNegativeExamplesFromSuperClasses(String concept, int sparqlResultSetLimit, int depth) {
280
281                concept = concept.replaceAll("\"", "");
282                // superClasses.add(concept.replace("\"", ""));
283                // logger.debug("before"+superClasses);
284                SortedSet<String> superClasses = sparqltasks.getSuperClasses(concept, depth);
285                logger.debug("making neg Examples from " + superClasses.size() + " superclasses");
286
287                for (String oneSuperClass : superClasses) {
288                        logger.debug(oneSuperClass);
289                        fromSuperclasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\""
290                                        + oneSuperClass + "\"", sparqlResultSetLimit));
291
292                }
293                this.fromSuperclasses.removeAll(fullPositiveSet);
294                logger.debug("|-neg Example from superclass: " + fromSuperclasses.size());
295        }
296        
297        @SuppressWarnings("unused")
298        private void makeNegativeExamplesFromDomain(String role, int sparqlResultSetLimit){
299                logger.debug("making Negative Examples from Domain of : "+role);
300                fromDomain.addAll(sparqltasks.getDomainInstances(role, sparqlResultSetLimit));
301                fromDomain.removeAll(fullPositiveSet);
302                logger.debug("|-neg Example size from Domain: "+this.fromDomain.size());
303        }
304        
305        @SuppressWarnings("unused")
306        private void makeNegativeExamplesFromRange(String role, int sparqlResultSetLimit){
307                logger.debug("making Negative Examples from Range of : "+role);
308                fromRange.addAll(sparqltasks.getRangeInstances(role, sparqlResultSetLimit));
309                fromRange.removeAll(fullPositiveSet);
310                logger.debug("|-neg Example size from Range: "+fromRange.size());
311        }
312}