001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.utilities.examples; 020 021import java.util.Iterator; 022import java.util.SortedSet; 023import java.util.TreeSet; 024 025import org.apache.log4j.Logger; 026import org.dllearner.kb.sparql.SPARQLTasks; 027import org.dllearner.utilities.datastructures.SetManipulation; 028import org.dllearner.utilities.owl.OWLVocabulary; 029 030public class AutomaticNegativeExampleFinderSPARQL { 031 032 // LOGGER: ComponentManager 033 private static Logger logger = Logger.getLogger(AutomaticNegativeExampleFinderSPARQL.class); 034 035 private SPARQLTasks sparqltasks; 036 037 private SortedSet<String> filterClasses; 038 039 private SortedSet<String> fullPositiveSet; 040 041 private SortedSet<String> fromRelated = new TreeSet<>(); 042 private SortedSet<String> fromNearbyClasses = new TreeSet<>(); 043 private SortedSet<String> fromSuperclasses = new TreeSet<>(); 044 private SortedSet<String> fromParallelClasses = new TreeSet<>(); 045 private SortedSet<String> fromRandom = new TreeSet<>(); 046 private SortedSet<String> fromDomain = new TreeSet<>(); 047 private SortedSet<String> fromRange = new TreeSet<>(); 048 049 static int poslimit = 10; 050 static int neglimit = 20; 051 052 053 /** 054 * takes as input a full positive set to make sure no negatives are added as positives 055 * 056 * @param fullPositiveSet 057 * @param st 058 */ 059 public AutomaticNegativeExampleFinderSPARQL( 060 SortedSet<String> fullPositiveSet, 061 SPARQLTasks st, SortedSet<String> filterClasses) { 062 super(); 063 this.fullPositiveSet = new TreeSet<>(); 064 this.fullPositiveSet.addAll(fullPositiveSet); 065 this.sparqltasks = st; 066 this.filterClasses=filterClasses; 067 } 068 069 070 071 /** 072 * see <code> getNegativeExamples(int neglimit, boolean stable )</code> 073 * @param neglimit 074 */ 075 public SortedSet<String> getNegativeExamples(int neglimit ) { 076 return getNegativeExamples(neglimit, false); 077 } 078 079 /** 080 * aggregates all collected neg examples 081 * CAVE: it is necessary to call one of the make functions before calling this 082 * OTHERWISE it will choose random examples 083 * 084 * @param neglimit size of negative Example set, 0 means all, which can be quite large several thousands 085 * @param stable decides whether neg Examples are randomly picked, default false, faster for developing, since the cache can be used 086 */ 087 public SortedSet<String> getNegativeExamples(int neglimit, boolean stable ) { 088 SortedSet<String> negatives = new TreeSet<>(); 089 negatives.addAll(fromNearbyClasses); 090 negatives.addAll(fromParallelClasses); 091 negatives.addAll(fromRelated); 092 negatives.addAll(fromSuperclasses); 093 if(negatives.isEmpty()) { 094 negatives.addAll(fromRandom); 095 } 096 if(neglimit<=0){ 097 logger.debug("neg Example size NO shrinking: " + negatives.size()); 098 return negatives; 099 } 100 101 logger.debug("neg Example size before shrinking: " + negatives.size()); 102 if (stable) { 103 negatives = SetManipulation.stableShrink(negatives,neglimit); 104 } 105 else { 106 negatives = SetManipulation.fuzzyShrink(negatives,neglimit); 107 } 108 logger.debug("neg Example size after shrinking: " + negatives.size()); 109 return negatives; 110 } 111 112 113 public void makeNegativeExamplesFromRandomInstances() { 114 logger.debug("making random examples "); 115 String variable = "subject"; 116 String sparqlQueryString="SELECT ?"+variable+" WHERE {" + 117 "?"+variable+" <" +OWLVocabulary.RDF_TYPE+">" + " ?o" + 118 "}"; 119 120 fromRandom = sparqltasks.queryAsSet(sparqlQueryString, variable); 121 fromRandom.removeAll(fullPositiveSet); 122 logger.debug("|-negExample size from random: " + fromRandom.size()); 123 } 124 125 /** 126 * makes neg ex from related instances, that take part in a role R(pos,neg) 127 * filters all objects, that don't use the given namespace 128 * @param instances 129 * @param objectNamespace 130 */ 131 public void makeNegativeExamplesFromRelatedInstances(SortedSet<String> instances, 132 String objectNamespace) { 133 logger.debug("making examples from related instances"); 134 for (String oneInstance : instances) { 135 makeNegativeExamplesFromRelatedInstances(oneInstance, objectNamespace); 136 } 137 logger.debug("|-negExample size from related: " + fromRelated.size()); 138 } 139 140 private void makeNegativeExamplesFromRelatedInstances(String oneInstance, String objectnamespace) { 141 // SortedSet<String> result = new TreeSet<String>(); 142 143 String SPARQLquery = "SELECT * WHERE { \n" + "<" + oneInstance + "> " + "?p ?object. \n" 144 + "FILTER (REGEX(str(?object), '" + objectnamespace + "')).\n" + "}"; 145 146 fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "object")); 147 fromRelated.removeAll(fullPositiveSet); 148 149 } 150 151 // keep a while may still be needed 152 /*public void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) { 153 // SortedSet<String> result = new TreeSet<String>(); 154 155 String SPARQLquery = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" 156 + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n" 157 + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" + "}"; 158 159 this.fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "o")); 160 161 }*/ 162 163 public void makeNegativeExamplesFromNearbyClasses(SortedSet<String> positiveSet, int sparqlResultLimit){ 164 SortedSet<String> classes = new TreeSet<>(); 165 Iterator<String> instanceIter = positiveSet.iterator(); 166 while(classes.isEmpty() && instanceIter.hasNext()) { 167 classes.addAll(sparqltasks.getClassesForInstance(instanceIter.next(), 100)); 168 } 169 String concept=classes.first(); 170 if (filterClasses!=null&&filterClasses.size()>0){ 171 boolean br=false; 172 for (String oneClass : classes){ 173 for (String filterClass : filterClasses) { 174 if (oneClass.startsWith(filterClass)) { 175 break; 176 } else { 177 concept = oneClass; 178 br = true; 179 break; 180 } 181 } 182 if (br) break; 183 } 184 } 185 concept = concept.replaceAll("\"", ""); 186 SortedSet<String> superClasses = sparqltasks.getSuperClasses(concept, 1); 187 188 classes = new TreeSet<>(); 189 for (String oneSuperClass : superClasses) { 190 classes.addAll(sparqltasks.getSubClasses(oneSuperClass, 1)); 191 } 192 classes.remove(concept); 193 for (String oneClass : classes) { 194 try{ 195 fromNearbyClasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\"" 196 + oneClass + "\"", sparqlResultLimit)); 197 } catch (Exception e){e.printStackTrace();} 198 } 199 200 this.fromNearbyClasses.removeAll(fullPositiveSet); 201 } 202 203 /** 204 * makes negEx from classes, the posEx belong to. 205 * Gets all Classes from PosEx, gets Instances from these Classes, returns all 206 * @param positiveSet 207 * @param sparqlResultLimit 208 */ 209 public void makeNegativeExamplesFromParallelClasses(SortedSet<String> positiveSet, int sparqlResultLimit){ 210 makeNegativeExamplesFromClassesOfInstances(positiveSet, sparqlResultLimit); 211 } 212 213 private void makeNegativeExamplesFromClassesOfInstances(SortedSet<String> positiveSet, 214 int sparqlResultLimit) { 215 logger.debug("making neg Examples from parallel classes"); 216 SortedSet<String> classes = new TreeSet<>(); 217 // superClasses.add(concept.replace("\"", "")); 218 // logger.debug("before"+superClasses); 219 // superClasses = dbpediaGetSuperClasses( superClasses, 4); 220 // logger.debug("getting negExamples from "+superClasses.size()+" 221 // superclasses"); 222 223 for (String instance : positiveSet) { 224 try{ 225 classes.addAll(sparqltasks.getClassesForInstance(instance, sparqlResultLimit)); 226 }catch (Exception e) { 227 e.printStackTrace(); 228 logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt"); 229 } 230 } 231 logger.debug("getting negExamples from " + classes.size() + " parallel classes"); 232 for (String oneClass : classes) { 233 logger.debug(oneClass); 234 // rsc = new 235 // JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",limit)); 236 try{ 237 this.fromParallelClasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\"" + oneClass 238 + "\"", sparqlResultLimit)); 239 }catch (Exception e) { 240 logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt"); 241 } 242 } 243 244 fromParallelClasses.removeAll(fullPositiveSet); 245 logger.debug("|-neg Example size from parallelclass: " + fromParallelClasses.size()); 246 247 } 248 249 250 251 /** 252 * it gets the first class of an arbitrary instance and queries the superclasses of it, 253 * could be more elaborate. 254 * It is better to use makeNegativeExamplesFromSuperClasses 255 * @param positiveSet 256 * @param sparqlResultSetLimit 257 */ 258 public void makeNegativeExamplesFromSuperClassesOfInstances(SortedSet<String> positiveSet, 259 int sparqlResultSetLimit) { 260 SortedSet<String> classes = new TreeSet<>(); 261 Iterator<String> instanceIter = positiveSet.iterator(); 262 while(classes.isEmpty() && instanceIter.hasNext()) { 263 classes.addAll(sparqltasks.getClassesForInstance(instanceIter.next(), sparqlResultSetLimit)); 264 265 } 266 makeNegativeExamplesFromSuperClasses(classes.first(), sparqlResultSetLimit); 267 } 268 269 270 public void makeNegativeExamplesFromSuperClasses(String concept, int sparqlResultSetLimit) { 271 makeNegativeExamplesFromSuperClasses( concept, sparqlResultSetLimit, 2); 272 } 273 274 /** 275 * if pos ex derive from one class, then neg ex are taken from a superclass 276 * @param concept 277 * @param sparqlResultSetLimit 278 */ 279 public void makeNegativeExamplesFromSuperClasses(String concept, int sparqlResultSetLimit, int depth) { 280 281 concept = concept.replaceAll("\"", ""); 282 // superClasses.add(concept.replace("\"", "")); 283 // logger.debug("before"+superClasses); 284 SortedSet<String> superClasses = sparqltasks.getSuperClasses(concept, depth); 285 logger.debug("making neg Examples from " + superClasses.size() + " superclasses"); 286 287 for (String oneSuperClass : superClasses) { 288 logger.debug(oneSuperClass); 289 fromSuperclasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\"" 290 + oneSuperClass + "\"", sparqlResultSetLimit)); 291 292 } 293 this.fromSuperclasses.removeAll(fullPositiveSet); 294 logger.debug("|-neg Example from superclass: " + fromSuperclasses.size()); 295 } 296 297 @SuppressWarnings("unused") 298 private void makeNegativeExamplesFromDomain(String role, int sparqlResultSetLimit){ 299 logger.debug("making Negative Examples from Domain of : "+role); 300 fromDomain.addAll(sparqltasks.getDomainInstances(role, sparqlResultSetLimit)); 301 fromDomain.removeAll(fullPositiveSet); 302 logger.debug("|-neg Example size from Domain: "+this.fromDomain.size()); 303 } 304 305 @SuppressWarnings("unused") 306 private void makeNegativeExamplesFromRange(String role, int sparqlResultSetLimit){ 307 logger.debug("making Negative Examples from Range of : "+role); 308 fromRange.addAll(sparqltasks.getRangeInstances(role, sparqlResultSetLimit)); 309 fromRange.removeAll(fullPositiveSet); 310 logger.debug("|-neg Example size from Range: "+fromRange.size()); 311 } 312}