001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.utilities.examples; 020 021import com.google.common.collect.*; 022import org.aksw.jena_sparql_api.core.QueryExecutionFactory; 023import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp; 024import org.apache.jena.query.QueryExecution; 025import org.apache.jena.query.QuerySolution; 026import org.apache.jena.query.ResultSet; 027import org.dllearner.kb.sparql.SparqlEndpoint; 028import org.dllearner.reasoning.SPARQLReasoner; 029import org.dllearner.utilities.datastructures.SetManipulation; 030import org.semanticweb.owlapi.model.*; 031import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; 035 036import java.util.*; 037import java.util.Map.Entry; 038 039import static org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2.Strategy.*; 040/** 041 * 042 * Utility class for automatically retrieving negative examples from a 043 * SPARQL endpoint given a set of positive examples. 044 * 045 * @author Jens Lehmann 046 * @author Sebastian Hellmann 047 * 048 */ 049public class AutomaticNegativeExampleFinderSPARQL2 { 050 051 private static final Logger logger = LoggerFactory.getLogger(AutomaticNegativeExampleFinderSPARQL2.class.getSimpleName()); 052 053 private OWLDataFactory df = new OWLDataFactoryImpl(); 054 055 public enum Strategy{ 056 SUPERCLASS, SIBLING, RANDOM 057 } 058 059 // for re-using existing queries 060 private SPARQLReasoner sr; 061 062 private String namespace; 063 private QueryExecutionFactory qef; 064 065 public AutomaticNegativeExampleFinderSPARQL2(SparqlEndpoint se) { 066 this(new QueryExecutionFactoryHttp(se.getURL().toString(), se.getDefaultGraphURIs())); 067 } 068 069 public AutomaticNegativeExampleFinderSPARQL2(QueryExecutionFactory qef) { 070 this(new SPARQLReasoner(qef)); 071 } 072 073 public AutomaticNegativeExampleFinderSPARQL2(SPARQLReasoner reasoner) { 074 this.sr = reasoner; 075 this.qef = reasoner.getQueryExecutionFactory(); 076 } 077 078 public SortedSet<OWLIndividual> getNegativeExamples(OWLClass classToDescribe, Set<OWLIndividual> positiveExamples, int limit) { 079 return getNegativeExamples(classToDescribe, positiveExamples, Arrays.asList(SUPERCLASS, SIBLING, RANDOM), limit); 080 } 081 082 public SortedSet<OWLIndividual> getNegativeExamples(OWLClass classToDescribe, Set<OWLIndividual> positiveExamples, Collection<Strategy> strategies, int limit) { 083 Map<Strategy, Double> strategiesWithWeight = Maps.newLinkedHashMap(); 084 double weight = 1d/strategies.size(); 085 for (Strategy strategy : strategies) { 086 strategiesWithWeight.put(strategy, weight); 087 } 088 return getNegativeExamples(classToDescribe, positiveExamples, strategiesWithWeight, limit); 089 } 090 091 public SortedSet<OWLIndividual> getNegativeExamples(OWLClass classToDescribe, Set<OWLIndividual> positiveExamples, Map<Strategy, Double> strategiesWithWeight, int maxNrOfReturnedInstances) { 092 //set class to describe as the type for each instance 093 Multiset<OWLClass> types = HashMultiset.create(); 094 types.add(classToDescribe); 095 096 return computeNegativeExamples(classToDescribe, types, strategiesWithWeight, maxNrOfReturnedInstances); 097 } 098 099 public SortedSet<OWLIndividual> getNegativeExamples(Set<OWLIndividual> positiveExamples, int limit) { 100 return getNegativeExamples(positiveExamples, Arrays.asList(SUPERCLASS, SIBLING, RANDOM), limit); 101 } 102 103 public SortedSet<OWLIndividual> getNegativeExamples(Set<OWLIndividual> positiveExamples, Collection<Strategy> strategies, int limit) { 104 Map<Strategy, Double> strategiesWithWeight = new HashMap<>(); 105 double weight = 1d/strategies.size(); 106 for (Strategy strategy : strategies) { 107 strategiesWithWeight.put(strategy, weight); 108 } 109 return getNegativeExamples(positiveExamples, strategiesWithWeight, limit); 110 } 111 112 public SortedSet<OWLIndividual> getNegativeExamples(Set<OWLIndividual> positiveExamples, Map<Strategy, Double> strategiesWithWeight, int maxNrOfReturnedInstances) { 113 //get the types for each instance 114 Multiset<OWLClass> types = HashMultiset.create(); 115 for (OWLIndividual ex : positiveExamples) { 116 types.addAll(sr.getTypes(ex)); 117 } 118 119 //remove types that do not have the given namespace 120 types = filterByNamespace(types); 121 122 //keep the most specific types 123 keepMostSpecificClasses(types); 124 return computeNegativeExamples(null, types, strategiesWithWeight, maxNrOfReturnedInstances); 125 } 126 127 private SortedSet<OWLIndividual> computeNegativeExamples(OWLClass classToDescribe, 128 Multiset<OWLClass> positiveExamplesTypes, 129 Map<Strategy, Double> strategiesWithWeight, 130 int maxNrOfReturnedInstances) { 131 SortedSet<OWLIndividual> negativeExamples = new TreeSet<>(); 132 133 for (Entry<Strategy, Double> entry : strategiesWithWeight.entrySet()) { 134 Strategy strategy = entry.getKey(); 135 Double weight = entry.getValue(); 136 137 // the max number of instances returned by the current strategy 138 int strategyLimit = (int)(weight * maxNrOfReturnedInstances); 139 140 // the highest frequency value 141 int maxFrequency = positiveExamplesTypes.entrySet().iterator().next().getCount(); 142 143 if(strategy == SIBLING){//get sibling class based examples 144 negativeExamples.addAll(negativeExamplesBySiblingClasses(positiveExamplesTypes, strategyLimit, maxNrOfReturnedInstances)); 145 } else if(strategy == SUPERCLASS){//get super class based examples 146 negativeExamples.addAll(negativeExamplesBySuperClasses(positiveExamplesTypes, negativeExamples, strategyLimit, maxNrOfReturnedInstances)); 147 } else if(strategy == RANDOM){//get some random examples 148 logger.info("Applying random strategy..."); 149 SortedSet<OWLIndividual> randomNegativeExamples = new TreeSet<>(); 150 String query = "SELECT DISTINCT ?s WHERE {?s a ?type. ?type a owl:Class ."; 151 if(classToDescribe != null){ 152 query += "FILTER NOT EXISTS{?s a <" + classToDescribe.toStringID() + "> }"; 153 } else { 154 for (OWLClass nc : positiveExamplesTypes.elementSet()) { 155 156 } 157 throw new UnsupportedOperationException("Currently it's not possible to get random examples for unknown class to describe."); 158 } 159 160 query += "} LIMIT " + maxNrOfReturnedInstances; 161 162 try(QueryExecution qe = qef.createQueryExecution(query)) { 163 ResultSet rs = qe.execSelect(); 164 while (rs.hasNext()) { 165 QuerySolution qs = rs.next(); 166 randomNegativeExamples.add(df.getOWLNamedIndividual(IRI.create(qs.getResource("s").getURI()))); 167 } 168 } 169 randomNegativeExamples.removeAll(negativeExamples); 170 negativeExamples.addAll(new ArrayList<>(randomNegativeExamples).subList(0, Math.min(randomNegativeExamples.size(), maxNrOfReturnedInstances - negativeExamples.size()))); 171 logger.info("Negative examples(" + randomNegativeExamples.size() + "): " + randomNegativeExamples); 172 } 173 } 174 return negativeExamples; 175 } 176 177 private SortedSet<OWLIndividual> negativeExamplesBySuperClasses(Multiset<OWLClass> positiveExamplesTypes, 178 Set<OWLIndividual> negativeExamples, 179 int cnt, int totalCnt) { 180 logger.info("Applying super class strategy..."); 181 SortedSet<OWLIndividual> negExamples = new TreeSet<>(); 182 //for each type of the positive examples 183 for (OWLClass nc : positiveExamplesTypes.elementSet()) { 184 int frequency = positiveExamplesTypes.count(nc); 185 //get super classes 186 Set<OWLClassExpression> superClasses = sr.getSuperClasses(nc); 187 superClasses.remove(df.getOWLThing()); 188// superClasses.remove(Thing.instance); 189 superClasses.remove(df.getOWLClass(OWLRDFVocabulary.RDFS_RESOURCE.getIRI())); 190 superClasses = filterByNamespace(superClasses); 191 logger.info("Super classes: " + superClasses); 192 193 int limit = (int)Math.ceil(((double)frequency / positiveExamplesTypes.size()) / superClasses.size() * cnt); 194 //get instances for each super class 195 for (OWLClassExpression superClass : superClasses) { 196 SortedSet<OWLIndividual> individuals = sr.getIndividualsExcluding(superClass, nc, totalCnt); 197 individuals.removeAll(negativeExamples); 198 individuals.removeAll(negExamples); 199 SetManipulation.stableShrink(individuals, limit); 200 negExamples.addAll(individuals); 201 } 202 } 203 negExamples = SetManipulation.stableShrink(negExamples, cnt); 204 logger.info("Negative examples(" + negExamples.size() + "): " + negExamples); 205 return negExamples; 206 } 207 208 private SortedSet<OWLIndividual> negativeExamplesBySiblingClasses(Multiset<OWLClass> positiveExamplesTypes, int cnt, int totalCnt) { 209 logger.info("Applying sibling classes strategy..."); 210 SortedSet<OWLIndividual> negExamples = new TreeSet<>(); 211 212 // for each type of the positive examples 213 for (OWLClass nc : positiveExamplesTypes.elementSet()) { 214 int frequency = positiveExamplesTypes.count(nc); 215 216 // get sibling classes 217 Set<OWLClass> siblingClasses = sr.getSiblingClasses(nc); 218 siblingClasses = filterByNamespace(siblingClasses); 219 logger.info("Sibling classes: " + siblingClasses); 220 221 int limit = (int)Math.ceil(((double)frequency / positiveExamplesTypes.size()) / siblingClasses.size() * cnt); 222 223 // get instances for each sibling class 224 for (OWLClass siblingClass : siblingClasses) { 225 SortedSet<OWLIndividual> individuals = sr.getIndividualsExcluding(siblingClass, nc, totalCnt); 226 individuals.removeAll(negExamples); 227 SetManipulation.stableShrink(individuals, limit); 228 negExamples.addAll(individuals); 229 } 230 } 231 negExamples = SetManipulation.stableShrink(negExamples, cnt); 232 logger.info("Negative examples(" + negExamples.size() + "): " + negExamples); 233 return negExamples; 234 } 235 236 private <T extends OWLClassExpression> Set<T> filterByNamespace(Set<T> classes){ 237 if(namespace != null){ 238 return Sets.filter(classes, input -> input.toString().startsWith(namespace)); 239 } 240 return classes; 241 } 242 243 private Multiset<OWLClass> filterByNamespace(Multiset<OWLClass> classes){ 244 if(namespace != null){ 245 return Multisets.filter(classes, input -> input.toStringID().startsWith(namespace)); 246 } 247 return classes; 248 } 249 250 private void keepMostSpecificClasses(Multiset<OWLClass> classes){ 251 HashMultiset<OWLClass> copy = HashMultiset.create(classes); 252 for (OWLClass nc1 : copy.elementSet()) { 253 for (OWLClass nc2 : copy.elementSet()) { 254 if(!nc1.equals(nc2)){ 255 //remove class nc1 if it is superclass of another class nc2 256 boolean isSubClassOf = false; 257 if(sr.getClassHierarchy() != null){ 258 isSubClassOf = sr.getClassHierarchy().isSubclassOf(nc2, nc1); 259 } else { 260 isSubClassOf = sr.isSuperClassOf(nc1, nc2); 261 } 262 if(isSubClassOf){ 263 classes.remove(nc1, classes.count(nc1)); 264 break; 265 } 266 } 267 } 268 } 269 } 270}