/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.utilities.sparql;

import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp;
import org.dllearner.kb.sparql.SparqlEndpoint;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.Triple;
import org.apache.jena.query.ParameterizedSparqlString;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.apache.jena.sparql.core.TriplePath;
import org.apache.jena.sparql.syntax.Element;
import org.apache.jena.sparql.syntax.ElementGroup;
import org.apache.jena.sparql.syntax.ElementOptional;
import org.apache.jena.sparql.syntax.ElementPathBlock;
import org.apache.jena.sparql.syntax.ElementTriplesBlock;
import org.apache.jena.sparql.syntax.ElementUnion;
import org.apache.jena.sparql.syntax.ElementVisitorBase;
import org.apache.jena.vocabulary.RDF;

/**
 * Removes semantically redundant {@code rdf:type} triple patterns from a SPARQL query.
 * <p>
 * For each subject, a pattern {@code ?s rdf:type C} is dropped when the same block also
 * contains {@code ?s rdf:type D} and {@code D} is a strict subclass of {@code C} according
 * to the {@code rdfs:subClassOf} hierarchy returned by the configured
 * {@link QueryExecutionFactory}.
 * <p>
 * Only group, optional, union, triples-block and path-block elements are visited;
 * other element kinds are left untouched.
 *
 * @author Lorenz Buehmann
 */
public class RedundantTypeTriplePatternRemover extends ElementVisitorBase {

    /**
     * Query text for the transitive super classes of {@code ?sub}. The predicate is
     * written as a full IRI so that the query does not rely on the endpoint
     * predefining the {@code rdfs} prefix.
     */
    private static final String SUPER_CLASSES_QUERY =
            "SELECT ?sup WHERE {?sub <http://www.w3.org/2000/01/rdf-schema#subClassOf>+ ?sup .}";

    private final QueryExecutionFactory qef;

    /**
     * @param qef the factory used to query the class hierarchy
     */
    public RedundantTypeTriplePatternRemover(QueryExecutionFactory qef) {
        this.qef = qef;
    }

    /**
     * Returns a pruned copy of the given query.
     *
     * @param query the query; it is cloned and not modified itself
     * @return a pruned copy of the given query
     */
    public Query pruneQuery(Query query) {
        Query copy = query.cloneQuery();
        Element pattern = copy.getQueryPattern();
        // Guard against queries that carry no pattern to visit.
        if (pattern != null) {
            pattern.visit(this);
        }
        return copy;
    }

    /**
     * Asks the knowledge base for all transitive super classes of the given class.
     *
     * @param cls a URI node denoting the class
     * @return the super classes reported by the knowledge base, possibly empty
     */
    private Set<Node> getSuperClasses(Node cls) {
        Set<Node> superClasses = new HashSet<>();

        // A fresh template per call: the parameter binding is mutable state and must
        // not be shared between invocations.
        ParameterizedSparqlString template = new ParameterizedSparqlString(SUPER_CLASSES_QUERY);
        template.setIri("sub", cls.getURI());

        // try-with-resources closes the execution even if the query fails.
        try (QueryExecution qe = qef.createQueryExecution(template.toString())) {
            ResultSet rs = qe.execSelect();
            while (rs.hasNext()) {
                QuerySolution qs = rs.next();
                superClasses.add(qs.getResource("sup").asNode());
            }
        }

        return superClasses;
    }

    /**
     * Tells whether the triple is an {@code rdf:type} pattern whose object is a named class.
     * Patterns with a variable (or otherwise non-URI) object are ignored because no
     * class hierarchy can be looked up for them.
     */
    private static boolean isNamedTypeTriple(Triple t) {
        return t.getPredicate().matches(RDF.type.asNode()) && t.getObject().isURI();
    }

    /**
     * Determines which of the given type triples are implied by another type triple
     * of the same subject.
     *
     * @param subject2TypeTriples the {@code rdf:type} triples grouped by subject
     * @return the triples that can be removed without changing the query semantics
     */
    private Set<Triple> findRedundantTypeTriples(Multimap<Node, Triple> subject2TypeTriples) {
        // Look up each distinct class only once, even if several subjects use it.
        Map<Node, Set<Node>> superClassesByClass = new HashMap<>();
        for (Triple t : subject2TypeTriples.values()) {
            Node cls = t.getObject();
            if (!superClassesByClass.containsKey(cls)) {
                superClassesByClass.put(cls, getSuperClasses(cls));
            }
        }

        Set<Triple> redundantTriples = new HashSet<>();
        for (Entry<Node, Collection<Triple>> entry : subject2TypeTriples.asMap().entrySet()) {
            Collection<Triple> triples = entry.getValue();
            for (Triple candidate : triples) {
                if (isImpliedByOtherType(candidate.getObject(), triples, superClassesByClass)) {
                    redundantTriples.add(candidate);
                }
            }
        }
        return redundantTriples;
    }

    /**
     * Checks whether {@code cls} is a strict super class of a different class used in
     * {@code triples}. Classes that subsume each other (equivalent classes, cycles in
     * the hierarchy) never make one another redundant, so at least one type
     * constraint always survives.
     */
    private static boolean isImpliedByOtherType(Node cls, Collection<Triple> triples,
            Map<Node, Set<Node>> superClassesByClass) {
        for (Triple other : triples) {
            Node otherCls = other.getObject();
            if (otherCls.equals(cls)) {
                continue;
            }
            boolean otherIsSubClass = superClassesByClass.get(otherCls).contains(cls);
            boolean mutual = superClassesByClass.get(cls).contains(otherCls);
            if (otherIsSubClass && !mutual) {
                return true;
            }
        }
        return false;
    }

    @Override
    public void visit(ElementGroup el) {
        for (Element e : el.getElements()) {
            e.visit(this);
        }
    }

    @Override
    public void visit(ElementOptional el) {
        el.getOptionalElement().visit(this);
    }

    @Override
    public void visit(ElementTriplesBlock el) {
        // get all rdf:type triple patterns
        Multimap<Node, Triple> subject2TypeTriples = HashMultimap.create();
        for (Iterator<Triple> iterator = el.patternElts(); iterator.hasNext();) {
            Triple t = iterator.next();
            if (isNamedTypeTriple(t)) {
                subject2TypeTriples.put(t.getSubject(), t);
            }
        }

        // check for semantically redundant triple patterns
        Set<Triple> redundantTriples = findRedundantTypeTriples(subject2TypeTriples);
        if (redundantTriples.isEmpty()) {
            return;
        }

        // remove redundant triple patterns
        for (Iterator<Triple> iterator = el.patternElts(); iterator.hasNext();) {
            Triple t = iterator.next();
            if (redundantTriples.contains(t)) {
                iterator.remove();
            }
        }
    }

    @Override
    public void visit(ElementPathBlock el) {
        // get all rdf:type triple patterns; property paths are not considered
        Multimap<Node, Triple> subject2TypeTriples = HashMultimap.create();
        for (Iterator<TriplePath> iterator = el.patternElts(); iterator.hasNext();) {
            TriplePath t = iterator.next();
            if (t.isTriple() && isNamedTypeTriple(t.asTriple())) {
                subject2TypeTriples.put(t.getSubject(), t.asTriple());
            }
        }

        // check for semantically redundant triple patterns
        Set<Triple> redundantTriples = findRedundantTypeTriples(subject2TypeTriples);
        if (redundantTriples.isEmpty()) {
            return;
        }

        // remove redundant triple patterns
        for (Iterator<TriplePath> iterator = el.patternElts(); iterator.hasNext();) {
            TriplePath t = iterator.next();
            if (t.isTriple() && redundantTriples.contains(t.asTriple())) {
                iterator.remove();
            }
        }
    }

    @Override
    public void visit(ElementUnion el) {
        for (Element e : el.getElements()) {
            e.visit(this);
        }
    }

    /**
     * Small demo: prunes a sample query against the public DBpedia endpoint and
     * prints the result.
     */
    public static void main(String[] args) throws Exception {
        String query = "SELECT DISTINCT ?x0\n" +
                "WHERE\n" +
                " { ?x0 <http://dbpedia.org/ontology/capital> ?x7 ;\n" +
                " <http://dbpedia.org/ontology/currency> <http://dbpedia.org/resource/West_African_CFA_franc> ;\n" +
                " <http://dbpedia.org/ontology/foundingDate> ?x12 ;\n" +
                " <http://dbpedia.org/ontology/governmentType> ?x13 ;\n" +
                " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Country> ;\n" +
                " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Place> ;\n" +
                " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/PopulatedPlace> ;\n" +
                " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Wikidata:Q532> .\n" +
                " ?x7 <http://dbpedia.org/ontology/country> ?x8 ;\n" +
                " <http://dbpedia.org/ontology/elevation> ?x9 ;\n" +
                " <http://dbpedia.org/ontology/isPartOf> ?x10 ;\n" +
                " <http://dbpedia.org/ontology/populationTotal> ?x11 ;\n" +
                " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Place> ;\n" +
                " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/PopulatedPlace> ;\n" +
                " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Settlement> ;\n" +
                " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Wikidata:Q532> .\n" +
                " }";
        SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
        QueryExecutionFactory qef = new QueryExecutionFactoryHttp(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs());
        RedundantTypeTriplePatternRemover remover = new RedundantTypeTriplePatternRemover(qef);
        System.out.println(remover.pruneQuery(QueryFactory.create(query)));
    }

}