001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.utilities.sparql;
020
021import java.util.Collection;
022import java.util.HashSet;
023import java.util.Iterator;
024import java.util.Map.Entry;
025import java.util.Set;
026
027import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
028import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp;
029import org.dllearner.kb.sparql.SparqlEndpoint;
030
031import com.google.common.collect.HashMultimap;
032import com.google.common.collect.Multimap;
033import org.apache.jena.graph.Node;
034import org.apache.jena.graph.Triple;
035import org.apache.jena.query.ParameterizedSparqlString;
036import org.apache.jena.query.Query;
037import org.apache.jena.query.QueryExecution;
038import org.apache.jena.query.QueryFactory;
039import org.apache.jena.query.QuerySolution;
040import org.apache.jena.query.ResultSet;
041import org.apache.jena.sparql.core.TriplePath;
042import org.apache.jena.sparql.syntax.Element;
043import org.apache.jena.sparql.syntax.ElementGroup;
044import org.apache.jena.sparql.syntax.ElementOptional;
045import org.apache.jena.sparql.syntax.ElementPathBlock;
046import org.apache.jena.sparql.syntax.ElementTriplesBlock;
047import org.apache.jena.sparql.syntax.ElementUnion;
048import org.apache.jena.sparql.syntax.ElementVisitorBase;
049import org.apache.jena.vocabulary.RDF;
050
051/**
052 * @author Lorenz Buehmann
053 *
054 */
055public class RedundantTypeTriplePatternRemover extends ElementVisitorBase{
056        
057        private static final ParameterizedSparqlString superClassesQueryTemplate = new ParameterizedSparqlString(
058                        "SELECT ?sup WHERE {?sub rdfs:subClassOf+ ?sup .}");
059        
060        
061        private QueryExecutionFactory qef;
062
063        public RedundantTypeTriplePatternRemover(QueryExecutionFactory qef) {
064                this.qef = qef;
065        }
066        
067        /**
068         * Returns a pruned copy of the given query.
069         * @param query the query
070         * @return a pruned copy of the given query
071         */
072        public Query pruneQuery(Query query) {
073                Query copy = query.cloneQuery();
074                copy.getQueryPattern().visit(this);
075                return copy;
076        }
077        
078        private Set<Node> getSuperClasses(Node cls){
079                Set<Node> superClasses = new HashSet<>();
080                
081                superClassesQueryTemplate.setIri("sub", cls.getURI());
082                
083                String query = superClassesQueryTemplate.toString();
084                QueryExecution qe = qef.createQueryExecution(query);
085                ResultSet rs = qe.execSelect();
086                while(rs.hasNext()){
087                        QuerySolution qs = rs.next();
088                        superClasses.add(qs.getResource("sup").asNode());
089                }
090                qe.close();
091                
092                return superClasses;
093        }
094        
095        @Override
096        public void visit(ElementGroup el) {
097                for (Element e : el.getElements()) {
098                        e.visit(this);
099                }
100        }
101
102        @Override
103        public void visit(ElementOptional el) {
104                el.getOptionalElement().visit(this);
105        }
106
107        @Override
108        public void visit(ElementTriplesBlock el) {
109                // get all rdf:type triple patterns
110                Multimap<Node, Triple> subject2TypeTriples = HashMultimap.create();
111                for (Iterator<Triple> iterator = el.patternElts(); iterator.hasNext();) {
112                        Triple t = iterator.next();
113                        if(t.getPredicate().matches(RDF.type.asNode())) {
114                                subject2TypeTriples.put(t.getSubject(), t);
115                        }
116                }
117                
118                // check for semantically redundant triple patterns
119                Set<Triple> redundantTriples = new HashSet<>();
120                for (Entry<Node, Collection<Triple>> entry : subject2TypeTriples.asMap().entrySet()) {
121                        Collection<Triple> triples = entry.getValue();
122                        
123                        // get all super classes
124                        Set<Node> superClasses = new HashSet<>();
125                        for (Triple triple : triples) {
126                                Node cls = triple.getObject();
127                                superClasses.addAll(getSuperClasses(cls));
128                        }
129                        
130                        for (Triple triple : triples) {
131                                Node cls = triple.getObject();
132                                if(superClasses.contains(cls)) {
133                                        redundantTriples.add(triple);
134                                }
135                        }
136                }
137                
138                // remove redundant triple patterns
139                for (Iterator<Triple> iterator = el.patternElts(); iterator.hasNext();) {
140                        Triple t = iterator.next();
141                        if(redundantTriples.contains(t)) {
142                                iterator.remove();
143                        }
144                }
145        }
146
147        @Override
148        public void visit(ElementPathBlock el) {
149                // get all rdf:type triple patterns
150                Multimap<Node, Triple> subject2TypeTriples = HashMultimap.create();
151                for (Iterator<TriplePath> iterator = el.patternElts(); iterator.hasNext();) {
152                        TriplePath t = iterator.next();
153                        if (t.isTriple() && t.getPredicate().matches(RDF.type.asNode())) {
154                                subject2TypeTriples.put(t.getSubject(), t.asTriple());
155                        }
156                }
157
158                // check for semantically redundant triple patterns
159                Set<Triple> redundantTriples = new HashSet<>();
160                for (Entry<Node, Collection<Triple>> entry : subject2TypeTriples.asMap().entrySet()) {
161                        Collection<Triple> triples = entry.getValue();
162
163                        // get all super classes
164                        Set<Node> superClasses = new HashSet<>();
165                        for (Triple triple : triples) {
166                                Node cls = triple.getObject();
167                                superClasses.addAll(getSuperClasses(cls));
168                        }
169
170                        for (Triple triple : triples) {
171                                Node cls = triple.getObject();
172                                if (superClasses.contains(cls)) {
173                                        redundantTriples.add(triple);
174                                }
175                        }
176                }
177
178                // remove redundant triple patterns
179                for (Iterator<TriplePath> iterator = el.patternElts(); iterator.hasNext();) {
180                        TriplePath t = iterator.next();
181                        if (t.isTriple() && redundantTriples.contains(t.asTriple())) {
182                                iterator.remove();
183                        }
184                }
185        }
186
187        @Override
188        public void visit(ElementUnion el) {
189                for (Element e : el.getElements()) {
190                        e.visit(this);
191                }
192        }
193        
194        public static void main(String[] args) throws Exception {
195                String query = "SELECT DISTINCT  ?x0\n" + 
196                                "WHERE\n" + 
197                                "  { ?x0  <http://dbpedia.org/ontology/capital>  ?x7 ;\n" + 
198                                "         <http://dbpedia.org/ontology/currency>  <http://dbpedia.org/resource/West_African_CFA_franc> ;\n" + 
199                                "         <http://dbpedia.org/ontology/foundingDate>  ?x12 ;\n" + 
200                                "         <http://dbpedia.org/ontology/governmentType>  ?x13 ;\n" + 
201                                "         <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://dbpedia.org/ontology/Country> ;\n" + 
202                                "         <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://dbpedia.org/ontology/Place> ;\n" + 
203                                "         <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://dbpedia.org/ontology/PopulatedPlace> ;\n" + 
204                                "         <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://dbpedia.org/ontology/Wikidata:Q532> .\n" + 
205                                "    ?x7  <http://dbpedia.org/ontology/country>  ?x8 ;\n" + 
206                                "         <http://dbpedia.org/ontology/elevation>  ?x9 ;\n" + 
207                                "         <http://dbpedia.org/ontology/isPartOf>  ?x10 ;\n" + 
208                                "         <http://dbpedia.org/ontology/populationTotal>  ?x11 ;\n" + 
209                                "         <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://dbpedia.org/ontology/Place> ;\n" + 
210                                "         <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://dbpedia.org/ontology/PopulatedPlace> ;\n" + 
211                                "         <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://dbpedia.org/ontology/Settlement> ;\n" + 
212                                "         <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://dbpedia.org/ontology/Wikidata:Q532> .\n" + 
213                                "  }";
214                SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
215                QueryExecutionFactory qef = new QueryExecutionFactoryHttp(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs());
216                RedundantTypeTriplePatternRemover remover = new RedundantTypeTriplePatternRemover(qef);
217                System.out.println(remover.pruneQuery(QueryFactory.create(query)));
218        }
219        
220        
221
222}