001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.utilities;
020
021import com.google.common.collect.HashMultimap;
022import com.google.common.collect.Multimap;
023import com.google.common.collect.Sets;
024import com.mxgraph.layout.hierarchical.mxHierarchicalLayout;
025import com.mxgraph.util.mxCellRenderer;
026import com.mxgraph.util.mxConstants;
027import com.mxgraph.util.png.mxPngEncodeParam;
028import com.mxgraph.util.png.mxPngImageEncoder;
029import com.mxgraph.view.mxStylesheet;
030import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
031import org.apache.commons.collections15.ListUtils;
032import org.apache.jena.datatypes.RDFDatatype;
033import org.apache.jena.graph.Node;
034import org.apache.jena.graph.Triple;
035import org.apache.jena.query.*;
036import org.apache.jena.rdf.model.impl.Util;
037import org.apache.jena.shared.PrefixMapping;
038import org.apache.jena.sparql.core.TriplePath;
039import org.apache.jena.sparql.core.Var;
040import org.apache.jena.sparql.syntax.*;
041import org.apache.jena.sparql.util.VarUtils;
042import org.apache.jena.vocabulary.RDF;
043import org.dllearner.kb.sparql.CBDStructureTree;
044import org.jgrapht.ext.JGraphXAdapter;
045import org.jgrapht.graph.DefaultDirectedGraph;
046import org.jgrapht.graph.DefaultEdge;
047import org.slf4j.Logger;
048import org.slf4j.LoggerFactory;
049
050import java.awt.*;
051import java.awt.image.BufferedImage;
052import java.io.File;
053import java.io.FileOutputStream;
054import java.io.IOException;
055import java.util.*;
056import java.util.List;
057import java.util.Map.Entry;
058import java.util.stream.Collectors;
059import java.util.stream.Stream;
060
061/**
062 * Utilities for SPARQL queries.
063 */
064public class QueryUtils extends ElementVisitorBase {
065        
066private static final Logger logger = LoggerFactory.getLogger(QueryUtils.class); 
067
068        private static final ParameterizedSparqlString superClassesQueryTemplate = new ParameterizedSparqlString(
069                        "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> "
070                        + "SELECT ?sup WHERE {"
071                        + "?sub ((rdfs:subClassOf|owl:equivalentClass)|^owl:equivalentClass)+ ?sup .}");
072        
073        private Set<Triple> triplePattern;
074        private Set<Triple> optionalTriplePattern;
075        
076        private boolean inOptionalClause = false;
077        
078        private int unionCount = 0;
079        private int optionalCount = 0;
080        private int filterCount = 0;
081        
082        private Map<Triple, ElementGroup> triple2Parent = new HashMap<>();
083        
084        Stack<ElementGroup> parents = new Stack<>();
085        
086        public static String addPrefix(String queryString, Map<String, String> prefix2Namespace){
087                Query query = QueryFactory.create(queryString);
088                for (Entry<String, String> entry : prefix2Namespace.entrySet()) {
089                        String prefix = entry.getKey();
090                        String namespace = entry.getValue();
091                        query.setPrefix(prefix, namespace);
092                }
093                return query.toString();
094        }
095        
096        public static String addPrefixes(String queryString, String prefix, String namespace){
097                Query query = QueryFactory.create(queryString);
098                query.setPrefix(prefix, namespace);
099                return query.toString();
100        }
101
102        /**
103         * Remove unused prefix declarations from preamble.
104         *
105         * @param query the query
106         */
107        public static void prunePrefixes(Query query) {
108                PrefixMapping pm = query.getPrefixMapping();
109                String baseURI = query.getBaseURI();
110                query.setBaseURI((String) null);
111
112                Set<String> usedPrefixes = Sets.newHashSet();
113                getNodes(query).forEach(node -> {
114                        String ns = null;
115                        if(node.isURI()) {
116                                ns = node.getNameSpace();
117                        } else if(node.isLiteral()) {
118                                RDFDatatype dt = node.getLiteralDatatype();
119                                if(dt != null) {
120                                        String uri = dt.getURI();
121                                        ns = uri.substring(0, Util.splitNamespaceXML(uri));
122                                }
123                        }
124
125                        if(ns != null) {
126                                // check if base URI has been used
127                                if(ns.equals(baseURI)) {
128                                        query.setBaseURI(baseURI);
129                                }
130                                // check if one of the prefixes has been used
131                                String prefix = pm.getNsURIPrefix(ns);
132                                if(prefix != null) {
133                                        usedPrefixes.add(prefix);
134                                }
135                        }
136                });
137                // override prefix map
138                Map<String, String> prefixMap = pm.getNsPrefixMap();
139                prefixMap.entrySet().removeIf(mapping -> !usedPrefixes.contains(mapping.getKey()));
140                pm.clearNsPrefixMap();
141                pm.setNsPrefixes(prefixMap);
142        }
143
144        /**
145         * @param query the query
146         * @return all nodes that occur in triple patterns of the query
147         */
148        public static Set<Node> getNodes(Query query) {
149                return getTriplePatterns(query).stream()
150                                .map(QueryUtils::getNodes)
151                                .flatMap(Collection::stream)
152                                .collect(Collectors.toSet());
153        }
154
155        /**
156         * Convert triple pattern to a set of nodes {s, p, o}
157         *
158         * @param t the triple pattern
159         * @return set of nodes {s, p, o}
160         */
161        public static Set<Node> getNodes(Triple t) {
162                return Sets.newHashSet(t.getSubject(), t.getPredicate(), t.getObject());
163        }
164        
165        /**
166         * Returns all variables that occur in a triple pattern of the SPARQL query.
167         * @param query the query
168         * @return
169         */
170        public Set<Var> getVariables(Query query){
171                Set<Var> vars = new HashSet<>();
172                
173                Set<Triple> triplePatterns = extractTriplePattern(query, false);
174                
175                for (Triple tp : triplePatterns) {
176                        if(tp.getSubject().isVariable()){
177                                vars.add(Var.alloc(tp.getSubject()));
178                        } else if(tp.getObject().isVariable()){
179                                vars.add(Var.alloc(tp.getObject()));
180                        } else if(tp.getPredicate().isVariable()){
181                                vars.add(Var.alloc(tp.getPredicate()));
182                        }
183                }
184                
185                return vars;
186        }
187        
188        /**
189         * Returns all variables that occur as subject in a triple pattern of the SPARQL query.
190         * @param query the query
191         * @return
192         */
193        public Set<Var> getSubjectVariables(Query query){
194
195                Set<Triple> triplePatterns = extractTriplePattern(query, false);
196
197                Set<Var> vars = new HashSet<>();
198                for (Triple tp : triplePatterns) {
199                        if(tp.getSubject().isVariable()){
200                                vars.add(Var.alloc(tp.getSubject()));
201                        }
202                }
203                
204                return vars;
205        }
206        
207        /**
208         * Returns all variables that occur as subject in a triple pattern of the SPARQL query.
209         * @param query the query
210         * @return
211         */
212        public static Set<Var> getSubjectVars(Query query){
213                final Set<Var> vars = new HashSet<>();
214                
215                ElementWalker.walk(query.getQueryPattern(), new ElementVisitorBase(){
216                        @Override
217                        public void visit(ElementTriplesBlock el) {
218                                Iterator<Triple> triples = el.patternElts();
219                    while (triples.hasNext()) {
220                        Triple triple = triples.next();
221                        if(triple.getSubject().isVariable()) {
222                                vars.add(Var.alloc(triples.next().getSubject()));
223                        }
224                    }
225                        }
226                        
227                        @Override
228                        public void visit(ElementPathBlock el) {
229                                Iterator<TriplePath> triples = el.patternElts();
230                    while (triples.hasNext()) {
231                        TriplePath triple = triples.next();
232                        if(triple.getSubject().isVariable()) {
233                                vars.add(Var.alloc(triples.next().getSubject()));
234                        }
235                    }
236                        }
237                });
238                
239                return vars;
240        }
241        
242        public static Set<Triple> getTriplePatterns(Query query){
243                final Set<Triple> triplePatterns = Sets.newHashSet();
244                
245                ElementWalker.walk(query.getQueryPattern(), new ElementVisitorBase(){
246                        @Override
247                        public void visit(ElementTriplesBlock el) {
248                                Iterator<Triple> triples = el.patternElts();
249                    while (triples.hasNext()) {
250                        Triple triple = triples.next();
251                        triplePatterns.add(triple);
252                    }
253                        }
254                        
255                        @Override
256                        public void visit(ElementPathBlock el) {
257                                Iterator<TriplePath> triplePaths = el.patternElts();
258                    while (triplePaths.hasNext()) {
259                        TriplePath tp = triplePaths.next();
260                        if(tp.isTriple()) {
261                                Triple triple = tp.asTriple();
262                                triplePatterns.add(triple);
263                        }
264                    }
265                        }
266                });
267                return triplePatterns;
268        }
269        
270        /**
271         * Given a SPARQL query and a start node, return the outgoing
272         * triple patterns.
273         * @param query the query
274         * @param source the start node
275         * @return
276         */
277        public static Set<Triple> getOutgoingTriplePatterns(Query query, final Node source){
278                final Set<Triple> outgoingTriples = Sets.newHashSet();
279                
280                ElementWalker.walk(query.getQueryPattern(), new ElementVisitorBase(){
281                        @Override
282                        public void visit(ElementTriplesBlock el) {
283                                Iterator<Triple> triples = el.patternElts();
284                    while (triples.hasNext()) {
285                        Triple triple = triples.next();
286                        Node subject = triple.getSubject();
287                        if(subject.equals(source)) {
288                                outgoingTriples.add(triple);
289                        }
290                    }
291                        }
292                        
293                        @Override
294                        public void visit(ElementPathBlock el) {
295                                Iterator<TriplePath> triplePaths = el.patternElts();
296                    while (triplePaths.hasNext()) {
297                        TriplePath tp = triplePaths.next();
298                        if(tp.isTriple()) {
299                                Triple triple = tp.asTriple();
300                                Node subject = triple.getSubject();
301                                if(subject.equals(source)) {
302                                        outgoingTriples.add(triple);
303                                }
304                        }
305                    }
306                        }
307                });
308                
309                return outgoingTriples;
310        }
311        
312        /**
313         * Given a SPARQL query and a start node, return the maximum subject-object
314         * join depth.
315         * @param query the query
316         * @param source the start node
317         * @return
318         */
319        public static int getSubjectObjectJoinDepth(Query query, final Node source){
320                final Set<Triple> outgoingTriples = Sets.newHashSet();
321                
322                ElementWalker.walk(query.getQueryPattern(), new ElementVisitorBase(){
323                        @Override
324                        public void visit(ElementTriplesBlock el) {
325                                Iterator<Triple> triples = el.patternElts();
326                    while (triples.hasNext()) {
327                        Triple triple = triples.next();
328                        Node subject = triple.getSubject();
329                        if(subject.equals(source) && triple.getObject().isVariable()) {
330                                outgoingTriples.add(triple);
331                        }
332                    }
333                        }
334                        
335                        @Override
336                        public void visit(ElementPathBlock el) {
337                                Iterator<TriplePath> triplePaths = el.patternElts();
338                    while (triplePaths.hasNext()) {
339                        TriplePath tp = triplePaths.next();
340                        if(tp.isTriple()) {
341                                Triple triple = tp.asTriple();
342                                Node subject = triple.getSubject();
343                                if(subject.equals(source) && triple.getObject().isVariable()) {
344                                        outgoingTriples.add(triple);
345                                }
346                        }
347                    }
348                        }
349                });
350                
351                int maxDepth = 0;
352                for (Triple triple : outgoingTriples) {
353                        maxDepth = Math.max(maxDepth, 1 + getSubjectObjectJoinDepth(query, triple.getObject()));
354                }
355                
356                return maxDepth;
357        }
358
359        public static CBDStructureTree getOptimalCBDStructure(Query query) {
360                CBDStructureTree tree = new CBDStructureTree("root");
361
362                Var var = query.getProjectVars().get(0);
363
364                getOptimalCBDStructure(query, tree, var.asNode(), null, "");
365
366                return tree;
367        }
368
369        private static void getOptimalCBDStructure(Query query, CBDStructureTree structureTree, Node current, Node parent, String direction) {
370                QueryUtils utils = new QueryUtils();
371
372                // traverse the outgoing paths
373                Set<Triple> tmp = utils.extractOutgoingTriplePatterns(query, current)
374                                .stream()
375                                .filter(tp -> !direction.equals("in") || !tp.getObject().matches(parent))
376                                .collect(Collectors.toSet());
377                if(!tmp.isEmpty()) {
378                        List<CBDStructureTree> outChildren = structureTree.getChildren().stream().filter(CBDStructureTree::isOutNode).collect(Collectors.toList());
379                        CBDStructureTree outChild;
380                        if(outChildren.isEmpty()) {
381                                outChild = structureTree.addOutNode();
382                        } else {
383                                outChild = outChildren.get(0);
384                        }
385                        tmp.stream()
386                                        .filter(tp -> tp.getObject().isVariable())
387                                        .map(Triple::getObject)
388                                        .forEach(node -> getOptimalCBDStructure(query, outChild, node, current, "out"));
389                }
390                // traverse the incoming paths
391                tmp = utils.extractIncomingTriplePatterns(query, current)
392                                .stream()
393                                .filter(tp -> !direction.equals("out") || !tp.getSubject().matches(parent))
394                                .collect(Collectors.toSet());
395                if(!tmp.isEmpty()) {
396                        CBDStructureTree inChild = structureTree.addInNode();
397
398                        tmp.stream()
399                                        .filter(tp -> tp.getSubject().isVariable())
400                                        .map(Triple::getSubject)
401                                        .forEach(node -> getOptimalCBDStructure(query, inChild, node, current, "in"));
402                }
403        }
404
405        /**
406         * Returns all variables that occur as object in a triple pattern of the SPARQL query.
407         * @param query the query
408         * @return
409         */
410        public static Set<Var> getObjectVars(Query query){
411                return getTriplePatterns(query).stream()
412                                .filter(tp -> tp.getObject().isVariable())
413                                .map(tp -> Var.alloc(tp.getObject()))
414                                .collect(Collectors.toSet());
415        }
416        
417        /**
418         * Returns all variables that occur as subject in a triple pattern of the SPARQL query.
419         * @param query the query
420         * @return
421         */
422        public Set<Var> getObjectVariables(Query query){
423                return extractTriplePattern(query, false).stream()
424                                .filter(tp -> tp.getObject().isVariable())
425                                .map(tp -> Var.alloc(tp.getObject()))
426                                .collect(Collectors.toSet());
427        }
428        
429        /**
430         * Returns all triple patterns in given SPARQL query that have the given node in subject position, i.e. the outgoing
431         * triple patterns.
432         * @param query The SPARQL query.
433         * @param node the node
434         * @return
435         */
436        public Set<Triple> extractOutgoingTriplePatterns(Query query, Node node){
437                return extractTriplePattern(query, false).stream()
438                                .filter(t -> t.subjectMatches(node))
439                                .collect(Collectors.toSet());
440        }
441
442        public Set<Triple> extractOutgoingTriplePatternsTrans(Query query, Node node) {
443                return Stream.concat(extractOutgoingTriplePatterns(query, node).stream(),
444                                                         extractOutgoingTriplePatterns(query, node).stream()
445                                                                         .map(tp -> extractOutgoingTriplePatternsTrans(query, tp.getObject()))
446                                                                         .flatMap(set -> set.stream()))
447                                .collect(Collectors.toSet());
448        }
449
450        /**
451         * Returns all triple patterns in given SPARQL query that have the given node in object position, i.e. the incoming
452         * triple patterns.
453         * @param query The SPARQL query.
454         * @param node the node
455         * @return
456         */
457        public Set<Triple> extractIncomingTriplePatterns(Query query, Node node){
458                return extractTriplePattern(query, false).stream()
459                                .filter(tp -> tp.objectMatches(node))
460                                .collect(Collectors.toSet());
461        }
462
463        public Set<Triple> extractIncomingTriplePatternsTrans(Query query, Node node) {
464                return Stream.concat(extractIncomingTriplePatterns(query, node).stream(),
465                                                         extractIncomingTriplePatterns(query, node).stream()
466                                                                         .map(tp -> extractIncomingTriplePatternsTrans(query, tp.getSubject()))
467                                                                         .flatMap(set -> set.stream()))
468                                .collect(Collectors.toSet());
469        }
470
471        /**
472         * Returns all triple patterns in given SPARQL query that have the given node either in subject or in object position, i.e. 
473         * the ingoing and outgoing triple patterns.
474         * @param query The SPARQL query.
475         * @param node the node
476         * @return
477         */
478        public Set<Triple> extractTriplePatterns(Query query, Node node){
479                Set<Triple> triplePatterns = new HashSet<>();
480                triplePatterns.addAll(extractIncomingTriplePatterns(query, node));
481                triplePatterns.addAll(extractOutgoingTriplePatterns(query, node));
482                return triplePatterns;
483        }
484        
485        /**
486         * Returns all triple patterns in given SPARQL query that contain the
487         * given predicate. 
488         * @param query the SPARQL query.
489         * @param predicate the predicate
490         * @return
491         */
492        public Set<Triple> extractTriplePatternsWithPredicate(Query query, Node predicate){
493                // get all triple patterns
494                Set<Triple> triplePatterns = extractTriplePattern(query);
495                
496                // filter by predicate
497                triplePatterns.removeIf(tp -> !tp.predicateMatches(predicate));
498                
499                return triplePatterns;
500        }
501        
502        /**
503         * Returns all triple patterns in given SPARQL query that have the given node either in subject or in object position, i.e. 
504         * the incoming and outgoing triple patterns.
505         * @param query The SPARQL query.
506         * @param node the node
507         * @return
508         */
509        public Set<Triple> extractNonOptionalTriplePatterns(Query query, Node node){
510                Set<Triple> triplePatterns = new HashSet<>();
511                triplePatterns.addAll(extractIncomingTriplePatterns(query, node));
512                triplePatterns.addAll(extractOutgoingTriplePatterns(query, node));
513                triplePatterns.removeAll(optionalTriplePattern);
514                return triplePatterns;
515        }
516        
517        /**
518         * Returns triple patterns for each projection variable v such that v is either in subject or object position.
519         * @param query The SPARQL query.
520         * @return
521         */
522        public Map<Var,Set<Triple>> extractTriplePatternsForProjectionVars(Query query){
523                Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<>();
524                for (Var var : query.getProjectVars()) {
525                        Set<Triple> triplePatterns = new HashSet<>();
526                        triplePatterns.addAll(extractIncomingTriplePatterns(query, var));
527                        triplePatterns.addAll(extractOutgoingTriplePatterns(query, var));
528                        var2TriplePatterns.put(var, triplePatterns);
529                }
530                return var2TriplePatterns;
531        }
532        
533        /**
534         * Returns triple patterns for each projection variable v such that v is in subject position.
535         * @param query The SPARQL query.
536         * @return
537         */
538        public Map<Var,Set<Triple>> extractOutgoingTriplePatternsForProjectionVars(Query query){
539                Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<>();
540                for (Var var : query.getProjectVars()) {
541                        Set<Triple> triplePatterns = new HashSet<>(extractOutgoingTriplePatterns(query, var));
542                        var2TriplePatterns.put(var, triplePatterns);
543                }
544                return var2TriplePatterns;
545        }
546        
547        /**
548         * @return the optionalTriplePattern
549         */
550        public Set<Triple> getOptionalTriplePatterns() {
551                return optionalTriplePattern;
552        }
553        
554        /**
555         * Returns triple patterns for each projection variable v such that v is in subject position.
556         * @param query The SPARQL query.
557         * @return
558         */
559        public Map<Var,Set<Triple>> extractIncomingTriplePatternsForProjectionVars(Query query){
560                Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<>();
561                for (Var var : query.getProjectVars()) {
562                        Set<Triple> triplePatterns = new HashSet<>(extractIncomingTriplePatterns(query, var));
563                        var2TriplePatterns.put(var, triplePatterns);
564                }
565                return var2TriplePatterns;
566        }
567        
568        /**
569         * Returns triple patterns for each projection variable v such that v is in object position.
570         * @param query The SPARQL query.
571         * @return
572         */
573        public Map<Var,Set<Triple>> extractIngoingTriplePatternsForProjectionVars(Query query){
574                Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<>();
575                for (Var var : query.getProjectVars()) {
576                        Set<Triple> triplePatterns = new HashSet<>(extractIncomingTriplePatterns(query, var));
577                        var2TriplePatterns.put(var, triplePatterns);
578                }
579                return var2TriplePatterns;
580        }
581        
582        public Set<Triple> extractTriplePattern(Query query){
583                return extractTriplePattern(query, false);
584        }
585        
586        public Set<Triple> extractTriplePattern(Query query, boolean ignoreOptionals){
587                triplePattern = new HashSet<>();
588                optionalTriplePattern = new HashSet<>();
589                
590                query.getQueryPattern().visit(this);
591                
592                //postprocessing: triplepattern in OPTIONAL clause
593                if(!ignoreOptionals){
594                        if(query.isSelectType()){
595                                for(Triple t : optionalTriplePattern){
596                                        if(!ListUtils.intersection(new ArrayList<>(VarUtils.getVars(t)), query.getProjectVars()).isEmpty()){
597                                                triplePattern.add(t);
598                                        }
599                                }
600                        }
601                }
602                return triplePattern;
603        }
604        
605        public boolean isOptional(Triple triple){
606                return optionalTriplePattern.contains(triple);
607        }
608        
609        public Set<Triple> extractTriplePattern(ElementGroup group){
610                return extractTriplePattern(group, false);
611        }
612        
613        public Set<Triple> extractTriplePattern(ElementGroup group, boolean ignoreOptionals){
614                triplePattern = new HashSet<>();
615                optionalTriplePattern = new HashSet<>();
616                
617                group.visit(this);
618                
619                //postprocessing: triplepattern in OPTIONAL clause
620                if(!ignoreOptionals){
621                        triplePattern.addAll(optionalTriplePattern);
622                }
623                
624                return triplePattern;
625        }
626        
627        public Query removeUnboundObjectVarTriples(Query query) {
628                QueryUtils queryUtils = new QueryUtils();
629
630                Var rootVar = query.getProjectVars().get(0);
631
632                // 1. outgoing triple paths pruning
633                Set<Triple> outgoingTriplePatterns = queryUtils.extractOutgoingTriplePatternsTrans(query, rootVar);
634                Multimap<Var, Triple> var2OutgoingTriplePatterns = HashMultimap.create();
635
636                // mapping from variable to triple pattern
637                for (Triple tp : outgoingTriplePatterns) {
638                        var2OutgoingTriplePatterns.put(Var.alloc(tp.getSubject()), tp);
639                }
640
641                // remove triple patterns with object is var node and leaf node
642                Iterator<Triple> iterator = outgoingTriplePatterns.iterator();
643                while (iterator.hasNext()) {
644                        Triple triple = iterator.next();
645                        Node object = triple.getObject();
646                        if(object.isVariable() && !var2OutgoingTriplePatterns.containsKey(Var.alloc(object))) {
647                                iterator.remove();
648                        }
649                }
650
651                // 2. incoming triple paths pruning
652                Set<Triple> incomingTriplePatterns = queryUtils.extractIncomingTriplePatternsTrans(query, rootVar);
653                Multimap<Var, Triple> var2IncomingTriplePatterns = HashMultimap.create();
654
655                // mapping from variable to triple pattern
656                for (Triple tp : incomingTriplePatterns) {
657                        var2IncomingTriplePatterns.put(Var.alloc(tp.getObject()), tp);
658                }
659
660                // remove triple patterns with object is var node and leaf node
661                iterator = incomingTriplePatterns.iterator();
662                while (iterator.hasNext()) {
663                        Triple triple = iterator.next();
664                        Node s = triple.getSubject();
665                        if(s.isVariable() && !var2IncomingTriplePatterns.containsKey(Var.alloc(s))) {
666                                iterator.remove();
667                        }
668                }
669                
670                Query newQuery = new Query();
671                newQuery.addProjectVars(query.getProjectVars());
672                ElementTriplesBlock el = new ElementTriplesBlock();
673                for (Triple triple : Sets.union(outgoingTriplePatterns, incomingTriplePatterns)) {
674                        el.addTriple(triple);
675                }
676                newQuery.setQuerySelectType();
677                newQuery.setDistinct(true);
678                newQuery.setQueryPattern(el);
679                
680                return newQuery;
681        }
682        
683        /**
684         * Removes triple patterns of form (s rdf:type A) if there exists a
685         * triple pattern (s rdf:type B) such that the underlying
686         * knowledge base entails (B rdfs:subClassOf A).
687         * @param qef the query execution factory
688         * @param query the query
689         */
690        public void filterOutGeneralTypes(QueryExecutionFactory qef, Query query){
691                // extract all rdf:type triple patterns
692                Set<Triple> typeTriplePatterns = extractTriplePatternsWithPredicate(query, RDF.type.asNode());
693                
694                // group by subject
695                Multimap<Node, Triple> subject2TriplePatterns = HashMultimap.create();
696                for (Triple tp : typeTriplePatterns) {
697                        subject2TriplePatterns.put(tp.getSubject(), tp);
698                }
699                
700                // keep the most specific types for each subject
701                for (Node subject : subject2TriplePatterns.keySet()) {
702                        Collection<Triple> triplePatterns = subject2TriplePatterns.get(subject);
703                        Collection<Triple> triplesPatterns2Remove = new HashSet<>();
704                        
705                        for (Triple tp : triplePatterns) {
706                                if(!triplesPatterns2Remove.contains(tp)) {
707                                        // get all super classes for the triple object
708                                        Set<Node> superClasses = getSuperClasses(qef, tp.getObject());
709                                        
710                                        // remove triple patterns that have one of the super classes as object
711                                        for (Triple tp2 : triplePatterns) {
712                                                if(tp2 != tp && superClasses.contains(tp2.getObject())) {
713                                                        triplesPatterns2Remove.add(tp2);
714                                                }
715                                        }
716                                }
717                        }
718                        
719                        // remove triple patterns
720                        triplePatterns.removeAll(triplesPatterns2Remove);
721                }
722        }
723
724        public static DefaultDirectedGraph<Node, LabeledEdge> asJGraphT(Query query) {
725                QueryUtils utils = new QueryUtils();
726
727                Set<Triple> tps = utils.extractTriplePattern(query);
728
729                DefaultDirectedGraph<Node, LabeledEdge> g = new DefaultDirectedGraph<>(LabeledEdge.class);
730
731                tps.forEach(tp -> {
732                        g.addVertex(tp.getSubject());
733                        g.addVertex(tp.getObject());
734                        g.addEdge(tp.getSubject(), tp.getObject(), new LabeledEdge(tp.getSubject(), tp.getObject(), tp.getPredicate()));
735                });
736
737                return g;
738        }
739
740        public static void exportAsGraph(Query query, File file) {
741                DefaultDirectedGraph<Node, LabeledEdge> g = asJGraphT(query);
742                System.out.println(g.edgeSet().size());
743
744                JGraphXAdapter adapter = new JGraphXAdapter(g);
745
746
747                // positioning via jgraphx layouts
748                mxHierarchicalLayout layout = new mxHierarchicalLayout(adapter);
749                layout.execute(adapter.getDefaultParent());
750
751                Map<String, Object> edgeStyle = new HashMap<>();
752//edgeStyle.put(mxConstants.STYLE_EDGE, mxConstants.EDGESTYLE_ORTHOGONAL);
753                edgeStyle.put(mxConstants.STYLE_SHAPE,    mxConstants.SHAPE_CONNECTOR);
754                edgeStyle.put(mxConstants.STYLE_ENDARROW, mxConstants.ARROW_CLASSIC);
755                edgeStyle.put(mxConstants.STYLE_STROKECOLOR, "#000000");
756                edgeStyle.put(mxConstants.STYLE_FONTCOLOR, "#000000");
757                edgeStyle.put(mxConstants.STYLE_LABEL_BACKGROUNDCOLOR, "#ffffff");
758
759                Map<String, Object> nodeStyle = new HashMap<>();
760                nodeStyle.put(mxConstants.STYLE_SHAPE,    mxConstants.SHAPE_ELLIPSE);
761                nodeStyle.put(mxConstants.STYLE_VERTICAL_ALIGN, mxConstants.ALIGN_MIDDLE);
762
763                mxStylesheet stylesheet = new mxStylesheet();
764                stylesheet.setDefaultEdgeStyle(edgeStyle);
765                stylesheet.setDefaultVertexStyle(nodeStyle);
766
767                adapter.setStylesheet(stylesheet);
768
769//              JFrame frame = new JFrame();
770//              frame.getContentPane().add(new mxGraphComponent(adapter));
771//              frame.pack();
772//              frame.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);
773//              frame.setVisible(true);
774
775
776
777                BufferedImage image = mxCellRenderer.createBufferedImage(adapter, null, 1, Color.WHITE, true, null);
778                mxPngEncodeParam param = mxPngEncodeParam.getDefaultEncodeParam(image);
779
780
781                try {
782                        FileOutputStream outputStream = new FileOutputStream(file);
783                        mxPngImageEncoder encoder = new mxPngImageEncoder(outputStream, param);
784                        if (image != null) {
785                                encoder.encode(image);
786                        }
787                        outputStream.close();
788//                      ImageIO.write(image, "PNG", file);
789                } catch (IOException e) {
790                        e.printStackTrace();
791                }
792
793        }
794        
795        private Set<Node> getSuperClasses(QueryExecutionFactory qef, Node cls){
796                Set<Node> superClasses = new HashSet<>();
797                
798                superClassesQueryTemplate.setIri("sub", cls.getURI());
799                
800                String query = superClassesQueryTemplate.toString();
801                
802                try {
803                        QueryExecution qe = qef.createQueryExecution(query);
804                        ResultSet rs = qe.execSelect();
805                        while(rs.hasNext()){
806                                QuerySolution qs = rs.next();
807                                superClasses.add(qs.getResource("sup").asNode());
808                        }
809                        qe.close();
810                } catch (Exception e) {
811                        logger.error("ERROR. Getting super classes of " + cls + " failed.", e);
812                }
813                
814                return superClasses;
815        }
816        
817        @Override
818        public void visit(ElementGroup el) {
819                parents.push(el);
820                for (Element e : el.getElements()) {
821                        e.visit(this);
822                }
823                parents.pop();
824        }
825
826        @Override
827        public void visit(ElementOptional el) {
828                optionalCount++;
829                inOptionalClause = true;
830                el.getOptionalElement().visit(this);
831                inOptionalClause = false;
832        }
833
834        @Override
835        public void visit(ElementTriplesBlock el) {
836                for (Iterator<Triple> iterator = el.patternElts(); iterator.hasNext();) {
837                        Triple t = iterator.next();
838                        if(inOptionalClause){
839                                optionalTriplePattern.add(t);
840                        } else {
841                                triplePattern.add(t);
842                        }
843                        if(!parents.isEmpty()){
844                                triple2Parent.put(t, parents.peek());
845                        }
846                }
847        }
848
849        @Override
850        public void visit(ElementPathBlock el) {
851                for (Iterator<TriplePath> iterator = el.patternElts(); iterator.hasNext();) {
852                        TriplePath tp = iterator.next();
853                        if(inOptionalClause){
854                                if(tp.isTriple()){
855                                        optionalTriplePattern.add(tp.asTriple());
856                                        if(!parents.isEmpty()){
857                                                triple2Parent.put(tp.asTriple(), parents.peek());
858                                        }
859                                }
860                        } else {
861                                if(tp.isTriple()){
862                                        triplePattern.add(tp.asTriple());
863                                        if(!parents.isEmpty()){
864                                                triple2Parent.put(tp.asTriple(), parents.peek());
865                                        }
866                                }
867                        }
868                        
869                }
870        }
871
872        @Override
873        public void visit(ElementUnion el) {
874                unionCount++;
875                for (Element e : el.getElements()) {
876                        e.visit(this);
877                }
878        }
879        
880        @Override
881        public void visit(ElementFilter el) {
882                filterCount++;
883        }
884
885        public int getUnionCount() {
886                return unionCount;
887        }
888
889        public int getOptionalCount() {
890                return optionalCount;
891        }
892
893        public int getFilterCount() {
894                return filterCount;
895        }
896        
897        /**
898         * Returns the ElementGroup object containing the triple pattern.
899         * @param triple the triple patterm
900         * @return
901         */
902        public ElementGroup getElementGroup(Triple triple){
903                return triple2Parent.get(triple);
904        }
905
906        public static class LabeledEdge extends DefaultEdge {
907                private final Node s;
908                private final Node t;
909                private final Node edge;
910
911                public LabeledEdge(Node s, Node t, Node edge) {
912                        this.s = s;
913                        this.t = t;
914                        this.edge = edge;
915                }
916
917                public Node getEdge() {
918                        return edge;
919                }
920
921                @Override
922                public String toString() {
923                        return edge.toString();
924                }
925        }
926        
927        public static void main(String[] args) throws Exception {
928                Query q = QueryFactory.create(
929                                "PREFIX  dbp:  <http://dbpedia.org/resource/>\n" + 
930                                "PREFIX  dbo: <http://dbpedia.org/ontology/>\n" + 
931                                "SELECT  ?thumbnail\n" + 
932                                "WHERE\n" + 
933                                "  { dbp:total dbo:thumbnail ?thumbnail }");
934                QueryUtils queryUtils = new QueryUtils();
935                System.out.println(queryUtils.extractOutgoingTriplePatterns(q, q.getProjectVars().get(0)));
936                System.out.println(queryUtils.extractIncomingTriplePatterns(q, q.getProjectVars().get(0)));
937                
938                q = QueryFactory.create("SELECT DISTINCT  ?x0\n" + 
939                                "WHERE\n" + 
940                                "  { ?x0  <http://dbpedia.org/ontology/activeYearsEndYear>  ?date5 ;\n" + 
941                                "         <http://dbpedia.org/ontology/activeYearsStartYear>  ?date4 ;\n" + 
942                                "         <http://dbpedia.org/ontology/birthDate>  ?date0 ;\n" + 
943                                "         <http://dbpedia.org/ontology/birthPlace>  <http://dbpedia.org/resource/Austria> ;\n" + 
944                                "         <http://dbpedia.org/ontology/birthPlace>  <http://dbpedia.org/resource/Austria-Hungary> ;\n" + 
945                                "         <http://dbpedia.org/ontology/birthPlace>  <http://dbpedia.org/resource/Vienna> ;\n" + 
946                                "         <http://dbpedia.org/ontology/birthYear>  ?date3 ;\n" + 
947                                "         <http://dbpedia.org/ontology/deathDate>  ?date2 ;\n" + 
948                                "         <http://dbpedia.org/ontology/deathPlace>  <http://dbpedia.org/resource/Berlin> ;\n" + 
949                                "         <http://dbpedia.org/ontology/deathPlace>  <http://dbpedia.org/resource/Germany> ;\n" + 
950                                "         <http://dbpedia.org/ontology/deathYear>  ?date1 ;\n" + 
951                                "         <http://dbpedia.org/ontology/occupation>  <http://dbpedia.org/resource/Hilde_K%C3%B6rber__1> ;\n" + 
952                                "         <http://dbpedia.org/ontology/viafId>  \"32259546\" ;\n" + 
953                                "         <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  <http://dbpedia.org/ontology/Person> .\n" + 
954                                "    FILTER ( ( str(?date0) = \"1906-07-03+02:00\" ) || ( str(?date0) = \"1906-07-03\" ) )\n" + 
955                                "    FILTER ( ( str(?date1) = \"1969+02:00\" ) || ( str(?date1) = \"1969-01-01\" ) )\n" + 
956                                "    FILTER ( ( str(?date2) = \"1969-05-31+02:00\" ) || ( str(?date2) = \"1969-05-31\" ) )\n" + 
957                                "    FILTER ( ( str(?date3) = \"1906+02:00\" ) || ( str(?date3) = \"1906-01-01\" ) )\n" + 
958                                "    FILTER ( ( str(?date4) = \"1930+02:00\" ) || ( str(?date4) = \"1930-01-01\" ) )\n" + 
959                                "    FILTER ( ( str(?date5) = \"1964+02:00\" ) || ( str(?date5) = \"1964-01-01\" ) )\n" + 
960                                "  }");
961                
962                System.out.println(queryUtils.removeUnboundObjectVarTriples(q));
963                
964                String query = "SELECT DISTINCT ?s WHERE {"
965                                + "?s a <http://dbpedia.org/ontology/BeautyQueen> ."
966                                + "?s <http://dbpedia.org/ontology/birthPlace> ?o0 ."
967                                + "?o0 <http://dbpedia.org/ontology/isPartOf> ?o1 ."
968                                + "?o1 <http://dbpedia.org/ontology/timeZone> <http://dbpedia.org/resource/Eastern_Time_Zone> .}";
969                
970                System.out.println(QueryUtils.getSubjectObjectJoinDepth(QueryFactory.create(query), Var.alloc("s")));
971
972                System.out.println(queryUtils.extractOutgoingTriplePatternsTrans(QueryFactory.create(query), Var.alloc("s")));
973        }
974
975
976}