001package org.dllearner.algorithms.qtl.operations.tuples;
002
003import java.io.File;
004import java.util.*;
005import java.util.concurrent.atomic.AtomicInteger;
006import java.util.function.Function;
007import java.util.stream.Collector;
008import java.util.stream.Collectors;
009import java.util.stream.Stream;
010
011import com.google.common.base.StandardSystemProperty;
012import com.google.common.collect.Lists;
013import com.google.common.collect.Maps;
014import com.google.common.collect.Sets;
015import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
016import org.apache.jena.graph.Node;
017import org.apache.jena.graph.NodeFactory;
018import org.apache.jena.query.*;
019import org.apache.jena.rdf.model.Model;
020import org.apache.jena.shared.PrefixMapping;
021import org.apache.jena.sparql.core.Var;
022import org.apache.jena.sparql.util.FmtUtils;
023import org.apache.logging.log4j.LogManager;
024import org.dllearner.algorithms.qtl.QueryTreeUtils;
025import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree;
026import org.dllearner.algorithms.qtl.datastructures.rendering.Edge;
027import org.dllearner.algorithms.qtl.datastructures.rendering.Vertex;
028import org.dllearner.algorithms.qtl.exception.QTLException;
029import org.dllearner.algorithms.qtl.impl.QueryTreeFactory;
030import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryBase;
031import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryBaseInv;
032import org.dllearner.algorithms.qtl.operations.lgg.LGGGenerator;
033import org.dllearner.algorithms.qtl.operations.lgg.LGGGeneratorSimple;
034import org.dllearner.algorithms.qtl.operations.traversal.PreOrderTreeTraversal;
035import org.dllearner.algorithms.qtl.operations.traversal.TreeTraversal;
036import org.dllearner.algorithms.qtl.util.filters.AbstractTreeFilter;
037import org.dllearner.algorithms.qtl.util.filters.MostSpecificTypesFilter;
038import org.dllearner.algorithms.qtl.util.filters.PredicateExistenceFilterDBpedia;
039import org.dllearner.algorithms.qtl.util.filters.SymmetricPredicatesFilter;
040import org.dllearner.algorithms.qtl.util.vocabulary.DBpedia;
041import org.dllearner.core.AbstractReasonerComponent;
042import org.dllearner.kb.SparqlEndpointKS;
043import org.dllearner.kb.sparql.*;
044import org.dllearner.reasoning.SPARQLReasoner;
045import org.dllearner.utilities.QueryUtils;
046import org.jgrapht.Graph;
047import org.jgrapht.io.ExportException;
048import org.jgrapht.io.GraphMLExporter;
049import org.jgrapht.io.IntegerComponentNameProvider;
050import static java.util.stream.Collectors.groupingBy;
051import static java.util.stream.Collectors.toList;
052
053/**
 * Experimental algorithm to generate SPARQL queries from examples, each of which is a tuple of RDF nodes, i.e.
 * either a resource or a literal.
056 *
057 * For example
058 * (:a 15) and (:b 16)
059 * (:a :x "15-11-2000") and (:b :x "15-11-2000")
060 *
061 * The result should be a SPARQL query with n projection variables where n denotes the arity of the tuple (n-tuple).
062 *
063 * @author Lorenz Buehmann
064 */
065public class QTLTuples {
066
    private static final org.apache.logging.log4j.Logger log = LogManager.getLogger(QTLTuples.class);

    // access to the underlying knowledge base (SPARQL endpoint or local model)
    private final QueryExecutionFactory qef;

    // pluggable components, replaceable via the corresponding setters below
    private ConciseBoundedDescriptionGenerator cbdGen;
    private QueryTreeFactory treeFactory;
    private LGGGenerator lggGenerator;

    // used for rendering
    private PrefixMapping pm;
    private String baseIRI;

    // max. depth used for data retrieval and query tree generation (see setMaxTreeDepth)
    private int maxTreeDepth = 1;

    // filters applied to trees via applyFilters(); insertion order is preserved
    private Set<AbstractTreeFilter<RDFResourceTree>> treeFilters = new LinkedHashSet<>();
    /**
     * Registers a tree filter.
     *
     * @param treeFilter the filter to add
     * @return {@code true} if the filter was not registered before
     */
    public boolean addTreeFilter(AbstractTreeFilter<RDFResourceTree> treeFilter) {
        return treeFilters.add(treeFilter);
    }
    /**
     * Unregisters a tree filter.
     *
     * @param treeFilter the filter to remove
     * @return {@code true} if the filter was registered before
     */
    public boolean removeTreeFilter(AbstractTreeFilter<RDFResourceTree> treeFilter) {
        return treeFilters.remove(treeFilter);
    }
088
089
090    public QTLTuples(QueryExecutionFactory qef) {
091        this.qef = qef;
092
093        cbdGen = new ConciseBoundedDescriptionGeneratorImpl(qef);
094        treeFactory = new QueryTreeFactoryBase();
095        lggGenerator = new LGGGeneratorSimple();
096    }
097
098    /**
099     * Run the QTL algorithm given the 2 tuples as input example.
100     *
101     * @param tuple1 the first example
102     * @param tuple2 the second example
103     */
104    public void run(List<Node> tuple1, List<Node> tuple2) {
105        Objects.requireNonNull(tuple1,"First tuple must not be null");
106        Objects.requireNonNull(tuple2,"Second tuple must not be null");
107
108        run(Lists.newArrayList(tuple1, tuple2));
109    }
110
111    /**
112     * Run the QTL algorithm given the list of tuples as input examples.
113     *
114     * The elements of a tuple <code>t = (e_1, ..., e_n)</code> represent arbitrary RDF terms, i.e. each <code>e_i</code> can be
115     * either an IRI, a literal, or a blank node.
116     *
117     * <p>
118     *  Requirements:
119     *  <ul>
120     *      <li>at least 2 tuples </li>
121     *      <li>a tuple must contain at least one element</li>
122     *      <li>for all tuples the number of elements must be the same</li>
123     *  </ul>
124     * </p>
125     *
126     * @param tuples the examples
127     */
128    public List<Map.Entry<RDFResourceTree, List<Node>>> run(List<List<Node>> tuples) {
129        Objects.requireNonNull(tuples,"Tuples must not be null");
130
131        // sanity checks first
132        checkInput(tuples);
133
134        log.info("input tuples {}", tuples.stream().map(Object::toString).collect(Collectors.joining("\n")));
135
136        // handle case with tuples of length separately -> just use the LGG of the trees
137        if(tuples.get(0).size() == 1) {
138            return runSingleNodeTuples(tuples);
139        }
140
141
142        // 1. we have to retrieve data for each node
143        // in particular each resource node
144        // for literals it could be to complicated as
145        // a) there are no outgoing triples and
146        // b) the number of incoming triples could be too large as literals like numbers could be used anywhere as value
147
148        // 2. for each mapping of trees, build graph(s) of connected trees
149
150
151        List<Map<String, Map.Entry<RDFResourceTree, List<Node>>>> tuple2Trees = tuples.stream().map(this::connect).collect(toList());
152//        List<Map<String, Map.Entry<RDFResourceTree, List<Node>>>> tuple2Trees = tuples.stream().map(this::computeConnectedTrees).collect(toList());
153
154        // cluster by key
155        Map<String, ArrayList<Map.Entry<RDFResourceTree, List<Node>>>> grouped = tuple2Trees.stream()
156                .flatMap(m -> m.entrySet().stream())
157                .collect(groupingBy(Map.Entry::getKey,
158                        Collector.of(ArrayList::new, (s, p) -> s.add(p.getValue()), (s1, s2) -> {
159                            s1.addAll(s2);
160                            return s1;
161                        })));
162
163        // compute LGG per each key
164        List<Map.Entry<RDFResourceTree, List<Node>>> solutions = grouped.entrySet().stream()
165                .filter(e -> e.getValue().size() == tuples.size())
166                .flatMap(entry -> {
167                    log.debug("computing LGG for " + entry.getKey());
168
169                    List<Map.Entry<RDFResourceTree, List<Node>>> list = entry.getValue();
170
171                    List<RDFResourceTree> trees = list.stream().map(Map.Entry::getKey).collect(toList());
172
173                    List<Node> nodes2Select = list.get(0).getValue();
174
175                    trees.forEach(t -> log.trace("tree:\n{}", t::getStringRepresentation));
176
177                    RDFResourceTree lgg = lggGenerator.getLGG(trees);
178                    log.debug("lgg:\n{}", lgg::getStringRepresentation);
179
180                    if(lgg.isResourceNode()) {
181                        log.warn("lgg was not generalizing with root {}", lgg);
182                        return Stream.empty();
183                    }
184
185//            System.out.println("LGG\n" + lgg.getStringRepresentation());
186//            System.out.println(QueryTreeUtils.toSPARQLQueryString(lgg, nodes2Select, null, PrefixMapping.Standard));
187
188                    return Stream.of(Maps.immutableEntry(lgg, nodes2Select));
189                })
190                .collect(Collectors.toList());
191
192        return solutions;
193    }
194
195    private List<Map.Entry<RDFResourceTree, List<Node>>> runSingleNodeTuples(List<List<Node>> tuples) {
196        // map nodes to trees
197        List<RDFResourceTree> trees = tuples.stream()
198                .flatMap(Collection::stream) // flatten list of lists of nodes
199                .map(this::asTree) // map node to tree
200                .map(Optional::get)
201                .collect(Collectors.toList());
202
203        // compute LGG
204        RDFResourceTree lgg = lggGenerator.getLGG(trees);
205        log.debug("lgg:\n{}", lgg::getStringRepresentation);
206
207        return Collections.singletonList(Maps.immutableEntry(lgg, Collections.emptyList()));
208    }
209
210    private void checkInput(List<List<Node>> tuples) {
211        Objects.requireNonNull(tuples,"Tuples must not be null");
212
213        // check for at least 2 tuples
214        if(tuples.size() < 2) {
215            log.warn("Min. number of input tuples is 2.");
216            throw new IllegalArgumentException("Min. number of input tuples is 2.");
217        }
218
219        // check for all tuples having same length
220        boolean sameTupleLength = tuples.stream().mapToInt(List::size).distinct().count() == 1;
221        if(!sameTupleLength) {
222            log.warn("Not all tuples have the same length. Currently, this is required!");
223            throw new IllegalArgumentException("Not all tuples have the same length. Currently, this is required!");
224        }
225    }
226
227    private RDFResourceTree applyFilters(RDFResourceTree tree, List<Node> nodes2Keep) {
228        RDFResourceTree filteredTree = tree;
229
230        for (AbstractTreeFilter<RDFResourceTree> f : treeFilters) {
231            f.setNodes2Keep(nodes2Keep);
232            filteredTree = f.apply(filteredTree);
233        }
234
235        return filteredTree;
236    }
237
238//    private Map<Node, RDFResourceTree> asTrees(Set<Node> nodes) {
239//
240//    }
241
    // whether literal values should be taken into account
    // NOTE(review): currently not read anywhere in this class — kept for API compatibility
    private boolean useLiteralData = true;

    public void setUseLiteralData(boolean useLiteralData) {
        this.useLiteralData = useLiteralData;
    }

    // counter only referenced by the commented-out GraphML export debug code in connect()
    int cnt = 0;
249
250    private Map<String, Map.Entry<RDFResourceTree, List<Node>>> connect(List<Node> tuple) {
251        log.debug("generating connected tree for tuple {}", tuple);
252
253        // filter URI resources
254        Set<String> resources = tuple.stream()
255                .filter(Node::isURI)
256                .map(Node::getURI)
257                .collect(Collectors.toSet());
258
259        // map to one large model
260        Model model = cbdGen.getConciseBoundedDescription(resources);
261        if(model.isEmpty()) {
262            throw new RuntimeException(new QTLException("Could not get data for tuple " + tuple));
263        }
264        log.debug("#triples:{}", model.size());
265
266//        PseudoGraphJenaGraph g = new PseudoGraphJenaGraph(model.getGraph());
267
268//        List<RDFNode> steinerNodes = tuple.stream().map(model::asRDFNode).collect(Collectors.toList());
269//        SteinerTreeGeneric<RDFNode, Statement> steinerTreeGen = new SteinerTreeGeneric<>(g, steinerNodes,
270//                new EdgeFactoryJenaModel(model, anyProp));
271//        WeightedMultigraph<RDFNode, Statement> steinerTree = steinerTreeGen.getDefaultSteinerTree();
272
273//        GraphMLExporter<Node, Triple> exporter = new GraphMLExporter<>(
274//                Node::toString, n -> FmtUtils.stringForNode(n, pm),
275//                new IntegerComponentNameProvider<>(), e -> FmtUtils.stringForNode(e.getPredicate(), pm));
276//        try {
277//            exporter.exportGraph(g, new FileWriter(new File("/tmp/steiner_tree_" + cnt++ + ".graphml")));
278//        } catch (IOException | ExportException e) {
279//            log.error("failed to write graph to file", e);
280//        }
281
282        List<Node> nodes = new ArrayList<>(tuple);
283
284        // starting from each node n, create the tree with n as root
285        Map<String, Map.Entry<RDFResourceTree, List<Node>>> result = new TreeMap<>();
286        tuple.stream()
287                .filter(Node::isURI)
288                .forEach(node -> {
289                    RDFResourceTree tree = treeFactory.getQueryTree(node.getURI(), model, 3);
290                    Set<Node> nodes2Select = new LinkedHashSet<>(nodes);
291//                    nodes.remove(node);
292//                    Set<String> keys = keys(tree, asNodes(nodes));
293
294//                    System.out.println(nodes);
295//                    System.out.println(node);
296//                    System.out.println(tree.getStringRepresentation());
297//                    System.out.println(QueryTreeUtils.getNodes(tree).containsAll(nodes));
298
299                    String key = Integer.toString(nodes.indexOf(node));
300                    if(QueryTreeUtils.getNodeLabels(tree).containsAll(nodes)) {
301                        nodes2Select.remove(node);
302
303                        Set<Node> nodes2Project = new LinkedHashSet<>();
304                        nodes2Select.forEach(n -> {
305                            Node anchor = NodeFactory.createBlankNode("var" + tuple.indexOf(n));
306                            getMatchingTreeNodes(tree, n).forEach(child -> {
307                                child.setAnchorVar(anchor);
308                                nodes2Project.add(anchor);
309                            });
310                        });
311                        log.debug("connected tree\n{}", tree::getStringRepresentation);
312                        QueryTreeUtils.asGraph(tree, baseIRI, pm, new File(System.getProperty("java.io.tmpdir") + File.separator + "tree-" + FmtUtils.stringForNode(node, pm) + ".graphml"));
313                        result.put(key, Maps.immutableEntry(tree, new ArrayList<>(nodes2Project)));
314                    };
315
316
317
318                });
319
320
321        log.debug("got {} possible connected trees", result.size());
322        return result;
323    }
324
325//    private Set<String> keys(RDFResourceTree tree, List<Node> nodes) {
326//        List<RDFResourceTree> children = tree.getChildren();
327//        String key = "" + nodes.indexOf(tree.getData());
328//        children.stream().map(child -> {
329//            int pos = nodes.indexOf(child.getData());
330//            if( pos >= 0) {
331//                key += pos + ""
332//            }
333//        });
334//
335//    }
336
337
338    private Map<String, Map.Entry<RDFResourceTree, List<Node>>> computeConnectedTrees(List<Node> tuple) {
339
340        // mapping to tree for each node in tuple
341        Map<Node, Optional<RDFResourceTree>> mapping = mapping(tuple);
342
343        Map<String, Map.Entry<RDFResourceTree, List<Node>>> key2Trees = new HashMap<>();
344
345        mapping.forEach((node, tree) -> {
346            if(node.isURI()) {
347                final StringBuilder key = new StringBuilder(tuple.indexOf(node));
348                List<Node> nodes2Select = new ArrayList<>();
349
350                RDFResourceTree newTree = new RDFResourceTree(tree.get());
351
352                final AtomicInteger modified = new AtomicInteger(0);
353
354                mapping.forEach((otherNode, otherTree) -> {
355                    if(!node.equals(otherNode)) {
356                        List<RDFResourceTree> matchingTreeNodes = getMatchingTreeNodes(newTree, otherNode);
357
358                        if(!matchingTreeNodes.isEmpty()) {
359                            modified.set(1);
360                            key.append(tuple.indexOf(otherNode));
361                        }
362
363                        // plugin tree of other node
364                        matchingTreeNodes.forEach(treeNode -> {
365                            if(treeNode.isResourceNode()) {
366                                Node edge = treeNode.getEdgeToParent();
367                                RDFResourceTree parent = treeNode.getParent();
368                                // copy the tree that will be attached in the current tree
369                                RDFResourceTree newChild = new RDFResourceTree(otherTree.get());
370                                // replace the data with some anchor
371                                Node newData = NodeFactory.createBlankNode("var" + tuple.indexOf(otherNode));
372//                                newChild.setData(newData);
373                                newChild.setAnchorVar(newData);
374
375                                // attach the tree as child node
376                                parent.replaceChild(treeNode, newChild, edge);
377
378                                parent.addChild(newChild, edge);
379                                nodes2Select.add(newData);
380//                                System.out.println("TEST\n" + newTree.getStringRepresentation());
381                            } else {
382                                Node edge = treeNode.getEdgeToParent();
383                                RDFResourceTree parent = treeNode.getParent();
384                                parent.removeChild(treeNode, edge);
385                                Node newData = NodeFactory.createBlankNode("var" + tuple.indexOf(otherNode));
386//                                treeNode.setData(newData);
387                                treeNode.setAnchorVar(newData);
388                                parent.addChild(treeNode, edge);
389                                nodes2Select.add(newData);
390                            }
391
392                        });
393
394
395                    }
396                });
397                if(modified.get() == 1) {
398                    log.debug("connected tree({}):\n{}", () -> key, newTree::getStringRepresentation);
399
400//                    QueryTreeUtils.asGraph(newTree, baseIRI, pm, new File("/tmp/tree-" + pm.shortForm(node.getURI()) + ".graphml"));
401                    key2Trees.put(key.toString(), Maps.immutableEntry(newTree, nodes2Select));
402                }
403            }
404        });
405
406        return key2Trees;
407    }
408
409    /**
410     * Find nodes matching the data in the given tree.
411     */
412    private List<RDFResourceTree> getMatchingTreeNodes(RDFResourceTree tree, Node node) {
413        List<RDFResourceTree> treeNodes = new ArrayList<>();
414
415        TreeTraversal<RDFResourceTree> treeTraversal = new PreOrderTreeTraversal<>(tree);
416        treeTraversal.forEachRemaining(treeNode -> {
417           if(treeNode.getData().matches(node)) {
418               treeNodes.add(treeNode);
419            }
420        });
421
422        return treeNodes;
423    }
424
    /**
     * Builds the query tree for a single node of a tuple.
     *
     * For URI nodes the tree is built from the node's CBD; for non-URI nodes either a trivial
     * single-node tree is returned or, if {@code useIncomingTriples} is set, a tree based on the
     * node's incoming triples. Returns {@link Optional#empty()} when the CBD retrieval fails.
     *
     * @param node the RDF node to build a tree for
     * @return the query tree, or empty on retrieval failure
     */
    private Optional<RDFResourceTree> asTree(Node node) {
        if (node.isURI()) {
            if(useIncomingTriples) {
                // CBD structure with one level of incoming triples plus their outgoing ones
                TreeBasedConciseBoundedDescriptionGenerator treeCBDGen = new TreeBasedConciseBoundedDescriptionGenerator(qef);
                try {
                    Model cbd = treeCBDGen.getConciseBoundedDescription(node.getURI(), CBDStructureTree.fromTreeString("root:[in:[out:[]],out:[]]"));
                    // NOTE(review): depth is hard-coded to 2 here (elsewhere maxTreeDepth is used) and the
                    // root is identified via node.toString() rather than node.getURI() — confirm intended
                    return Optional.of(treeFactory.getQueryTree(node.toString(), cbd, 2));
                } catch (Exception e) {
                    log.error("Failed to compute CBD for " + node, e);
                }
                return Optional.empty();
            } else {
                // plain outgoing CBD up to the configured depth
                String iri = node.getURI();
                Model cbd = cbdGen.getConciseBoundedDescription(iri, maxTreeDepth);
                RDFResourceTree tree = treeFactory.getQueryTree(node.getURI(), cbd, maxTreeDepth);
                log.debug("tree({}):\n{}", node::toString, tree::getStringRepresentation);
                return Optional.of(tree);
            }
        } else {
            if(useIncomingTriples) {
                // literal/blank node: only incoming triples (and their outgoing ones) are available
                TreeBasedConciseBoundedDescriptionGenerator treeCBDGen = new TreeBasedConciseBoundedDescriptionGenerator(qef);
                try {
                    Model cbd = treeCBDGen.getConciseBoundedDescription(node.getLiteral(), CBDStructureTree.fromTreeString("root:[in:[out:[]]]"));
                    return Optional.of(treeFactory.getQueryTree(node.toString(), cbd, maxTreeDepth));
                } catch (Exception e) {
                    log.error("Failed to compute CBD for " + node, e);
                }
            } else {
                // no data retrieval: the node itself forms a trivial single-node tree
                return Optional.of(new RDFResourceTree(node));
            }
            return Optional.empty();
        }
    }
458
459    private LinkedHashMap<Node, Optional<RDFResourceTree>> mapping(List<Node> tuple) {
460        return tuple.stream().collect(
461                Collectors.toMap(
462                        Function.identity(),
463                        this::asTree,
464                        (u, v) -> {
465                            throw new IllegalStateException(String.format("Duplicate key %s", u));
466                        },
467                        LinkedHashMap::new)
468        );
469    }
470
    // whether incoming triples (?s ?p <node>) are used in addition to outgoing ones when
    // building the tree of an example node (see asTree())
    private boolean useIncomingTriples = false;

    public boolean isUseIncomingTriples() {
        return useIncomingTriples;
    }

    public void setUseIncomingTriples(boolean useIncomingTriples) {
        this.useIncomingTriples = useIncomingTriples;
    }
480
481    /**
482     * @param cbdGen the generator used to create the CBD for each resource in an input tuple
483     */
484    public void setCBDGenerator(ConciseBoundedDescriptionGenerator cbdGen) {
485        this.cbdGen = cbdGen;
486    }
487
488    /**
489     * @param treeFactory the factory used to create a tree from a resource and its set of triples (CBD)
490     */
491    public void setTreeFactory(QueryTreeFactory treeFactory) {
492        this.treeFactory = treeFactory;
493    }
494
495    /**
496     * @param lggGenerator the LGG generator used during the QTL algorithm
497     */
498    public void setLggGenerator(LGGGenerator lggGenerator) {
499        this.lggGenerator = lggGenerator;
500    }
501
502    /**
503     * @param maxTreeDepth max. tree depth used for data retrieval and query tree generation of the input examples.
504     */
505    public void setMaxTreeDepth(int maxTreeDepth) {
506        this.maxTreeDepth = maxTreeDepth;
507    }
508
    /**
     * @param pm the prefix mapping used for rendering trees and queries
     */
    public void setPrefixMapping(PrefixMapping pm) {
        this.pm = pm;
    }
512
    /**
     * @param baseIRI the base IRI used for rendering trees and queries
     */
    public void setBaseIRI(String baseIRI) {
        this.baseIRI = baseIRI;
    }
516
517    public static void main(String[] args) throws Exception {
518        System.setProperty("logFilename", "log4j2.properties");
519//        org.apache.log4j.Logger.getRootLogger().getLoggerRepository().resetConfiguration();
520//        org.apache.log4j.Logger.getRootLogger().setLevel(Level.DEBUG);
521//        org.apache.log4j.Logger.getLogger(QTLTuples.class).setLevel(Level.DEBUG);
522
523        String queryStr = "select * where { " +
524                "?company a <http://dbpedia.org/ontology/Organisation> . " +
525                "?company <http://dbpedia.org/ontology/foundationPlace> <http://dbpedia.org/resource/California> . " +
526                "?product <http://dbpedia.org/ontology/developer> ?company . " +
527                "?product a <http://dbpedia.org/ontology/Software> . }";
528
529        queryStr = "PREFIX foaf: <http://xmlns.com/foaf/0.1/> " +
530                "PREFIX dbp: <http://dbpedia.org/property/> " +
531                "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " +
532                "PREFIX dbont: <http://dbpedia.org/ontology/> " +
533                "SELECT ?name ?date WHERE { " +
534                "?name dbont:artist <http://dbpedia.org/resource/The_Beatles> . " +
535                "?name rdf:type dbont:Album . " +
536                "?name dbont:releaseDate ?date}" +
537                " ORDER BY ?date";
538
539        queryStr = "PREFIX dbpedia: <http://dbpedia.org/ontology/> \n" +
540                "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" +
541                "SELECT distinct ?b ?c WHERE { \n" +
542                "?b rdf:type dbpedia:Film . \n" +
543                "?c rdf:type dbpedia:Artist . \n" +
544                "?b dbpedia:director <http://dbpedia.org/resource/Clint_Eastwood> ; \n" +
545                "dbpedia:starring ?c . }";
546
547        queryStr = "SELECT DISTINCT  *\n" +
548                "WHERE\n" +
549                "  { ?company  a                     <http://dbpedia.org/ontology/Organisation> ;\n" +
550                "              <http://dbpedia.org/ontology/foundationPlace>  <http://dbpedia.org/resource/California> .\n" +
551                "    ?product  <http://dbpedia.org/ontology/developer>  ?company ;\n" +
552                "              a                     <http://dbpedia.org/ontology/Software>\n" +
553                "  }";
554
555        queryStr = "PREFIX  :     <http://dbpedia.org/resource/>\n" +
556                "PREFIX  dbo:  <http://dbpedia.org/ontology/>\n" +
557                "PREFIX  owl:  <http://www.w3.org/2002/07/owl#>\n" +
558                "PREFIX  rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
559                "PREFIX  xsd:  <http://www.w3.org/2001/XMLSchema#>\n" +
560                "PREFIX  skos: <http://www.w3.org/2004/02/skos/core#>\n" +
561                "PREFIX  rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" +
562                "PREFIX  dbpedia: <http://dbpedia.org/>\n" +
563                "PREFIX  dbpedia2: <http://dbpedia.org/property/>\n" +
564                "PREFIX  foaf: <http://xmlns.com/foaf/0.1/>\n" +
565                "PREFIX  dc:   <http://purl.org/dc/elements/1.1/>\n" +
566                "\n" +
567                "SELECT DISTINCT  ?name ?birth ?death ?person\n" +
568                "WHERE\n" +
569                "  { ?person  dbpedia2:birthPlace  :France ;\n" +
570                "             dbo:birthDate        ?birth ;\n" +
571                "             foaf:name            ?name ;\n" +
572                "             dbo:deathDate        ?death\n" +
573                "  }\n" +
574                "ORDER BY ?name";
575        queryStr = "PREFIX  :     <http://dbpedia.org/resource/>\n" +
576                "PREFIX  rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
577                "PREFIX  db-ont: <http://dbpedia.org/ontology/>\n" +
578                "PREFIX  dbpedia: <http://dbpedia.org/>\n" +
579                "PREFIX  dbpedia2: <http://dbpedia.org/property/>\n" +
580                "SELECT DISTINCT  *\n" +
581                "WHERE\n" +
582                "  { ?select  rdf:type        db-ont:Film ;\n" +
583                "             dbpedia2:title  ?ft .\n" +
584                "    ?id      db-ont:imdbId   ?imdb_id\n" +
585                "  }";
586
587        queryStr = "SELECT DISTINCT  *\n" +
588                "WHERE\n" +
589                "  { <http://dbpedia.org/resource/United_States>\n" +
590                "              a                     <http://dbpedia.org/ontology/Country> ;\n" +
591                "              ?p                    ?o\n" +
592                "  }";
593
594        queryStr = "PREFIX  :     <http://dbpedia.org/resource/>\n" +
595                "PREFIX  dbpedia-owl: <http://dbpedia.org/ontology/>\n" +
596                "PREFIX  owl:  <http://www.w3.org/2002/07/owl#>\n" +
597                "PREFIX  rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
598                "PREFIX  xsd:  <http://www.w3.org/2001/XMLSchema#>\n" +
599                "PREFIX  skos: <http://www.w3.org/2004/02/skos/core#>\n" +
600                "PREFIX  rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" +
601                "PREFIX  dbpedia: <http://dbpedia.org/>\n" +
602                "PREFIX  dbpedia2: <http://dbpedia.org/property/>\n" +
603                "PREFIX  foaf: <http://xmlns.com/foaf/0.1/>\n" +
604                "PREFIX  dc:   <http://purl.org/dc/elements/1.1/>\n" +
605                "\n" +
606                "SELECT DISTINCT  ?homepage\n" +
607                "WHERE\n" +
608                "  { ?person  rdf:type       dbpedia-owl:Place ;\n" +
609                "             foaf:homepage  ?homepage\n" +
610                "  }";
611
612        queryStr = "PREFIX  property: <http://dbpedia.org/property/>\n" +
613                "PREFIX  foaf: <http://xmlns.com/foaf/0.1/>\n" +
614                "PREFIX  db:   <http://dbpedia.org/ontology/>\n" +
615                "\n" +
616                "SELECT DISTINCT  *\n" +
617                "WHERE\n" +
618                "  { ?musician  a                    db:MusicalArtist ;\n" +
619                "              db:activeYearsStartYear  ?activeyearsstartyear ;\n" +
620                "              db:associatedBand     ?associatedband ;\n" +
621                "              db:birthPlace         ?birthplace ;\n" +
622                "              db:genre              ?genre ;\n" +
623                "              db:recordLabel        ?recordlable\n" +
624                "  }";
625
626        int limit = 10;
627
628        Query query = QueryFactory.create(queryStr);
629        query.setOffset(0);
630        query.setLimit(limit);
631
632        System.out.println("Input query:\n" + query);
633
634        String baseIRI = DBpedia.BASE_IRI;
635        PrefixMapping pm = DBpedia.PM;
636
637        SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
638        endpoint = SparqlEndpoint.create("http://localhost:7200/repositories/dbpedia?infer=false", Collections.emptyList());
639        SparqlEndpointKS ks = new SparqlEndpointKS(endpoint);
640        ks.init();
641        AbstractReasonerComponent reasoner = new SPARQLReasoner(ks);
642        reasoner.init();
643        reasoner.prepareSubsumptionHierarchy();
644
645        List<List<Node>> tuples = new ArrayList<>();
646        QueryExecutionFactory qef = ks.getQueryExecutionFactory();
647        try(QueryExecution qe = qef.createQueryExecution(query)) {
648            List<Var> projectVars = query.getProjectVars();
649            ResultSet rs = qe.execSelect();
650            while(rs.hasNext()) {
651                QuerySolution qs = rs.next();
652                List<Node> tuple = new ArrayList<>();
653                projectVars.forEach(var -> tuple.add(qs.get(var.getName()).asNode()));
654                tuples.add(tuple);
655            }
656        } catch (Exception e) {
657            e.printStackTrace();
658        }
659
660//        tuples = Lists.newArrayList(
661//                        Lists.newArrayList(
662//                                NodeFactory.createURI("http://dbpedia.org/resource/Brad_Pitt"),
663//                                NodeFactory.createLiteral("1963-12-18", XSDDatatype.XSDdate)),
664//                Lists.newArrayList(
665//                        NodeFactory.createURI("http://dbpedia.org/resource/Tom_Hanks"),
666//                        NodeFactory.createLiteral("1956-07-09", XSDDatatype.XSDdate))
667//        );
668
669        Set<String> ignoredProperties = DBpedia.BLACKLIST_PROPERTIES;
670
671        ConciseBoundedDescriptionGenerator cbdGen = new ConciseBoundedDescriptionGeneratorImpl(ks.getQueryExecutionFactory());
672        cbdGen.setIgnoredProperties(ignoredProperties);
673        cbdGen.setAllowedPropertyNamespaces(Sets.newHashSet("http://dbpedia.org/ontology/", "http://dbpedia.org/property/"));
674        cbdGen.setAllowedClassNamespaces(Sets.newHashSet("http://dbpedia.org/ontology/"));
675
676        QueryTreeFactory tf = new QueryTreeFactoryBaseInv();
677        tf.setMaxDepth(2);
678
679        int depth = 1;
680
681        QTLTuples qtl = new QTLTuples(qef);
682        qtl.setMaxTreeDepth(depth);
683        qtl.setBaseIRI(baseIRI);
684        qtl.setPrefixMapping(pm);
685        qtl.setCBDGenerator(cbdGen);
686        qtl.setTreeFactory(tf);
687
688
689        List<AbstractTreeFilter<RDFResourceTree>> filters = Lists.newArrayList(
690                new PredicateExistenceFilterDBpedia(ks)
691                ,new MostSpecificTypesFilter(reasoner)
692//                ,new PredicateExistenceFilter() {
693//                    @Override
694//                    public boolean isMeaningless(Node predicate) {
695//                        return predicate.getURI().startsWith("http://dbpedia.org/property/") ||
696//                                predicate.getURI().startsWith("http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#")||
697//                                predicate.getURI().startsWith("http://www.wikidata.org/entity/") ||
698//                                predicate.getURI().startsWith(RDFS.getURI());
699//                    }
700//                }
701        );
702
703        List<Map.Entry<RDFResourceTree, List<Node>>> solutions = qtl.run(tuples);
704
705        solutions.forEach(sol -> {
706            RDFResourceTree tree = sol.getKey();
707            List<Node> nodes2Select = sol.getValue();
708            QueryTreeUtils.rebuildNodeIDs(tree);
709
710            System.out.println("LGG\n" + tree.getStringRepresentation(
711                    true,
712                    RDFResourceTree.Rendering.INDENTED, baseIRI, pm, true));
713
714            System.out.println("nodes to select:" + nodes2Select);
715            for (AbstractTreeFilter<RDFResourceTree> filter : filters) {
716                filter.setNodes2Keep(nodes2Select);
717                tree = filter.apply(tree);
718            }
719            QueryTreeUtils.rebuildNodeIDs(tree);
720
721            System.out.println("LGG (filtered)\n" + tree.getStringRepresentation(
722                    false,
723                    RDFResourceTree.Rendering.INDENTED, baseIRI, pm, true));
724            tree = new SymmetricPredicatesFilter(Collections.singleton(NodeFactory.createURI("http://dbpedia.org/ontology/spouse"))).apply(tree);
725
726            String learnedQuery = QueryTreeUtils.toSPARQLQueryString(tree, nodes2Select, baseIRI, pm);
727            Query q = QueryFactory.create(learnedQuery);
728            QueryUtils.prunePrefixes(q);
729            System.out.println(q);
730
731            Graph<Vertex, Edge> g = QueryTreeUtils.toGraph(tree, baseIRI, pm);
732            
733            QueryTreeUtils.asGraph(tree, baseIRI, pm, new File(System.getProperty("java.io.tmpdir") + File.separator + "lgg.graphml"));
734
735        });
736
737
738    }
739}