001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.kb.sparql;
020
021import com.google.common.collect.Lists;
022import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
023import org.apache.commons.lang3.NotImplementedException;
024import org.apache.jena.graph.NodeFactory;
025import org.apache.jena.graph.impl.LiteralLabel;
026import org.apache.jena.query.QueryExecution;
027import org.apache.jena.query.QueryFactory;
028import org.apache.jena.rdf.model.Literal;
029import org.apache.jena.rdf.model.Model;
030import org.apache.jena.rdf.model.ModelFactory;
031import org.apache.jena.riot.WebContent;
032import org.apache.jena.riot.system.ErrorHandler;
033import org.apache.jena.riot.system.ErrorHandlerFactory;
034import org.apache.jena.riot.web.HttpNames;
035import org.apache.jena.sparql.engine.http.QueryEngineHTTP;
036import org.apache.jena.sparql.util.FmtUtils;
037import org.dllearner.algorithms.qtl.QueryTreeUtils;
038import org.dllearner.kb.SparqlEndpointKS;
039import org.dllearner.utilities.QueryUtils;
040import org.slf4j.Logger;
041import org.slf4j.LoggerFactory;
042
043import java.io.BufferedReader;
044import java.io.ByteArrayInputStream;
045import java.io.InputStream;
046import java.io.InputStreamReader;
047import java.net.URL;
048import java.nio.charset.StandardCharsets;
049import java.util.ArrayList;
050import java.util.List;
051import java.util.Set;
052import java.util.TreeSet;
053import java.util.concurrent.atomic.AtomicBoolean;
054import java.util.concurrent.atomic.AtomicInteger;
055import java.util.stream.Collectors;
056
057/**
058 * {@inheritDoc}
059 * @author Lorenz Buehmann
060 *
061 */
062public class TreeBasedConciseBoundedDescriptionGenerator implements ConciseBoundedDescriptionGenerator{
063
064        private static final Logger logger = LoggerFactory.getLogger(TreeBasedConciseBoundedDescriptionGenerator.class);
065        private SparqlEndpoint endpoint;
066
067        private Set<String> allowedPropertyNamespaces = new TreeSet<>();
068        private Set<String> allowedObjectNamespaces = new TreeSet<>();
069
070        private QueryExecutionFactory qef;
071
072        private AtomicInteger inIndex = new AtomicInteger(0);
073        private AtomicInteger outIndex = new AtomicInteger(0);
074        private AtomicInteger predIndex = new AtomicInteger(0);
075
076        private boolean useUnionOptimization = true;
077        private boolean workaround = false;
078
079        public TreeBasedConciseBoundedDescriptionGenerator(QueryExecutionFactory qef) {
080                this.qef = qef;
081        }
082
083        public void setWorkaround(boolean workaround) {
084                this.workaround = workaround;
085        }
086
087        public void setEndpoint(SparqlEndpoint endpoint) {
088                this.endpoint = endpoint;
089        }
090
091        public Model getConciseBoundedDescription(LiteralLabel literal, CBDStructureTree structureTree) throws Exception {
092                logger.trace("Computing CBD for {} ...", literal);
093                long start = System.currentTimeMillis();
094                String query = generateQuery(literal, structureTree);
095                System.out.println(query);
096
097                if(workaround) {
098                        return constructWithReplacement(endpoint, query);
099                }
100
101                try(QueryExecution qe = qef.createQueryExecution(query)) {
102                        Model model = qe.execConstruct();
103                        long end = System.currentTimeMillis();
104                        logger.trace("Got {} triples in {} ms.", model.size(), (end - start));
105                        return model;
106                } catch(Exception e) {
107                        logger.error("CBD retrieval failed when using query\n{}", query);
108                        throw new Exception("CBD retrieval failed when using query\n" + query, e);
109                }
110        }
111
112        /* (non-Javadoc)
113             * @see org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator#getConciseBoundedDescription(java.lang.String, int, boolean)
114             */
115        public Model getConciseBoundedDescription(String resourceURI, CBDStructureTree structureTree) throws Exception {
116                logger.trace("Computing CBD for {} ...", resourceURI);
117                long start = System.currentTimeMillis();
118                String query = generateQuery(resourceURI, structureTree);
119                System.out.println(QueryFactory.create(query));
120
121                if(workaround) {
122                        return constructWithReplacement(endpoint, query);
123                }
124                
125                try(QueryExecution qe = qef.createQueryExecution(query)) {
126                        Model model = qe.execConstruct();
127                        long end = System.currentTimeMillis();
128                        logger.trace("Got {} triples in {} ms.", model.size(), (end - start));
129                        return model;
130                } catch(Exception e) {
131                        throw new Exception("CBD retrieval failed when using query\n" + query, e);
132                }
133        }
134
135        @Override
136        public Model getConciseBoundedDescription(String resource, int depth, boolean withTypesForLeafs) {
137                throw new NotImplementedException("please use getConciseBoundedDescription(String resourceURI, CBDStructureTree structureTree) instead.");
138        }
139
140        @Override
141        public void setAllowedPropertyNamespaces(Set<String> namespaces) {
142                this.allowedPropertyNamespaces.addAll(namespaces);
143        }
144        
145        @Override
146        public void setAllowedObjectNamespaces(Set<String> namespaces) {
147                this.allowedObjectNamespaces.addAll(namespaces);
148        }
149
150        /**
151         * A SPARQL CONSTRUCT query is created, to get a RDF graph for the given example with a specific recursion depth.
152         * @param literal The example resource for which a CONSTRUCT query is created.
153         * @return the SPARQL query
154         */
155        private String generateQuery(LiteralLabel literal, CBDStructureTree structureTree){
156                reset();
157
158                // get paths to leaf nodes
159                List<List<CBDStructureTree>> pathsToLeafs = QueryTreeUtils.getPathsToLeafs(structureTree);
160
161                StringBuilder query = new StringBuilder();
162                String rootToken = FmtUtils.stringForNode(NodeFactory.createLiteral(literal));
163
164                query.append("CONSTRUCT {\n");
165                // the CONSTRUCT template
166                append(query, structureTree, rootToken, true);
167                query.append("} WHERE {\n");
168                reset();
169                // the query pattern
170                append(query, structureTree, rootToken, false);
171                query.append("}");
172
173                return query.toString();
174        }
175
176        /**
177         * A SPARQL CONSTRUCT query is created, to get a RDF graph for the given example with a specific recursion depth.
178         * @param resource The example resource for which a CONSTRUCT query is created.
179         * @return the SPARQL query
180         */
181        private String generateQuery(String resource, CBDStructureTree structureTree){
182                reset();
183
184                // get paths to leaf nodes
185                List<List<CBDStructureTree>> pathsToLeafs = QueryTreeUtils.getPathsToLeafs(structureTree);
186
187                StringBuilder query = new StringBuilder();
188                String rootToken = "<" + resource + ">";
189
190                query.append("CONSTRUCT {\n");
191                // the CONSTRUCT template
192                append(query, structureTree, rootToken, true);
193                query.append("} WHERE {\n");
194                reset();
195                // the query pattern
196                append(query, structureTree, rootToken, false);
197                query.append("}");
198
199                return query.toString();
200        }
201
202        private void append(StringBuilder query, CBDStructureTree tree, String rootVar, boolean isConstructTemplate) {
203                // use optimization if enabled
204                if(useUnionOptimization) {
205                        appendUnionOptimized2(query, tree, rootVar, isConstructTemplate);
206                        return;
207                }
208
209                tree.getChildren().forEach(child -> {
210                        // check if we have to put it into an OPTIONAL clause
211                        boolean optionalNeeded = !isConstructTemplate && child.isOutNode() && !tree.isRoot() && !tree.isInNode();
212
213                        // open OPTIONAL if necessary
214                        if(optionalNeeded) {
215                                query.append("OPTIONAL {");
216                        }
217
218                        // append triple pattern
219                        String var;
220                        if(child.isInNode()) {
221                                var = "?x_in" + inIndex.getAndIncrement();
222                                String predVar = "?p" + predIndex.getAndIncrement();
223                                query.append(String.format("%s %s %s .\n", var, predVar, rootVar));
224                        } else {
225                                var = "?x_out" + outIndex.getAndIncrement();
226                                String predVar = "?p" + predIndex.getAndIncrement();
227                                query.append(String.format("%s %s %s .\n", rootVar, predVar, var));
228                        }
229
230                        // recursively process the child node
231                        append(query, child, var, isConstructTemplate);
232
233                        // close OPTIONAL if necessary
234                        if(optionalNeeded) {
235                                query.append("}");
236                        }
237                });
238        }
239
240        private void appendUnionOptimized(StringBuilder query, CBDStructureTree tree, String rootVar, boolean isConstructTemplate) {
241                List<List<CBDStructureTree>> paths = QueryTreeUtils.getPathsToLeafs(tree);
242
243                List<String> tpClusters = paths.stream().map(path -> {
244                        StringBuilder currentVar = new StringBuilder(rootVar);
245                        StringBuilder tps = new StringBuilder();
246                        AtomicBoolean lastOut = new AtomicBoolean(false);
247                        StringBuilder appendix = new StringBuilder();
248                        path.forEach(node -> {
249                                boolean optionalNeeded = !isConstructTemplate && lastOut.get() && node.isOutNode();
250
251                                // open OPTIONAL if necessary
252                                if(optionalNeeded) {
253                                        tps.append("OPTIONAL {");
254                                        appendix.append("}");
255                                }
256
257                                // append triple pattern
258                                String var;
259                                if (node.isInNode()) {
260                                        var = "?x_in" + inIndex.getAndIncrement();
261                                        String predVar = "?p" + predIndex.getAndIncrement();
262                                        tps.append(String.format("%s %s %s .\n", var, predVar, currentVar.toString()));
263                                } else {
264                                        var = "?x_out" + outIndex.getAndIncrement();
265                                        String predVar = "?p" + predIndex.getAndIncrement();
266                                        tps.append(String.format("%s %s %s .\n", currentVar.toString(), predVar, var));
267                                        lastOut.set(true);
268                                }
269                                currentVar.setLength(0);
270                                currentVar.append(var);
271                        });
272
273                        // add closing braces for OPTIONAL if used
274                        tps.append(appendix);
275
276                        return tps.toString();
277                }).collect(Collectors.toList());
278
279                String queryPart = tpClusters.stream()
280                                .map(s -> isConstructTemplate ? s : "{" + s + "}")
281                                .collect(Collectors.joining(isConstructTemplate ? "" : " UNION "));
282                query.append(queryPart);
283        }
284
285        private void appendUnionOptimized2(StringBuilder query, CBDStructureTree tree, String rootVar, boolean isConstructTemplate) {
286                List<List<CBDStructureTree>> paths = QueryTreeUtils.getPathsToLeafs(tree);
287
288                // get all sub-paths
289                paths = paths.stream().flatMap(path -> {
290                                                           List<List<CBDStructureTree>> subPaths = new ArrayList<>();
291                                                           for (int length = 1; length <= path.size(); length++) {
292                                                                   subPaths.add(path.subList(0, length));
293                                                           }
294                                                           return subPaths.stream();
295                                                   }).collect(Collectors.toList());
296
297                String rdfTypeFilter = "FILTER(%s != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)\n";
298
299
300                List<String> tpClusters = paths.stream().map(path -> {
301                        StringBuilder currentVar = new StringBuilder(rootVar);
302                        StringBuilder tps = new StringBuilder();
303                        AtomicBoolean lastOut = new AtomicBoolean(false);
304
305                                path.forEach(node -> {
306                                        // append triple pattern
307                                        String var;
308                                        if (node.isInNode()) {
309                                                if(lastOut.get() && !isConstructTemplate) {
310                                                        tps.append(String.format(rdfTypeFilter, "?p_" + (predIndex.get() - 1)));
311                                                }
312                                                var = "?x_in" + inIndex.getAndIncrement();
313                                                String predVar = "?p" + predIndex.getAndIncrement();
314                                                tps.append(String.format("%s %s %s .\n", var, predVar, currentVar.toString()));
315
316                                        } else {
317                                                var = "?x_out" + outIndex.getAndIncrement();
318                                                String predVar = "?p" + predIndex.getAndIncrement();
319                                                tps.append(String.format("%s %s %s .\n", currentVar.toString(), predVar, var));
320                                                lastOut.set(true);
321                                        }
322                                        currentVar.setLength(0);
323                                        currentVar.append(var);
324                                });
325
326                                return tps.toString();
327                        }).collect(Collectors.toList());
328
329
330                String queryPart = tpClusters.stream()
331                                .map(s -> isConstructTemplate ? s : "{" + s + "}")
332                                .collect(Collectors.joining(isConstructTemplate ? "" : " UNION "));
333                query.append(queryPart);
334        }
335
336        /**
337         * Reset variables indices
338         */
339        private void reset() {
340                inIndex = new AtomicInteger(0);
341                outIndex = new AtomicInteger(0);
342                predIndex = new AtomicInteger(0);
343        }
344
345        @Override
346        public void setIgnoredProperties(Set<String> properties) {
347
348        }
349
350        public void setUseUnionOptimization(boolean useUnionOptimization) {
351                this.useUnionOptimization = useUnionOptimization;
352        }
353
354        public static void main(String[] args) throws Exception {
355                String query = "PREFIX  rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" +
356                                "PREFIX  owl:  <http://www.w3.org/2002/07/owl#>\n" +
357                                "PREFIX  rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
358                                "PREFIX  foaf: <http://xmlns.com/foaf/0.1/>\n" +
359                                "\n" +
360                                "SELECT DISTINCT  ?uri\n" +
361                                "WHERE\n" +
362                                "  { ?x  <http://dbpedia.org/ontology/director>  <http://dbpedia.org/resource/William_Shatner> ;\n" +
363                                "        <http://dbpedia.org/ontology/starring>  ?uri\n" +
364                                "  }";
365
366                query = "PREFIX  dbo:  <http://dbpedia.org/ontology/>\n" +
367                                "PREFIX  :     <http://dbpedia.org/resource/>\n" +
368                                "\n" +
369                                "SELECT DISTINCT  ?uri\n" +
370                                "WHERE\n" +
371                                "  { :The_Three_Dancers\n" +
372                                "              dbo:author    ?person .\n" +
373                                "    ?person   dbo:movement  ?uri\n" +
374                                "  }";
375                query = "PREFIX  dbo:  <http://dbpedia.org/ontology/>\n" +
376                                "PREFIX  :     <http://dbpedia.org/resource/>\n" +
377                                "\n" +
378                                "SELECT DISTINCT  ?uri\n" +
379                                "WHERE\n" +
380                                "  { ?uri dbo:author    ?person . \n" +
381                                "    ?person   dbo:movement  :Test\n ." +
382                                "?in_0 dbo:starring    ?uri . ?in_1 dbo:starring    ?in_0 . ?in_0 dbo:book    ?o_0 ." +
383                                "  }";
384                CBDStructureTree cbdTree = QueryUtils.getOptimalCBDStructure(QueryFactory.create(query));
385                cbdTree = CBDStructureTree.fromTreeString("root:[in:[out:[]],out:[in:[],out:[out:[]]]]");
386
387                System.out.println(cbdTree.toStringVerbose());
388                SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
389                endpoint = SparqlEndpoint.create("http://sake.informatik.uni-leipzig.de:8890/sparql", "http://dbpedia.org");
390                SparqlEndpointKS ks = new SparqlEndpointKS(endpoint);
391                ks.setQueryDelay(0);
392                ks.setUseCache(false);
393                ks.setRetryCount(0);
394                ks.init();
395
396//              QueryExecutionFactory qef = ks.getQueryExecutionFactory();
397//
398//              String q = "CONSTRUCT {\n" +
399//                              "<http://www4.wiwiss.fu-berlin.de/sider/resource/drugs/2232> ?p0 ?x_out0 .\n" +
400//                              "} WHERE {\n" +
401//                              "{<http://www4.wiwiss.fu-berlin.de/sider/resource/drugs/2232> ?p0 ?x_out0 .\n" +
402//                              "}}";
403//              Model model = ModelFactory.createDefaultModel();
404//              // Parser to first error or warning.
405//              ErrorHandler errHandler = ErrorHandlerFactory.errorHandlerWarn;
406//              model.getReader().setProperty("error-mode","lax");
407//
408//              System.out.println(model.size());
409//
410//
411//              System.exit(0);
412                TreeBasedConciseBoundedDescriptionGenerator cbdGen = new TreeBasedConciseBoundedDescriptionGenerator(ks.getQueryExecutionFactory());
413                Model cbd = cbdGen.getConciseBoundedDescription("http://dbpedia.org/resource/Dan_Gauthier", cbdTree);
414                System.out.println(cbd.size());
415
416                cbdGen.setUseUnionOptimization(false);
417                cbd = cbdGen.getConciseBoundedDescription("http://dbpedia.org/resource/Dan_Gauthier", cbdTree);
418                System.out.println(cbd.size());
419//              cbd.write(System.out, "NTRIPLES");
420        }
421
422        private Model constructWithReplacement(SparqlEndpoint endpoint, String query) throws Exception{
423                QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query);
424                qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs());
425                String request = qe.toString().replace("GET ", "");
426
427                URL url = new URL(request);
428                java.net.HttpURLConnection conn = (java.net.HttpURLConnection) url.openConnection();
429                conn.setRequestMethod("GET");
430                conn.addRequestProperty(HttpNames.hAccept, WebContent.contentTypeRDFXML);
431                try(BufferedReader rdr = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
432                        Model model = ModelFactory.createDefaultModel();
433                        String buf = null;
434                        StringBuilder doc = new StringBuilder();
435                        while ((buf = rdr.readLine()) != null) {
436                                // Apply regex on buf
437                                if(buf.contains("&#")) {
438                                        buf = buf.replace("&#", "");
439                                }
440                                // build output
441                                doc.append(buf);
442                        }
443                        try(InputStream is = new ByteArrayInputStream(doc.toString().getBytes(StandardCharsets.UTF_8))) {
444                                model.read(is, null);
445                        }
446                        return model;
447                }
448        }
449
450}