/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.algorithms.qtl.operations.lgg;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.aksw.jena_sparql_api.cache.h2.CacheUtilsH2;
import org.aksw.jena_sparql_api.core.FluentQueryExecutionFactory;
import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.jena.graph.Node;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.sparql.vocabulary.FOAF;
import org.apache.jena.vocabulary.OWL;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.RDFS;
import org.dllearner.algorithms.qtl.QueryTreeUtils;
import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree;
import org.dllearner.algorithms.qtl.impl.QueryTreeFactory;
import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryBase;
import org.dllearner.algorithms.qtl.util.*;
import org.dllearner.algorithms.qtl.util.filters.NamespaceDropStatementFilter;
import org.dllearner.algorithms.qtl.util.filters.ObjectDropStatementFilter;
import org.dllearner.algorithms.qtl.util.filters.PredicateDropStatementFilter;
import org.dllearner.core.AbstractReasonerComponent;
import org.dllearner.core.StringRenderer;
import org.dllearner.core.StringRenderer.Rendering;
import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator;
import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl;
import org.dllearner.kb.sparql.SparqlEndpoint;
import org.dllearner.reasoning.SPARQLReasoner;
import org.dllearner.utilities.NonStandardReasoningServices;
import org.semanticweb.owlapi.model.EntityType;

import java.io.File;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.stream.Collectors;

/**
 * An LGG generator with RDFS entailment enabled.
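 * <p>
 * Typical usage (a sketch following the {@code main} method below, where
 * {@code qef} is an already created {@link QueryExecutionFactory} and
 * {@code trees} is a list of {@link RDFResourceTree} objects):
 * <pre>{@code
 * SPARQLReasoner reasoner = new SPARQLReasoner(qef);
 * reasoner.init();
 * LGGGenerator lggGen = new LGGGeneratorRDFS(reasoner);
 * RDFResourceTree lgg = lggGen.getLGG(trees);
 * }</pre>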
 * @author Lorenz Bühmann
 *
 */
public class LGGGeneratorRDFS extends AbstractLGGGenerator {

	// entailment regime used when pruning the computed LGG
	protected Entailment entailment = Entailment.SIMPLE;
	// reasoner providing the class and property hierarchies
	protected AbstractReasonerComponent reasoner;

	/**
	 * @param reasoner the underlying reasoner used for RDFS entailment
	 */
	public LGGGeneratorRDFS(AbstractReasonerComponent reasoner) {
		this.reasoner = reasoner;
		this.entailment = Entailment.RDFS;
	}

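	/**
	 * Checks whether {@code tree1} is subsumed by {@code tree2}, taking the
	 * schema provided by the underlying reasoner into account.
	 */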
	@Override
	protected boolean isSubTreeOf(RDFResourceTree tree1, RDFResourceTree tree2) {
		return QueryTreeUtils.isSubsumedBy(tree1, tree2, reasoner, tree1.isClassNode());
	}

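	/**
	 * Prepares a tree for the LGG computation by normalizing its
	 * {@code rdf:type} information: only the most specific types are kept and
	 * further types are materialized by means of the reasoner.
	 */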
	@Override
	protected RDFResourceTree preProcess(RDFResourceTree tree) {
		QueryTreeUtils.keepMostSpecificTypes(tree, reasoner);

		return QueryTreeUtils.materializeTypes(tree, reasoner);
	}

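	/**
	 * Prunes the computed LGG, i.e. removes edges that are redundant under the
	 * configured entailment regime (RDFS).
	 */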
	@Override
	protected RDFResourceTree postProcess(RDFResourceTree tree) {
		// prune the tree according to the given entailment
		QueryTreeUtils.prune(tree, reasoner, entailment);
		return tree;
	}

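	/**
	 * Determines all pairs of related edges of the two trees, i.e. pairs of
	 * properties that share a least common subsumer in the property hierarchy.
	 * Built-in predicates (RDF, RDFS and OWL vocabularies) are only related to
	 * themselves. Each returned triple consists of the edge in the first tree,
	 * the edge in the second tree, and their least common subsumer.
	 */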
	@Override
	protected Set<Triple<Node, Node, Node>> getRelatedEdges(RDFResourceTree tree1, RDFResourceTree tree2) {
		Set<Triple<Node, Node, Node>> result = new HashSet<>();

		Predicate<Node> isBuiltIn = this::isBuiltInEntity;

		// split the edges into built-in and non-built-in predicates
		Map<Boolean, List<Node>> split1 = tree1.getEdges().stream().collect(Collectors.partitioningBy(isBuiltIn));
		Map<Boolean, List<Node>> split2 = tree2.getEdges().stream().collect(Collectors.partitioningBy(isBuiltIn));

		// for each pair of non-built-in predicates of the same kind (data vs. object property),
		// keep the pair if a least common subsumer exists in the property hierarchy
		for (Node e1 : split1.get(false)) {
			boolean dataproperty = tree1.getChildren(e1).iterator().next().isLiteralNode();
			EntityType entityType = dataproperty ? EntityType.DATA_PROPERTY : EntityType.OBJECT_PROPERTY;

			split2.get(false).stream()
					.filter(e2 -> {
						RDFResourceTree child = tree2.getChildren(e2).iterator().next();
						return child.isLiteralNode() == dataproperty;
					})
					.forEach(e2 -> {
						Node lcs = NonStandardReasoningServices.getLeastCommonSubsumer(reasoner, e1, e2, entityType);

						if(lcs != null) {
							result.add(Triple.of(e1, e2, lcs));
						}
					});
		}

		// built-in predicates are only related to themselves
		List<Node> builtInEntities1 = split1.get(true);
		List<Node> builtInEntities2 = split2.get(true);

		Set<Triple<Node, Node, Node>> builtInEntitiesCommon = builtInEntities1.stream()
				.filter(builtInEntities2::contains)
				.map(e -> Triple.of(e, e, e))
				.collect(Collectors.toSet());

		result.addAll(builtInEntitiesCommon);

		return result;
	}

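	/**
	 * Returns {@code true} if the given node belongs to the RDF, RDFS or OWL
	 * vocabulary namespace.
	 */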
	private boolean isBuiltInEntity(Node n) {
		return n.getNameSpace().equals(RDF.getURI()) ||
				n.getNameSpace().equals(RDFS.getURI()) ||
				n.getNameSpace().equals(OWL.getURI());
	}

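	/**
	 * Computes the LGG of two class nodes. If both nodes are named resources,
	 * their least common subsumer in the class hierarchy is returned directly;
	 * otherwise, a new node is created and the LGG of the children is computed
	 * for each pair of related edges.
	 */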
	@Override
	protected RDFResourceTree processClassNodes(RDFResourceTree tree1, RDFResourceTree tree2) {

		// if both class nodes are named resources, return their least common subsumer directly
		if(tree1.isResourceNode() && tree2.isResourceNode()) {
			System.out.print("LCS(" + tree1 + ", " + tree2 + ")");
			Node lcs = NonStandardReasoningServices.getLeastCommonSubsumer(reasoner,
																			tree1.getData(), tree2.getData(),
																			EntityType.CLASS);
			System.out.println(" = " + lcs);
			if(lcs != null) {
				return new RDFResourceTree(lcs);
			}
		}

		RDFResourceTree lgg = new RDFResourceTree();

		// otherwise, recurse into the children for each pair of related edges
		Set<Triple<Node, Node, Node>> relatedEdges = getRelatedEdges(tree1, tree2);
		for (Triple<Node, Node, Node> entry : relatedEdges) {

			Node edge1 = entry.getLeft();
			Node edge2 = entry.getMiddle();
			Node lcs = entry.getRight();

			Set<RDFResourceTree> addedChildren = new HashSet<>();

			// loop over children of first tree
			for(RDFResourceTree child1 : tree1.getChildren(edge1)){

				// loop over children of second tree
				for(RDFResourceTree child2 : tree2.getChildren(edge2)){

					RDFResourceTree lggChild = computeLGG(child1, child2, false);

					// check if there was already a more specific child computed before
					// and if so don't add the current one
					boolean add = true;
					for(Iterator<RDFResourceTree> it = addedChildren.iterator(); it.hasNext();){
						RDFResourceTree addedChild = it.next();

						if(isSubTreeOf(addedChild, lggChild)){
//							logger.trace("Skipped adding: Previously added child {} is subsumed by {}.",
//									addedChild.getStringRepresentation(),
//									lggChild.getStringRepresentation());
							add = false;
							break;
						} else if(isSubTreeOf(lggChild, addedChild)){
//							logger.trace("Removing child node: {} is subsumed by previously added child {}.",
//									lggChild.getStringRepresentation(),
//									addedChild.getStringRepresentation());
							lgg.removeChild(addedChild, lgg.getEdgeToChild(addedChild));
							it.remove();
						}
					}
					if(add){
						lgg.addChild(lggChild, lcs);
						addedChildren.add(lggChild);
//						logger.trace("Adding child {}", lggChild.getStringRepresentation());
					}
				}
			}
		}
		return lgg;
	}

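	/**
	 * Small demo: builds the query trees of two DBpedia resources (Leipzig and
	 * Berlin) from their concise bounded descriptions and prints their LGG as
	 * tree, SPARQL query and OWL class expression.
	 */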
	public static void main(String[] args) throws Exception {
		StringRenderer.setRenderer(Rendering.DL_SYNTAX);

		// knowledge base
		SparqlEndpoint endpoint = SparqlEndpoint.create("http://sake.informatik.uni-leipzig.de:8890/sparql", "http://dbpedia.org");
		QueryExecutionFactory qef = FluentQueryExecutionFactory
				.http(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs()).config()
				.withCache(CacheUtilsH2.createCacheFrontend(System.getProperty("java.io.tmpdir") + File.separator + "cache", false, TimeUnit.DAYS.toMillis(60)))
				.withPagination(10000).withDelay(50, TimeUnit.MILLISECONDS).end().create();

		// tree generation
		ConciseBoundedDescriptionGenerator cbdGenerator = new ConciseBoundedDescriptionGeneratorImpl(qef);
		int maxDepth = 2;

		QueryTreeFactory treeFactory = new QueryTreeFactoryBase();
		treeFactory.setMaxDepth(maxDepth);
		treeFactory.addDropFilters(
				new PredicateDropStatementFilter(StopURIsDBpedia.get()),
				new PredicateDropStatementFilter(StopURIsRDFS.get()),
				new PredicateDropStatementFilter(StopURIsOWL.get()),
				new ObjectDropStatementFilter(StopURIsOWL.get()),
				new PredicateDropStatementFilter(StopURIsSKOS.get()),
				new ObjectDropStatementFilter(StopURIsSKOS.get()),
				new NamespaceDropStatementFilter(Sets.newHashSet("http://dbpedia.org/property/",
						"http://purl.org/dc/terms/", "http://dbpedia.org/class/yago/",
						"http://www.w3.org/2003/01/geo/wgs84_pos#", "http://www.georss.org/georss/", FOAF.getURI())));

		List<RDFResourceTree> trees = new ArrayList<>();
		List<String> resources = Lists.newArrayList("http://dbpedia.org/resource/Leipzig",
				"http://dbpedia.org/resource/Berlin");
		for (String resource : resources) {
			try {
				System.out.println(resource);
				Model model = cbdGenerator.getConciseBoundedDescription(resource, maxDepth);
				RDFResourceTree tree = treeFactory.getQueryTree(ResourceFactory.createResource(resource), model);
				System.out.println(tree.getStringRepresentation());
				trees.add(tree);
			} catch (Exception e) {
				e.printStackTrace();
			}
		}

		// LGG computation
		SPARQLReasoner reasoner = new SPARQLReasoner(qef);
		reasoner.setPrecomputeClassHierarchy(true);
		reasoner.setPrecomputeObjectPropertyHierarchy(true);
		reasoner.setPrecomputeDataPropertyHierarchy(true);
		reasoner.init();
		reasoner.precomputePropertyDomains();
		reasoner.precomputeObjectPropertyRanges();

		LGGGenerator lggGen = new LGGGeneratorRDFS(reasoner);
		RDFResourceTree lgg = lggGen.getLGG(trees);

		System.out.println("LGG");
		System.out.println(lgg.getStringRepresentation());
		System.out.println(QueryTreeUtils.toSPARQLQueryString(lgg));
		System.out.println(QueryTreeUtils.toOWLClassExpression(lgg));
	}

}