/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.algorithms.qtl.operations.lgg;

import com.google.common.base.StandardSystemProperty;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.aksw.jena_sparql_api.cache.h2.CacheUtilsH2;
import org.aksw.jena_sparql_api.core.FluentQueryExecutionFactory;
import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.jena.graph.Node;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.sparql.vocabulary.FOAF;
import org.apache.jena.vocabulary.OWL;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.RDFS;
import org.dllearner.algorithms.qtl.QueryTreeUtils;
import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree;
import org.dllearner.algorithms.qtl.impl.QueryTreeFactory;
import org.dllearner.algorithms.qtl.impl.QueryTreeFactoryBase;
import org.dllearner.algorithms.qtl.util.*;
import org.dllearner.algorithms.qtl.util.filters.NamespaceDropStatementFilter;
import org.dllearner.algorithms.qtl.util.filters.ObjectDropStatementFilter;
import org.dllearner.algorithms.qtl.util.filters.PredicateDropStatementFilter;
import org.dllearner.core.AbstractReasonerComponent;
import org.dllearner.core.StringRenderer;
import org.dllearner.core.StringRenderer.Rendering;
import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator;
import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl;
import org.dllearner.kb.sparql.SparqlEndpoint;
import org.dllearner.reasoning.SPARQLReasoner;
import org.dllearner.utilities.NonStandardReasoningServices;
import org.semanticweb.owlapi.model.EntityType;

import java.io.File;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.stream.Collectors;

/**
 * An LGG generator with RDFS entailment enabled, i.e. subsumption between
 * classes and properties (as provided by the given reasoner) is taken into
 * account when computing the least general generalization of query trees.
 *
 * @author Lorenz Bühmann
 */
public class LGGGeneratorRDFS extends AbstractLGGGenerator {

	// entailment regime used when pruning the computed LGG; set to RDFS in the constructor
	protected Entailment entailment = Entailment.SIMPLE;
	protected AbstractReasonerComponent reasoner;

	/**
	 * @param reasoner the underlying reasoner used for RDFS entailment
	 */
	public LGGGeneratorRDFS(AbstractReasonerComponent reasoner) {
		this.reasoner = reasoner;
		this.entailment = Entailment.RDFS;
	}

	/**
	 * Checks subsumption between two trees w.r.t. the reasoner, i.e. taking
	 * the class and property hierarchies into account.
	 *
	 * @param tree1 the potentially more specific tree
	 * @param tree2 the potentially more general tree
	 * @return whether {@code tree1} is subsumed by {@code tree2}
	 */
	@Override
	protected boolean isSubTreeOf(RDFResourceTree tree1, RDFResourceTree tree2) {
		return QueryTreeUtils.isSubsumedBy(tree1, tree2, reasoner, tree1.isClassNode());
	}

	/**
	 * Normalizes the input tree before the LGG computation: keeps only the
	 * most specific types and materializes implicit type information.
	 *
	 * @param tree the input tree
	 * @return the pre-processed tree
	 */
	@Override
	protected RDFResourceTree preProcess(RDFResourceTree tree) {
		QueryTreeUtils.keepMostSpecificTypes(tree, reasoner);

		return QueryTreeUtils.materializeTypes(tree, reasoner);
	}

	/**
	 * Prunes the computed LGG according to the configured entailment regime.
	 *
	 * @param tree the computed LGG
	 * @return the pruned tree
	 */
	@Override
	protected RDFResourceTree postProcess(RDFResourceTree tree) {
		// prune the tree according to the given entailment
		QueryTreeUtils.prune(tree, reasoner, entailment);
		return tree;
	}

	/**
	 * Computes the pairs of related edges between both trees, i.e. pairs of
	 * edges that have a least common subsumer (LCS) in the property hierarchy.
	 * Built-in vocabulary edges (RDF/RDFS/OWL) are only related to themselves.
	 *
	 * @param tree1 the first tree
	 * @param tree2 the second tree
	 * @return triples of (edge in tree1, edge in tree2, LCS of both edges)
	 */
	@Override
	protected Set<Triple<Node, Node, Node>> getRelatedEdges(RDFResourceTree tree1, RDFResourceTree tree2) {
		Set<Triple<Node, Node, Node>> result = new HashSet<>();

		// split by built-in and non-built-in predicates
		Map<Boolean, List<Node>> split1 = tree1.getEdges().stream()
				.collect(Collectors.partitioningBy(this::isBuiltInEntity));
		Map<Boolean, List<Node>> split2 = tree2.getEdges().stream()
				.collect(Collectors.partitioningBy(this::isBuiltInEntity));

		// non-built-in predicates: pair edges of matching kind (data vs. object property)
		// and relate them via their least common subsumer, if one exists
		for (Node e1 : split1.get(false)) {
			// the kind of e1 is determined by whether its child nodes are literals
			boolean dataProperty = tree1.getChildren(e1).iterator().next().isLiteralNode();
			EntityType entityType = dataProperty ? EntityType.DATA_PROPERTY : EntityType.OBJECT_PROPERTY;

			split2.get(false).stream()
					// only pair edges of the same kind
					.filter(e2 -> tree2.getChildren(e2).iterator().next().isLiteralNode() == dataProperty)
					.forEach(e2 -> {
						Node lcs = NonStandardReasoningServices.getLeastCommonSubsumer(reasoner, e1, e2, entityType);

						if (lcs != null) {
							result.add(Triple.of(e1, e2, lcs));
						}
					});
		}

		// built-in predicates are only related to themselves; use a set for O(1) membership tests
		Set<Node> builtInEntities2 = new HashSet<>(split2.get(true));
		split1.get(true).stream()
				.filter(builtInEntities2::contains)
				.map(e -> Triple.<Node, Node, Node>of(e, e, e))
				.forEach(result::add);

		return result;
	}

	/**
	 * @param n the node to check
	 * @return whether the node belongs to the RDF, RDFS or OWL namespace
	 */
	private boolean isBuiltInEntity(Node n) {
		return n.getNameSpace().equals(RDF.getURI()) ||
				n.getNameSpace().equals(RDFS.getURI()) ||
				n.getNameSpace().equals(OWL.getURI());
	}

	/**
	 * Computes the LGG of two class nodes. If both nodes denote named classes,
	 * their least common subsumer in the class hierarchy is used; otherwise the
	 * LGG is built recursively from the related edges of both trees.
	 *
	 * @param tree1 the first class node
	 * @param tree2 the second class node
	 * @return the LGG of both class nodes
	 */
	@Override
	protected RDFResourceTree processClassNodes(RDFResourceTree tree1, RDFResourceTree tree2) {

		if(tree1.isResourceNode() && tree2.isResourceNode()) {
			Node lcs = NonStandardReasoningServices.getLeastCommonSubsumer(reasoner,
																		   tree1.getData(), tree2.getData(),
																		   EntityType.CLASS);
			if(lcs != null) {
				return new RDFResourceTree(lcs);
			}
		}

		// no LCS found -> build the LGG from the related edges of both trees
		RDFResourceTree lgg = new RDFResourceTree();

		Set<Triple<Node, Node, Node>> relatedEdges = getRelatedEdges(tree1, tree2);
		for (Triple<Node, Node, Node> entry : relatedEdges) {

			Node edge1 = entry.getLeft();
			Node edge2 = entry.getMiddle();
			Node lcs = entry.getRight();

			Set<RDFResourceTree> addedChildren = new HashSet<>();

			// loop over all pairs of children connected via the related edges
			for(RDFResourceTree child1 : tree1.getChildren(edge1)){
				for(RDFResourceTree child2 : tree2.getChildren(edge2)){

					RDFResourceTree lggChild = computeLGG(child1, child2, false);

					// check if there was already a more specific child computed before
					// and if so don't add the current one
					boolean add = true;
					for(Iterator<RDFResourceTree> it = addedChildren.iterator(); it.hasNext();){
						RDFResourceTree addedChild = it.next();

						if(isSubTreeOf(addedChild, lggChild)){
							// previously added child is subsumed by the new one -> skip adding
							add = false;
							break;
						} else if(isSubTreeOf(lggChild, addedChild)){
							// new child is subsumed by a previously added one -> remove the old one
							lgg.removeChild(addedChild, lgg.getEdgeToChild(addedChild));
							it.remove();
						}
					}
					if(add){
						lgg.addChild(lggChild, lcs);
						addedChildren.add(lggChild);
					}
				}
			}
		}
		return lgg;
	}

	/**
	 * Demo: computes the LGG of the query trees of two DBpedia resources
	 * against a public SPARQL endpoint.
	 */
	public static void main(String[] args) throws Exception {
		StringRenderer.setRenderer(Rendering.DL_SYNTAX);
		// knowledge base
		SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia();
		endpoint = SparqlEndpoint.create("http://sake.informatik.uni-leipzig.de:8890/sparql", "http://dbpedia.org");
		QueryExecutionFactory qef = FluentQueryExecutionFactory
				.http(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs()).config()
				.withCache(CacheUtilsH2.createCacheFrontend(System.getProperty("java.io.tmpdir") + File.separator + "cache", false, TimeUnit.DAYS.toMillis(60)))
				.withPagination(10000).withDelay(50, TimeUnit.MILLISECONDS).end().create();

		// tree generation
		ConciseBoundedDescriptionGenerator cbdGenerator = new ConciseBoundedDescriptionGeneratorImpl(qef);
		int maxDepth = 2;

		QueryTreeFactory treeFactory = new QueryTreeFactoryBase();
		treeFactory.setMaxDepth(maxDepth);
		treeFactory.addDropFilters(
				new PredicateDropStatementFilter(StopURIsDBpedia.get()),
				new PredicateDropStatementFilter(StopURIsRDFS.get()),
				new PredicateDropStatementFilter(StopURIsOWL.get()),
				new ObjectDropStatementFilter(StopURIsOWL.get()),
				new PredicateDropStatementFilter(StopURIsSKOS.get()),
				new ObjectDropStatementFilter(StopURIsSKOS.get()),
				new NamespaceDropStatementFilter(Sets.newHashSet("http://dbpedia.org/property/",
						"http://purl.org/dc/terms/", "http://dbpedia.org/class/yago/",
						"http://www.w3.org/2003/01/geo/wgs84_pos#", "http://www.georss.org/georss/", FOAF.getURI())));
		List<RDFResourceTree> trees = new ArrayList<>();
		List<String> resources = Lists.newArrayList("http://dbpedia.org/resource/Leipzig",
				"http://dbpedia.org/resource/Berlin");
		for (String resource : resources) {
			try {
				System.out.println(resource);
				Model model = cbdGenerator.getConciseBoundedDescription(resource, maxDepth);
				RDFResourceTree tree = treeFactory.getQueryTree(ResourceFactory.createResource(resource), model);
				System.out.println(tree.getStringRepresentation());
				trees.add(tree);
			} catch (Exception e) {
				e.printStackTrace();
			}
		}

		// LGG computation
		SPARQLReasoner reasoner = new SPARQLReasoner(qef);
		reasoner.setPrecomputeClassHierarchy(true);
		reasoner.setPrecomputeObjectPropertyHierarchy(true);
		reasoner.setPrecomputeDataPropertyHierarchy(true);
		reasoner.init();
		reasoner.precomputePropertyDomains();
		reasoner.precomputeObjectPropertyRanges();
		LGGGenerator lggGen = new LGGGeneratorRDFS(reasoner);
		RDFResourceTree lgg = lggGen.getLGG(trees);

		System.out.println("LGG");
		System.out.println(lgg.getStringRepresentation());
		System.out.println(QueryTreeUtils.toSPARQLQueryString(lgg));
		System.out.println(QueryTreeUtils.toOWLClassExpression(lgg));
	}

}