001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.qtl.operations.lgg; 020 021import com.jamonapi.Monitor; 022import com.jamonapi.MonitorFactory; 023import org.apache.commons.lang3.tuple.Triple; 024import org.apache.jena.datatypes.RDFDatatype; 025import org.apache.jena.graph.Node; 026import org.apache.jena.vocabulary.RDF; 027import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree; 028import org.dllearner.algorithms.qtl.operations.StoppableOperation; 029import org.dllearner.algorithms.qtl.operations.TimeoutableOperation; 030import org.slf4j.Logger; 031import org.slf4j.LoggerFactory; 032 033import java.util.HashSet; 034import java.util.Iterator; 035import java.util.Set; 036import java.util.concurrent.TimeUnit; 037 038/** 039 * An LGG generator that can be stopped and given a timeout. 040 * 041 * @author Lorenz Buehmann 042 * 043 */ 044public abstract class AbstractLGGGenerator implements LGGGenerator, StoppableOperation, TimeoutableOperation { 045 046 public enum BlankNodeScope { 047 TREE, DATASET 048 } 049 050 protected Logger logger = LoggerFactory.getLogger(getClass()); 051 052 private Monitor mon = MonitorFactory.getTimeMonitor("lgg"); 053 054 protected int subCalls; 055 056 private long timeoutMillis = -1; 057 private long startTime; 058 059 protected volatile boolean stop = false; 060 061 private boolean complete = true; 062 063 private BlankNodeScope blankNodeScope = BlankNodeScope.TREE; 064 065 066 private void reset() { 067 stop = false; 068 subCalls = 0; 069 } 070 071 /* (non-Javadoc) 072 * @see org.dllearner.algorithms.qtl.operations.lgg.LGGGenerator2#getLGG(org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree, org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree, boolean) 073 */ 074 @Override 075 public RDFResourceTree getLGG(RDFResourceTree tree1, RDFResourceTree tree2, boolean learnFilters) { 076 startTime = System.currentTimeMillis(); 077 078 reset(); 079 080 // apply some pre-processing 081 tree1 = preProcess(tree1); 082 tree2 = preProcess(tree2); 083 084 // compute the LGG 085 mon.start(); 086 RDFResourceTree lgg = computeLGG(tree1, tree2, learnFilters); 087 mon.stop(); 088 089 // apply some post-processing 090 lgg = postProcess(lgg); 091 092 addNumbering(0, lgg); 093 094 return lgg; 095 } 096 097 protected RDFResourceTree computeLGG(RDFResourceTree tree1, RDFResourceTree tree2, boolean learnFilters){ 098 subCalls++; 099 100 // 1. compare the root nodes 101 // a) if both root nodes have same URI or literal value, just return one of the two trees as LGG 102 if((tree1.isResourceNode() || tree1.isLiteralValueNode()) && tree1.getData().equals(tree2.getData())){ 103 logger.trace("Early termination. Tree 1 {} and tree 2 {} describe the same resource.", tree1, tree2); 104 return tree1; 105 } 106 107 // b) handle literal nodes 108 if(tree1.isLiteralNode() && tree2.isLiteralNode()){ 109 return processLiteralNodes(tree1, tree2); 110 } 111 112 // c) handle class nodes 113 if(tree1.isClassNode()) { 114 return processClassNodes(tree1, tree2); 115 } 116 117 // d) else create new empty tree 118 RDFResourceTree lgg = new RDFResourceTree(); 119 120 // keep name if both blank nodes 121 //TODO workaround for anchor nodes that are used for tuples 122 if(tree1.getData().isBlank() && tree1.getData().matches(tree2.getData())) { 123 lgg.setData(tree1.getData()); 124 } 125 boolean isAnchorNode = tree1.getAnchorVar() != null && tree1.getAnchorVar().matches(tree2.getAnchorVar()); 126 if(isAnchorNode) { 127 lgg.setAnchorVar(tree1.getAnchorVar()); 128 } 129 130 131 // 2. compare the edges 132 // we only have to compare edges which are 133 // a) contained in both trees 134 // b) related via subsumption, i.e. p1 â p2 135 136 // get edges of tree 2 connected via subsumption 137 Set<Triple<Node, Node, Node>> relatedEdges = getRelatedEdges(tree1, tree2); 138 for (Triple<Node, Node, Node> entry : relatedEdges){ 139 if(stop || isTimeout()) { 140 complete = false; 141 break; 142 } 143 144 Node edge1 = entry.getLeft(); 145 Node edge2 = entry.getMiddle(); 146 Node lcs = entry.getRight(); 147 148 Set<RDFResourceTree> addedChildren = new HashSet<>(); 149 150 // loop over children of first tree 151 for(RDFResourceTree child1 : tree1.getChildren(edge1)){//System.out.println("c1:" + child1); 152 if(stop || isTimeout()) { 153 complete = false; 154 break; 155 } 156 // loop over children of second tree 157 for(RDFResourceTree child2 : tree2.getChildren(edge2)){//System.out.println("c2:" + child2); 158 if(stop || isTimeout()) { 159 complete = false; 160 break; 161 } 162 163 RDFResourceTree lggChild = computeLGG(child1, child2, learnFilters); 164 165 // check if there was already a more specific child computed before 166 // and if so don't add the current one 167 boolean add = true; 168 for(Iterator<RDFResourceTree> it = addedChildren.iterator(); it.hasNext();){ 169 RDFResourceTree addedChild = it.next(); 170 171 if(isSubTreeOf(addedChild, lggChild)){ 172// logger.trace("Skipped adding: Previously added child {} is subsumed by {}.", 173// addedChild.getStringRepresentation(), 174// lggChild.getStringRepresentation()); 175 add = false; 176 if(lggChild.hasAnchor()) { 177 add = true; 178 if(isSubTreeOf(lggChild, addedChild) && !addedChild.hasAnchor()) { 179 lgg.removeChild(addedChild, lgg.getEdgeToChild(addedChild)); 180 it.remove(); 181 } 182// System.err.println("not add anchor " + lggChild.getAnchorVar()); 183// System.err.println(lggChild.getStringRepresentation()); 184// System.err.println("subsumes"); 185// System.err.println(addedChild.getStringRepresentation()); 186 } 187 break; 188 } else if(isSubTreeOf(lggChild, addedChild)){ 189// logger.trace("Removing child node: {} is subsumed by previously added child {}.", 190// lggChild.getStringRepresentation(), 191// addedChild.getStringRepresentation()); 192 lgg.removeChild(addedChild, lgg.getEdgeToChild(addedChild)); 193 it.remove(); 194 if(addedChild.hasAnchor()) { 195 System.err.println("removed anchor " + addedChild.getAnchorVar()); 196 } 197 } 198 } 199 if(add){ 200 lgg.addChild(lggChild, lcs); 201 addedChildren.add(lggChild); 202// logger.trace("Adding child {}", lggChild.getStringRepresentation()); 203 } 204 } 205 } 206 } 207 208 return lgg; 209 } 210 211 protected RDFResourceTree processClassNodes(RDFResourceTree tree1, RDFResourceTree tree2) { 212 RDFResourceTree lgg = new RDFResourceTree(); 213 214 Set<Triple<Node, Node, Node>> relatedEdges = getRelatedEdges(tree1, tree2); 215 for (Triple<Node, Node, Node> entry : relatedEdges) { 216 if (stop || isTimeout()) { 217 complete = false; 218 break; 219 } 220 Node edge1 = entry.getLeft(); 221 Node edge2 = entry.getMiddle(); 222 Node lcs = entry.getRight(); 223 224 Set<RDFResourceTree> addedChildren = new HashSet<>(); 225 226 // loop over children of first tree 227 for(RDFResourceTree child1 : tree1.getChildren(edge1)){//System.out.println("c1:" + child1); 228 if(stop || isTimeout()) { 229 complete = false; 230 break; 231 } 232 // loop over children of second tree 233 for(RDFResourceTree child2 : tree2.getChildren(edge2)){//System.out.println("c2:" + child2); 234 if(stop || isTimeout()) { 235 complete = false; 236 break; 237 } 238 239 RDFResourceTree lggChild = computeLGG(child1, child2, false); 240 241 // check if there was already a more specific child computed before 242 // and if so don't add the current one 243 boolean add = true; 244 for(Iterator<RDFResourceTree> it = addedChildren.iterator(); it.hasNext();){ 245 RDFResourceTree addedChild = it.next(); 246 247 if(isSubTreeOf(addedChild, lggChild)){ 248// logger.trace("Skipped adding: Previously added child {} is subsumed by {}.", 249// addedChild.getStringRepresentation(), 250// lggChild.getStringRepresentation()); 251 add = false; 252 break; 253 } else if(isSubTreeOf(lggChild, addedChild)){ 254// logger.trace("Removing child node: {} is subsumed by previously added child {}.", 255// lggChild.getStringRepresentation(), 256// addedChild.getStringRepresentation()); 257 lgg.removeChild(addedChild, lgg.getEdgeToChild(addedChild)); 258 it.remove(); 259 } 260 } 261 if(add){ 262 lgg.addChild(lggChild, lcs); 263 addedChildren.add(lggChild); 264// logger.trace("Adding child {}", lggChild.getStringRepresentation()); 265 } 266 } 267 } 268 } 269 return lgg; 270 } 271 272 protected RDFResourceTree processLiteralNodes(RDFResourceTree tree1, RDFResourceTree tree2) { 273 RDFDatatype d1 = tree1.getData().getLiteralDatatype(); 274 RDFDatatype d2 = tree2.getData().getLiteralDatatype(); 275 276 RDFResourceTree newTree; 277 if(d1 != null && d1.equals(d2)){ 278 newTree = new RDFResourceTree(d1); 279 // TODO collect literal values 280 } else { 281 newTree = RDFResourceTree.newLiteralNode(); 282 } 283 if(tree1.getAnchorVar() != null && tree1.getAnchorVar().matches(tree2.getAnchorVar())) { 284 newTree.setAnchorVar(tree1.getAnchorVar()); 285 } 286 return newTree; 287 } 288 289 @Override 290 public void setTimeout(long timeout, TimeUnit timeoutUnits) { 291 this.timeoutMillis = timeoutUnits.toMillis(timeout); 292 } 293 294 @Override 295 public void stop() { 296 stop = true; 297 } 298 299 protected boolean isTimeout() { 300 return timeoutMillis > 0 && System.currentTimeMillis() - startTime >= timeoutMillis; 301 } 302 303 public boolean isComplete() { 304 return complete; 305 } 306 307 public void setBlankNodeScope(BlankNodeScope blankNodeScope) { 308 this.blankNodeScope = blankNodeScope; 309 } 310 311 private void addNumbering(int nodeId, RDFResourceTree tree){ 312// tree.setId(nodeId); 313 for(RDFResourceTree child : tree.getChildren()){ 314 addNumbering(nodeId++, child); 315 } 316 } 317 318 protected RDFResourceTree postProcess(RDFResourceTree tree) { 319 return tree; 320 } 321 322 protected RDFResourceTree preProcess(RDFResourceTree tree) { 323 return tree; 324 } 325 326 protected abstract boolean isSubTreeOf(RDFResourceTree tree1, RDFResourceTree tree2); 327 328 protected abstract Set<Triple<Node, Node, Node>> getRelatedEdges(RDFResourceTree tree1, RDFResourceTree tree2); 329 330}