001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.qtl.operations; 020 021import org.apache.log4j.Logger; 022import org.dllearner.algorithms.qtl.datastructures.QueryTree; 023import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl; 024import org.dllearner.kb.sparql.SparqlEndpoint; 025 026import java.util.*; 027 028/** 029 * This class can be used to simplify a query tree. In general we use it after the LGG is computed, 030 * because the tree of the LGG can be further simplified (only as approximation) by using 031 * the negative trees. In particular there are currently 2 simplifications: 032 * 033 * 1: 034 * If for a node n_1 there exists a path from n_1 to a node n_2 over the edge p, which is also contained 035 * in every other tree, then we can generalise n_2 if it is a resource or remove n_2 if it is a variable. 036 * 037 * 2: 038 * If from a node n_1 there exists to edges p_1 and p_2 which always occur together in other trees 039 * we can remove one edge. 040 * 041 */ 042public class PostLGG<N> { 043 044 private static final Logger logger = Logger.getLogger(PostLGG.class); 045 046 private SparqlEndpoint endpoint; 047 048 public PostLGG(SparqlEndpoint endpoint){ 049 this.endpoint = endpoint; 050 } 051 052 public PostLGG(){ 053 054 } 055 056 public void simplifyTree(QueryTree<N> tree, List<QueryTree<N>> negTrees){ 057 if(tree.getChildren().isEmpty()){ 058 return; 059 } 060 061 if(logger.isDebugEnabled()){ 062 String s = tree.getStringRepresentation(); 063 logger.debug("Making post LGG simplification"); 064 logger.debug("LGG:\n" + s); 065 int i = 1; 066// for(QueryTree<N> negTree : negTrees){ 067// logger.debug("Neg tree (" + i++ + "/" + negTrees.size() +"):\n" + TreeHelper.getAbbreviatedTreeRepresentation(negTree, endpoint.getBaseURI(), endpoint.getPrefixes())); 068// } 069 } 070 071 List<Object> path; 072 boolean pathExists; 073 for(QueryTree<N> leaf : tree.getLeafs()){ 074 pathExists = false; 075 path = getPathFromRootToNode(leaf); 076// if(logger.isInfoEnabled()){ 077// logger.info("Path: " + path); 078// } 079// if(leaf.getParent().getUserObject().equals("?")){ 080 pathExists = true; 081 for(QueryTree<N> negTree : negTrees){ 082 if(!pathExists(leaf, new ArrayList<>(path), negTree)){ 083 pathExists = false; 084 break; 085 } 086 } 087// } 088// if(logger.isInfoEnabled()){ 089// logger.info("Exists: " + pathExists); 090// } 091 if(pathExists){ 092 String pathString = "[" + leaf.getParent().getUserObject() + "--" + leaf.getParent().getEdge(leaf) + "--" + leaf.getUserObject() + "]"; 093 leaf.getParent().removeChild((QueryTreeImpl<N>) leaf); 094 if(logger.isDebugEnabled()){ 095 logger.debug("Removing edge " + pathString + " from LGG because this occurs also in all negative trees."); 096 } 097 } 098 } 099// checkSameEdgeOccurences(tree, negTrees); 100// if(logger.isDebugEnabled()){ 101// logger.debug("Pruned tree:\n" + TreeHelper.getAbbreviatedTreeRepresentation(tree, endpoint.getBaseURI(), endpoint.getPrefixes())); 102// } 103 104 } 105 106 private void checkSameEdgeOccurences(QueryTree<N> tree, List<QueryTree<N>> negTrees){ 107 List<Object> path1; 108 List<Object> path2; 109 N label1; 110 N label2; 111 QueryTree<N> parent; 112 Set<Integer> removedNodesIds = new HashSet<>(); 113 for(QueryTree<N> leaf : tree.getLeafs()){ 114 if(!removedNodesIds.contains(leaf.getId())){ 115 parent = leaf.getParent(); 116 for(QueryTree<N> node1 : parent.getChildren()){ 117 for(QueryTree<N> node2 : parent.getChildren()){ 118 if(!node1.equals(node2) && !removedNodesIds.contains(node1.getId()) && !removedNodesIds.contains(node2.getId())) { 119 path1 = getPathFromRootToNode(node1); 120 path2 = getPathFromRootToNode(node2); 121 label1 = node1.getUserObject(); 122 label2 = node2.getUserObject(); 123 124 boolean remove = false; 125 for(QueryTree<N> negTree : negTrees){ 126 int ret = containsEdgeCombination(negTree, new ArrayList<>(path1), new ArrayList<>(path2), label1, label2); 127 if(ret == -1){ 128 remove = false; 129 break; 130 } else if(ret == 1){ 131 remove = true; 132 } 133 134 } 135 if(remove){ 136 if(!removedNodesIds.contains(node2.getId())){ 137 logger.debug("Removing\n" + node2.getParent().getEdge(node2) + "---" + node2 + "\n because always occurs together with\n" 138 + node1.getParent().getEdge(node1) + "---" + node1); 139 node2.getParent().removeChild((QueryTreeImpl<N>) node2); 140 removedNodesIds.add(node2.getId()); 141 } 142 } 143 144 } 145 } 146 } 147 } 148 149 150 } 151 } 152 153 private int containsEdgeCombination(QueryTree<N> tree, List<Object> path1, List<Object> path2, N label1, N label2){ 154 Object lastEdge1 = path1.remove(path1.size()-1); 155 Object lastEdge2 = path2.remove(path2.size()-1); 156 157 List<Object> path = path1; 158 159 List<QueryTree<N>> nodes = getNodesByPath(tree, path); 160 List<QueryTree<N>> children1; 161 List<QueryTree<N>> children2; 162 int ret = 0; 163 if(nodes.isEmpty()){ 164 return 0; 165 } else { 166 for(QueryTree<N> node : nodes){ 167 children1 = node.getChildren(lastEdge1); 168 children2 = node.getChildren(lastEdge2); 169 boolean exists1 = false; 170 boolean exists2 = false; 171 for(QueryTree<N> child1 : children1){ 172 if(child1.getUserObject().equals(label1)){ 173 exists1 = true; 174 break; 175 } 176 } 177 for(QueryTree<N> child2 : children2){ 178 if(child2.getUserObject().equals(label2)){ 179 exists2 = true; 180 break; 181 } 182 } 183 if((exists1 && !exists2) || (!exists1 && exists2)){ 184 return -1; 185 } else if(exists1 && exists2){ 186 ret = 1; 187 } 188 } 189 } 190 191 return ret; 192 } 193 194 private boolean pathExists(QueryTree<N> leaf, List<Object> path, QueryTree<N> tree){ 195 List<QueryTree<N>> negLeaves; 196 Object lastEdge = path.remove(path.size()-1); 197 198 for(QueryTree<N> node : getNodesByPath(tree, path)){ 199 negLeaves = node.getChildren(lastEdge); 200 boolean exists = false; 201 if(negLeaves.isEmpty()){ 202 return false; 203 } else { 204 if(leaf.getUserObject().equals("?")){ 205 return true; 206 } 207 for(QueryTree<N> negLeaf : negLeaves){ 208 if(negLeaf.getUserObject().equals(leaf.getUserObject())){ 209 exists = true; 210 break; 211 } 212 } 213 } 214 if(!exists){ 215 return false; 216 } 217 } 218 return true; 219 220// List<QueryTree<N>> negLeaves; 221// Object lastEdge = path.remove(path.size()-1); 222// for(QueryTree<N> node : getNodesByPath(tree, path)){ 223// negLeaves = node.getChildren(lastEdge); 224// if(negLeaves.isEmpty()){ 225// return false; 226// } else { 227// if(leaf.getUserObject().equals("?")){ 228// return true; 229// } 230// for(QueryTree<N> negLeaf : negLeaves){ 231// if(negLeaf.getUserObject().equals(leaf.getUserObject())){ 232// return true; 233// } 234// } 235// } 236// } 237// return false; 238 239// for(QueryTree<N> node : getNodesByPath(tree, path)){ 240// if(!node.getUserObject().equals(leaf.getUserObject())){ 241// return false; 242// } 243// } 244// return true; 245 } 246 247 private List<Object> getPathFromRootToNode(QueryTree<N> node){ 248 List<Object> path = new ArrayList<>(); 249 QueryTree<N> parent = node.getParent(); 250 path.add(parent.getEdge(node)); 251 if(!parent.isRoot()){ 252 path.addAll(getPathFromRootToNode(parent)); 253 } 254 Collections.reverse(path); 255 return path; 256 } 257 258 private List<QueryTree<N>> getNodesByPath(QueryTree<N> tree, List<Object> path){ 259 if(path.isEmpty()){ 260 return Collections.singletonList(tree); 261 } 262 List<QueryTree<N>> nodes = new ArrayList<>(); 263 Object edge = path.remove(0); 264 for(QueryTree<N> child : tree.getChildren(edge)){ 265 if(path.isEmpty()){ 266 nodes.add(child); 267 } else { 268 nodes.addAll(getNodesByPath(child, path)); 269 } 270 } 271 272 return nodes; 273 } 274 275 private boolean existsPathInEveryTree(List<Object> path, List<QueryTree<N>> trees){ 276 for(QueryTree<N> tree : trees){ 277 if(!existsPath(path, tree)){ 278 return false; 279 } 280 } 281 return true; 282 } 283 284 private boolean existsPath(List<Object> path, QueryTree<N> tree){ 285 Object edge = path.remove(0); 286 if(!path.isEmpty()){ 287 for(QueryTree<N> child : tree.getChildren(edge)){ 288 if(existsPath(new ArrayList<>(path), child)){ 289 return true; 290 } 291 } 292 } 293 return false; 294 } 295 296}