001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.qtl.operations;
020
021import org.apache.log4j.Logger;
022import org.dllearner.algorithms.qtl.datastructures.QueryTree;
023import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl;
024import org.dllearner.kb.sparql.SparqlEndpoint;
025
026import java.util.*;
027
028/**
029 * This class can be used to simplify a query tree. In general we use it after the LGG is computed, 
030 * because the tree of the LGG can be further simplified (only as approximation) by using 
031 * the negative trees. In particular there are currently 2 simplifications:
032 * 
033 * 1: 
034 * If for a node n_1 there exists a path from n_1 to a node n_2 over the edge p, which is also contained
035 * in every other tree, then we can generalise n_2 if it is a resource or remove n_2 if it is a variable.
036 * 
037 * 2:
038 * If from a node n_1 there exists to edges p_1 and p_2 which always occur together in other trees
039 * we can remove one edge.
040 * 
041 */
042public class PostLGG<N> {
043        
044        private static final Logger logger = Logger.getLogger(PostLGG.class);
045        
046        private SparqlEndpoint endpoint;
047        
048        public PostLGG(SparqlEndpoint endpoint){
049                this.endpoint = endpoint;
050        }
051        
052        public PostLGG(){
053                
054        }
055        
056        public void simplifyTree(QueryTree<N> tree, List<QueryTree<N>> negTrees){
057                if(tree.getChildren().isEmpty()){
058                        return;
059                }
060                
061                if(logger.isDebugEnabled()){
062                        String s = tree.getStringRepresentation();
063                        logger.debug("Making post LGG simplification");
064                        logger.debug("LGG:\n" + s);
065                        int i = 1;
066//                      for(QueryTree<N> negTree : negTrees){
067//                              logger.debug("Neg tree (" + i++ + "/" + negTrees.size() +"):\n" + TreeHelper.getAbbreviatedTreeRepresentation(negTree, endpoint.getBaseURI(), endpoint.getPrefixes()));
068//                      }
069                }
070                
071                List<Object> path;
072                boolean pathExists;
073                for(QueryTree<N> leaf : tree.getLeafs()){
074                        pathExists = false;
075                        path = getPathFromRootToNode(leaf);
076//                      if(logger.isInfoEnabled()){
077//                              logger.info("Path: " + path);
078//                      }
079//                      if(leaf.getParent().getUserObject().equals("?")){
080                                pathExists = true;
081                                for(QueryTree<N> negTree : negTrees){
082                                        if(!pathExists(leaf, new ArrayList<>(path), negTree)){
083                                                pathExists = false;
084                                                break;
085                                        }
086                                }
087//                      }
088//                      if(logger.isInfoEnabled()){
089//                              logger.info("Exists: " + pathExists);
090//                      }
091                        if(pathExists){
092                                String pathString = "[" + leaf.getParent().getUserObject() + "--" + leaf.getParent().getEdge(leaf) + "--" + leaf.getUserObject() + "]";
093                                leaf.getParent().removeChild((QueryTreeImpl<N>) leaf);
094                                if(logger.isDebugEnabled()){
095                                        logger.debug("Removing edge " + pathString + " from LGG because this occurs also in all negative trees.");
096                                }
097                        }
098                }
099//              checkSameEdgeOccurences(tree, negTrees);
100//              if(logger.isDebugEnabled()){
101//                      logger.debug("Pruned tree:\n" + TreeHelper.getAbbreviatedTreeRepresentation(tree, endpoint.getBaseURI(), endpoint.getPrefixes()));
102//              }
103                
104        }
105        
106        private void checkSameEdgeOccurences(QueryTree<N> tree, List<QueryTree<N>> negTrees){
107                List<Object> path1;
108                List<Object> path2;
109                N label1;
110                N label2;
111                QueryTree<N> parent;
112                Set<Integer> removedNodesIds = new HashSet<>();
113                for(QueryTree<N> leaf : tree.getLeafs()){
114                        if(!removedNodesIds.contains(leaf.getId())){
115                                parent = leaf.getParent();
116                                for(QueryTree<N> node1 : parent.getChildren()){
117                                        for(QueryTree<N> node2 : parent.getChildren()){
118                                                if(!node1.equals(node2) && !removedNodesIds.contains(node1.getId()) && !removedNodesIds.contains(node2.getId())) {
119                                                        path1 = getPathFromRootToNode(node1);
120                                                        path2 = getPathFromRootToNode(node2);
121                                                        label1 = node1.getUserObject();
122                                                        label2 = node2.getUserObject();
123                                                        
124                                                        boolean remove = false;
125                                                        for(QueryTree<N> negTree : negTrees){
126                                                                int ret = containsEdgeCombination(negTree, new ArrayList<>(path1), new ArrayList<>(path2), label1, label2);
127                                                                if(ret == -1){
128                                                                        remove = false;
129                                                                        break;
130                                                                } else if(ret == 1){
131                                                                        remove = true;
132                                                                }
133                                                                
134                                                        }
135                                                        if(remove){
136                                                                if(!removedNodesIds.contains(node2.getId())){
137                                                                        logger.debug("Removing\n" + node2.getParent().getEdge(node2) + "---" + node2 + "\n because always occurs together with\n"
138                                                                                        + node1.getParent().getEdge(node1) + "---" + node1);
139                                                                        node2.getParent().removeChild((QueryTreeImpl<N>) node2);
140                                                                        removedNodesIds.add(node2.getId());
141                                                                }
142                                                        }
143                                                        
144                                                }
145                                        }
146                                }
147                        }
148                        
149                        
150                }
151        }
152        
153        private int containsEdgeCombination(QueryTree<N> tree, List<Object> path1, List<Object> path2, N label1, N label2){
154                Object lastEdge1 = path1.remove(path1.size()-1);
155                Object lastEdge2 = path2.remove(path2.size()-1);
156                
157                List<Object> path = path1;
158
159                List<QueryTree<N>> nodes = getNodesByPath(tree, path);
160                List<QueryTree<N>> children1;
161                List<QueryTree<N>> children2;
162                int ret = 0;
163                if(nodes.isEmpty()){
164                        return 0;
165                } else {
166                        for(QueryTree<N> node : nodes){
167                                children1 = node.getChildren(lastEdge1);
168                                children2 = node.getChildren(lastEdge2);
169                                boolean exists1 = false;
170                                boolean exists2 = false;
171                                for(QueryTree<N> child1 : children1){
172                                        if(child1.getUserObject().equals(label1)){
173                                                exists1 = true;
174                                                break;
175                                        }
176                                }
177                                for(QueryTree<N> child2 : children2){
178                                        if(child2.getUserObject().equals(label2)){
179                                                exists2 = true;
180                                                break;
181                                        }
182                                }
183                                if((exists1 && !exists2) || (!exists1 && exists2)){
184                                        return -1;
185                                } else if(exists1 && exists2){
186                                        ret = 1;
187                                }
188                        }
189                }
190                
191                return ret;
192        }
193        
194        private boolean pathExists(QueryTree<N> leaf, List<Object> path, QueryTree<N> tree){
195                List<QueryTree<N>> negLeaves;
196                Object lastEdge = path.remove(path.size()-1);
197                
198                for(QueryTree<N> node : getNodesByPath(tree, path)){
199                        negLeaves = node.getChildren(lastEdge);
200                        boolean exists = false;
201                        if(negLeaves.isEmpty()){
202                                return false;
203                        } else {
204                                if(leaf.getUserObject().equals("?")){
205                                        return true;
206                                }
207                                for(QueryTree<N> negLeaf : negLeaves){
208                                        if(negLeaf.getUserObject().equals(leaf.getUserObject())){
209                                                exists = true;
210                                                break;
211                                        }
212                                }
213                        }
214                        if(!exists){
215                                return false;
216                        }
217                }
218                return true;
219                
220//              List<QueryTree<N>> negLeaves;
221//              Object lastEdge = path.remove(path.size()-1);
222//              for(QueryTree<N> node : getNodesByPath(tree, path)){
223//                      negLeaves = node.getChildren(lastEdge);
224//                      if(negLeaves.isEmpty()){
225//                              return false;
226//                      } else {
227//                              if(leaf.getUserObject().equals("?")){
228//                                      return true;
229//                              }
230//                              for(QueryTree<N> negLeaf : negLeaves){
231//                                      if(negLeaf.getUserObject().equals(leaf.getUserObject())){
232//                                              return true;
233//                                      }
234//                              }
235//                      }
236//              }
237//              return false;
238                
239//              for(QueryTree<N> node : getNodesByPath(tree, path)){
240//                      if(!node.getUserObject().equals(leaf.getUserObject())){
241//                              return false;
242//                      }
243//              }
244//              return true;
245        }
246        
247        private List<Object> getPathFromRootToNode(QueryTree<N> node){
248                List<Object> path = new ArrayList<>();
249                QueryTree<N> parent = node.getParent();
250                path.add(parent.getEdge(node));
251                if(!parent.isRoot()){
252                        path.addAll(getPathFromRootToNode(parent));
253                }
254                Collections.reverse(path);
255                return path;
256        }
257        
258        private List<QueryTree<N>> getNodesByPath(QueryTree<N> tree, List<Object> path){
259                if(path.isEmpty()){
260                        return Collections.singletonList(tree);
261                }
262                List<QueryTree<N>> nodes = new ArrayList<>();
263                Object edge = path.remove(0);
264                for(QueryTree<N> child : tree.getChildren(edge)){
265                        if(path.isEmpty()){
266                                nodes.add(child);
267                        } else {
268                                nodes.addAll(getNodesByPath(child, path));
269                        }
270                }
271                
272                return nodes;
273        }
274        
275        private boolean existsPathInEveryTree(List<Object> path, List<QueryTree<N>> trees){
276                for(QueryTree<N> tree : trees){
277                        if(!existsPath(path, tree)){
278                                return false;
279                        }
280                }
281                return true;
282        }
283        
284        private boolean existsPath(List<Object> path, QueryTree<N> tree){
285                Object edge = path.remove(0);
286                if(!path.isEmpty()){
287                        for(QueryTree<N> child : tree.getChildren(edge)){
288                                if(existsPath(new ArrayList<>(path), child)){
289                                        return true;
290                                }
291                        }
292                }
293                return false;
294        }
295
296}