001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.qtl.operations.lgg;
020
021import com.jamonapi.Monitor;
022import com.jamonapi.MonitorFactory;
023import org.apache.commons.lang3.tuple.Triple;
024import org.apache.jena.datatypes.RDFDatatype;
025import org.apache.jena.graph.Node;
026import org.apache.jena.vocabulary.RDF;
027import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree;
028import org.dllearner.algorithms.qtl.operations.StoppableOperation;
029import org.dllearner.algorithms.qtl.operations.TimeoutableOperation;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032
033import java.util.HashSet;
034import java.util.Iterator;
035import java.util.Set;
036import java.util.concurrent.TimeUnit;
037
038/**
039 * An LGG generator that can be stopped and given a timeout.
040 *
041 * @author Lorenz Buehmann
042 *
043 */
044public abstract class AbstractLGGGenerator implements LGGGenerator, StoppableOperation, TimeoutableOperation {
045
046        public enum BlankNodeScope {
047                TREE, DATASET
048        }
049        
050        protected Logger logger = LoggerFactory.getLogger(getClass());
051        
052        private Monitor mon = MonitorFactory.getTimeMonitor("lgg");
053        
054        protected int subCalls;
055        
056        private long timeoutMillis = -1;
057        private long startTime;
058
059        protected volatile boolean stop = false;
060
061        private boolean complete = true;
062
063        private BlankNodeScope blankNodeScope = BlankNodeScope.TREE;
064        
065
066        private void reset() {
067                stop = false;
068                subCalls = 0;
069        }
070
071        /* (non-Javadoc)
072         * @see org.dllearner.algorithms.qtl.operations.lgg.LGGGenerator2#getLGG(org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree, org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree, boolean)
073         */
074        @Override
075        public RDFResourceTree getLGG(RDFResourceTree tree1, RDFResourceTree tree2, boolean learnFilters) {
076                startTime = System.currentTimeMillis();
077
078                reset();
079
080                // apply some pre-processing
081                tree1 = preProcess(tree1);
082                tree2 = preProcess(tree2);
083                
084                // compute the LGG
085                mon.start();
086                RDFResourceTree lgg = computeLGG(tree1, tree2, learnFilters);
087                mon.stop();
088
089                // apply some post-processing
090                lgg = postProcess(lgg);
091
092                addNumbering(0, lgg);
093                
094                return lgg;
095        }
096
097        protected RDFResourceTree computeLGG(RDFResourceTree tree1, RDFResourceTree tree2, boolean learnFilters){
098                subCalls++;
099
100                // 1. compare the root nodes
101                // a) if both root nodes have same URI or literal value, just return one of the two trees as LGG
102                if((tree1.isResourceNode() || tree1.isLiteralValueNode()) && tree1.getData().equals(tree2.getData())){
103                        logger.trace("Early termination. Tree 1 {}  and tree 2 {} describe the same resource.", tree1, tree2);
104                        return tree1;
105                }
106
107                // b) handle literal nodes
108                if(tree1.isLiteralNode() && tree2.isLiteralNode()){
109                        return processLiteralNodes(tree1, tree2);
110                }
111
112                // c) handle class nodes
113                if(tree1.isClassNode()) {
114                        return processClassNodes(tree1, tree2);
115                }
116
117                // d) else create new empty tree
118                RDFResourceTree lgg = new RDFResourceTree();
119
120                // keep name if both blank nodes
121                //TODO workaround for anchor nodes that are used for tuples
122                if(tree1.getData().isBlank() && tree1.getData().matches(tree2.getData())) {
123                        lgg.setData(tree1.getData());
124                }
125                boolean isAnchorNode = tree1.getAnchorVar() != null && tree1.getAnchorVar().matches(tree2.getAnchorVar());
126                if(isAnchorNode) {
127                        lgg.setAnchorVar(tree1.getAnchorVar());
128                }
129
130
131                // 2. compare the edges
132                // we only have to compare edges which are
133                // a) contained in both trees
134                // b) related via subsumption, i.e. p1 ⊑ p2
135
136                // get edges of tree 2 connected via subsumption
137                Set<Triple<Node, Node, Node>> relatedEdges = getRelatedEdges(tree1, tree2);
138                for (Triple<Node, Node, Node> entry : relatedEdges){
139                        if(stop || isTimeout()) {
140                                complete = false;
141                                break;
142                        }
143
144                        Node edge1 = entry.getLeft();
145                        Node edge2 = entry.getMiddle();
146                        Node lcs = entry.getRight();
147
148                        Set<RDFResourceTree> addedChildren = new HashSet<>();
149
150                        // loop over children of first tree
151                        for(RDFResourceTree child1 : tree1.getChildren(edge1)){//System.out.println("c1:" + child1);
152                                if(stop || isTimeout()) {
153                                        complete = false;
154                                        break;
155                                }
156                                // loop over children of second tree
157                                for(RDFResourceTree child2 : tree2.getChildren(edge2)){//System.out.println("c2:" + child2);
158                                        if(stop || isTimeout()) {
159                                                complete = false;
160                                                break;
161                                        }
162
163                                        RDFResourceTree lggChild = computeLGG(child1, child2, learnFilters);
164
165                                        // check if there was already a more specific child computed before
166                                        // and if so don't add the current one
167                                        boolean add = true;
168                                        for(Iterator<RDFResourceTree> it = addedChildren.iterator(); it.hasNext();){
169                                                RDFResourceTree addedChild = it.next();
170
171                                                if(isSubTreeOf(addedChild, lggChild)){
172//                                                              logger.trace("Skipped adding: Previously added child {} is subsumed by {}.",
173//                                                                              addedChild.getStringRepresentation(),
174//                                                                              lggChild.getStringRepresentation());
175                                                        add = false;
176                                                        if(lggChild.hasAnchor()) {
177                                                                add = true;
178                                                                if(isSubTreeOf(lggChild, addedChild) && !addedChild.hasAnchor()) {
179                                                                        lgg.removeChild(addedChild, lgg.getEdgeToChild(addedChild));
180                                                                        it.remove();
181                                                                }
182//                                                              System.err.println("not add anchor " + lggChild.getAnchorVar());
183//                                                              System.err.println(lggChild.getStringRepresentation());
184//                                                              System.err.println("subsumes");
185//                                                              System.err.println(addedChild.getStringRepresentation());
186                                                        }
187                                                        break;
188                                                } else if(isSubTreeOf(lggChild, addedChild)){
189//                                                              logger.trace("Removing child node: {} is subsumed by previously added child {}.",
190//                                                                              lggChild.getStringRepresentation(),
191//                                                                              addedChild.getStringRepresentation());
192                                                        lgg.removeChild(addedChild, lgg.getEdgeToChild(addedChild));
193                                                        it.remove();
194                                                        if(addedChild.hasAnchor()) {
195                                                                System.err.println("removed anchor " + addedChild.getAnchorVar());
196                                                        }
197                                                }
198                                        }
199                                        if(add){
200                                                lgg.addChild(lggChild, lcs);
201                                                addedChildren.add(lggChild);
202//                                                      logger.trace("Adding child {}", lggChild.getStringRepresentation());
203                                        }
204                                }
205                        }
206                }
207
208                return lgg;
209        }
210
211        protected RDFResourceTree processClassNodes(RDFResourceTree tree1, RDFResourceTree tree2) {
212                RDFResourceTree lgg = new RDFResourceTree();
213
214                Set<Triple<Node, Node, Node>> relatedEdges = getRelatedEdges(tree1, tree2);
215                for (Triple<Node, Node, Node> entry : relatedEdges) {
216                        if (stop || isTimeout()) {
217                                complete = false;
218                                break;
219                        }
220                        Node edge1 = entry.getLeft();
221                        Node edge2 = entry.getMiddle();
222                        Node lcs = entry.getRight();
223
224                        Set<RDFResourceTree> addedChildren = new HashSet<>();
225
226                        // loop over children of first tree
227                        for(RDFResourceTree child1 : tree1.getChildren(edge1)){//System.out.println("c1:" + child1);
228                                if(stop || isTimeout()) {
229                                        complete = false;
230                                        break;
231                                }
232                                // loop over children of second tree
233                                for(RDFResourceTree child2 : tree2.getChildren(edge2)){//System.out.println("c2:" + child2);
234                                        if(stop || isTimeout()) {
235                                                complete = false;
236                                                break;
237                                        }
238
239                                        RDFResourceTree lggChild = computeLGG(child1, child2, false);
240
241                                        // check if there was already a more specific child computed before
242                                        // and if so don't add the current one
243                                        boolean add = true;
244                                        for(Iterator<RDFResourceTree> it = addedChildren.iterator(); it.hasNext();){
245                                                RDFResourceTree addedChild = it.next();
246
247                                                if(isSubTreeOf(addedChild, lggChild)){
248//                                                              logger.trace("Skipped adding: Previously added child {} is subsumed by {}.",
249//                                                                              addedChild.getStringRepresentation(),
250//                                                                              lggChild.getStringRepresentation());
251                                                        add = false;
252                                                        break;
253                                                } else if(isSubTreeOf(lggChild, addedChild)){
254//                                                              logger.trace("Removing child node: {} is subsumed by previously added child {}.",
255//                                                                              lggChild.getStringRepresentation(),
256//                                                                              addedChild.getStringRepresentation());
257                                                        lgg.removeChild(addedChild, lgg.getEdgeToChild(addedChild));
258                                                        it.remove();
259                                                }
260                                        }
261                                        if(add){
262                                                lgg.addChild(lggChild, lcs);
263                                                addedChildren.add(lggChild);
264//                                                      logger.trace("Adding child {}", lggChild.getStringRepresentation());
265                                        }
266                                }
267                        }
268                }
269                return lgg;
270        }
271
272        protected RDFResourceTree processLiteralNodes(RDFResourceTree tree1, RDFResourceTree tree2) {
273                RDFDatatype d1 = tree1.getData().getLiteralDatatype();
274                RDFDatatype d2 = tree2.getData().getLiteralDatatype();
275
276                RDFResourceTree newTree;
277                if(d1 != null && d1.equals(d2)){
278            newTree = new RDFResourceTree(d1);
279                        // TODO collect literal values
280                } else {
281            newTree = RDFResourceTree.newLiteralNode();
282        }
283        if(tree1.getAnchorVar() != null && tree1.getAnchorVar().matches(tree2.getAnchorVar())) {
284            newTree.setAnchorVar(tree1.getAnchorVar());
285        }
286                return newTree;
287        }
288
289        @Override
290        public void setTimeout(long timeout, TimeUnit timeoutUnits) {
291                this.timeoutMillis = timeoutUnits.toMillis(timeout);
292        }
293
294        @Override
295        public void stop() {
296                stop = true;
297        }
298
299        protected boolean isTimeout() {
300                return timeoutMillis > 0 && System.currentTimeMillis() - startTime >= timeoutMillis;
301        }
302
303        public boolean isComplete() {
304                return complete;
305        }
306
307        public void setBlankNodeScope(BlankNodeScope blankNodeScope) {
308                this.blankNodeScope = blankNodeScope;
309        }
310
311        private void addNumbering(int nodeId, RDFResourceTree tree){
312//              tree.setId(nodeId);
313                for(RDFResourceTree child : tree.getChildren()){
314                        addNumbering(nodeId++, child);
315                }
316        }
317
318        protected RDFResourceTree postProcess(RDFResourceTree tree) {
319                return tree;
320        }
321
322        protected RDFResourceTree preProcess(RDFResourceTree tree) {
323                return tree;
324        }
325
326        protected abstract boolean isSubTreeOf(RDFResourceTree tree1, RDFResourceTree tree2);
327        
328        protected abstract Set<Triple<Node, Node, Node>> getRelatedEdges(RDFResourceTree tree1, RDFResourceTree tree2);
329
330}