001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.kb.extraction;
020
021import java.util.ArrayList;
022import java.util.List;
023import java.util.SortedSet;
024import java.util.TreeSet;
025
026import org.apache.log4j.Logger;
027import org.dllearner.kb.aquisitors.SparqlTupleAquisitorImproved;
028import org.dllearner.kb.aquisitors.TupleAquisitor;
029import org.dllearner.utilities.JamonMonitorLogger;
030import org.dllearner.utilities.statistics.SimpleClock;
031
032import com.jamonapi.Monitor;
033
034/**
035 * This class is used to extract the information .
036 * 
037 * @author Sebastian Hellmann
038 */
039public class ExtractionAlgorithm {
040
041        private Configuration configuration;
042        private SortedSet<String> alreadyQueriedSuperClasses = new TreeSet<>();
043        private boolean stop = false;
044
045        
046        private static Logger logger = Logger
047                .getLogger(ExtractionAlgorithm.class);
048
049        public ExtractionAlgorithm(Configuration configuration) {
050                this.configuration = configuration;
051        }
052
053        
054        public void stop(){
055                stop=true;
056        }
057        
058        private boolean stopCondition(){
059                return stop;
060        }
061        
062        void reset(){
063                stop = false;
064        }
065        
066        private Node getFirstNode(String uri) {
067                return new InstanceNode(uri);
068        }
069
070        @SuppressWarnings("unused")
071        private List<Node> expandAll(String[] uris, TupleAquisitor tupelAquisitor) {
072                List<Node> nodeList = new ArrayList<>();
073                for (String oneURI : uris) {
074                        nodeList.add(expandNode(oneURI, tupelAquisitor));
075                }
076                
077                return nodeList;
078        }
079
080        /**
081         * most important function expands one example 
082         * CAVE: the recursion is not a
083         * recursion anymore, it was transformed to an iteration
084         *
085         */
086        public Node expandNode(String uri, TupleAquisitor tupleAquisitor) {
087                SimpleClock sc = new SimpleClock();
088                if(tupleAquisitor instanceof SparqlTupleAquisitorImproved){
089                        ((SparqlTupleAquisitorImproved)tupleAquisitor).removeFromCache(uri);
090                }
091                
092                Node seedNode = getFirstNode(uri);
093                List<Node> newNodes = new ArrayList<>();
094                List<Node> collectNodes = new ArrayList<>();
095                List<Node> tmp = new ArrayList<>();
096                
097                
098                logger.info("Seed Node: "+seedNode);
099                newNodes.add(seedNode);
100                
101
102                Monitor basic = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeBasicExtraction").start();
103                for (int x = 1; x <= configuration.getRecursiondepth(); x++) {
104                        
105                        sc.reset();
106                        while (!newNodes.isEmpty() && !stopCondition()) {
107                                Node nextNode = newNodes.remove(0);
108                                logger.info("Expanding " + nextNode);
109                                
110                                // these are the new not expanded nodes
111                                // the others are saved in connection with the original node
112                                tupleAquisitor.setNextTaskToNormal();
113                                tmp.addAll(nextNode.expand(tupleAquisitor,
114                                                configuration.getManipulator()));
115                                //.out.println(tmpVec);
116                                
117                        }
118                        collectNodes.addAll(tmp);
119                        newNodes.addAll(tmp);
120                        tmp.clear();
121                        
122                        logger.info("Recursion counter: " + x + " with " + newNodes.size()
123                                        + " Nodes remaining, " + sc.getAndSet(""));
124                }
125                basic.stop();
126                
127                if(configuration.isCloseAfterRecursion()&& !stopCondition()){
128                        Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeCloseAfterRecursion").start();
129                        List<InstanceNode> l = getInstanceNodes(newNodes);
130                        logger.info("Getting classes for remaining instances: "+l.size() + " instances");
131                        tupleAquisitor.setNextTaskToClassesForInstances();
132                        collectNodes.addAll(expandCloseAfterRecursion(l, tupleAquisitor));
133                        m.stop();
134                }
135                // gets All Class Nodes and expands them further
136                if (configuration.isGetAllSuperClasses()&& !stopCondition()) {
137                        Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeGetAllSuperClasses").start();
138                        List<ClassNode> allClassNodes = getClassNodes(collectNodes);
139                        tupleAquisitor.setNextTaskToClassInformation();
140                        logger.info("Get all superclasses for "+allClassNodes.size() + " classes");
141                        expandAllSuperClassesOfANode(allClassNodes, tupleAquisitor);
142                        m.stop();
143                }
144                        
145                
146                if(configuration.isGetPropertyInformation()&& !stopCondition() ){
147                        collectNodes.add(seedNode);
148                        Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeGetPropertyInformation").start();
149                        List<ObjectPropertyNode> objectProperties = getObjectPropertyNodes(collectNodes);
150                        logger.info("Get info for "+objectProperties.size() + " objectProperties");
151                        for (ObjectPropertyNode node : objectProperties) {
152                                if(stopCondition()){
153                                        break;
154                                }
155                                collectNodes.addAll(node.expandProperties(tupleAquisitor, configuration.getManipulator(), configuration.isDissolveBlankNodes()));
156                        }
157                        List<DatatypePropertyNode> datatypeProperties = getDatatypeProperties(collectNodes);
158                        logger.info("Get info for "+datatypeProperties.size() + " datatypeProperties");
159                        for (DatatypePropertyNode node : datatypeProperties) {
160                                if(stopCondition()){
161                                        break;
162                                }
163                                collectNodes.addAll(node.expandProperties(tupleAquisitor, configuration.getManipulator(), configuration.isDissolveBlankNodes()));
164                        }
165                        m.stop();
166                }
167                
168                Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeBlankNode").start();
169                if( configuration.isDissolveBlankNodes() && !stopCondition()){
170                        expandBlankNodes(getBlankNodes(collectNodes),tupleAquisitor);
171                }
172                m.stop();
173                
174        
175                return seedNode;
176
177        }
178        
179        private List<Node> expandBlankNodes(List<BlankNode> blankNodes, TupleAquisitor tupelAquisitor) {
180                List<Node> newNodes = new ArrayList<>();
181                while (!blankNodes.isEmpty()&& !stopCondition()) {
182                        Node next = blankNodes.remove(0);
183                        List<Node> l = next.expand(tupelAquisitor, configuration.getManipulator());
184                        for (Node node : l) {
185                                blankNodes.add((BlankNode) node);
186                        }
187                        
188                }
189                return newNodes;
190        }
191                
192        
193        private List<Node> expandCloseAfterRecursion(List<InstanceNode> instanceNodes, TupleAquisitor tupelAquisitor) {
194                
195                List<Node> newNodes = new ArrayList<>();
196                tupelAquisitor.setNextTaskToClassesForInstances();
197                while (!instanceNodes.isEmpty() && !stopCondition()) {
198                        logger.trace("Getting classes for remaining instances: "
199                                        + instanceNodes.size());
200                        Node next = instanceNodes.remove(0);
201                        if(next.isExpanded()){
202                                JamonMonitorLogger.increaseCount(this.getClass(), "skipped nodes");
203                                continue;
204                        }
205                        logger.trace("Getting classes for: " + next);
206                        newNodes.addAll(next.expand(tupelAquisitor, configuration.getManipulator()));
207                        if (newNodes.size() >= configuration.getBreakSuperClassesAfter()) {
208                                break;
209                        }//endif
210                }//endwhile
211                
212                return newNodes;
213        }
214        
215        private void expandAllSuperClassesOfANode(List<ClassNode> allClassNodes, TupleAquisitor tupelAquisitor) {
216                
217                
218                List<Node> newClasses = new ArrayList<>();
219                newClasses.addAll(allClassNodes);
220                //TODO LinkedData incompatibility
221                
222                int i = 0;
223                
224                while (!newClasses.isEmpty() && !stopCondition()) {
225                        logger.trace("Remaining classes: " + newClasses.size());
226                        Node next = newClasses.remove(0);
227                        
228                        logger.trace("Getting Superclasses for: " + next);
229                        
230                        if (!alreadyQueriedSuperClasses.contains(next.getURIString())) {
231                                logger.trace("" + next+" not in cache retrieving");
232                                alreadyQueriedSuperClasses.add(next.getURIString());
233                                tupelAquisitor.setNextTaskToClassInformation();
234                                
235                                newClasses.addAll(next.expand(tupelAquisitor, configuration.getManipulator()));
236                                
237                                
238                                
239                                if (i > configuration.getBreakSuperClassesAfter()) {
240                                        break;
241                                }//endinnerif
242                                i++;
243                        }//endouterif
244                        else {
245                                logger.trace("" + next+"  in mem cache skipping");
246                        }
247
248                }//endwhile
249                if(!configuration.isOptimizeForDLLearner()){
250                        alreadyQueriedSuperClasses.clear();
251                }
252
253        }
254        
255        private static List<ClassNode> getClassNodes(List<Node> l ){
256                List<ClassNode> retList = new ArrayList<>();
257                for (Node node : l) {
258                        if (node instanceof ClassNode) {
259                                retList.add( (ClassNode) node);
260                                
261                        }
262                        
263                }
264                return retList;
265        }
266        
267
268        private static List<InstanceNode> getInstanceNodes(List<Node> l ){
269                List<InstanceNode> retList = new ArrayList<>();
270                for (Node node : l) {
271                        if (node instanceof InstanceNode) {
272                                retList.add( (InstanceNode) node);
273                                
274                        }
275                        
276                }
277                return retList;
278        }
279        
280        private static List<BlankNode> getBlankNodes(List<Node> l ){
281                List<BlankNode> retList = new ArrayList<>();
282                for (Node node : l) {
283                        if (node instanceof BlankNode) {
284                                retList.add( (BlankNode) node);
285                                
286                        }
287                        
288                }
289                return retList;
290        }
291        
292        private static List<ObjectPropertyNode> getObjectPropertyNodes(List<Node> l ){
293                List<ObjectPropertyNode> properties = new ArrayList<>();
294                for (Node node : l) {
295                        if (node instanceof InstanceNode) {
296                                properties.addAll(( (InstanceNode) node).getObjectProperties());
297                                
298                        }
299                        
300                }
301                return properties;
302        }
303        
304        private static List<DatatypePropertyNode> getDatatypeProperties(List<Node> l ){
305                List<DatatypePropertyNode> properties = new ArrayList<>();
306                for (Node node : l) {
307                        if (node instanceof InstanceNode) {
308                                properties.addAll(( (InstanceNode) node).getDatatypePropertyNode());
309                        }
310                        
311                }
312                return properties;
313        }
314
315}