001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.kb.extraction; 020 021import java.util.ArrayList; 022import java.util.List; 023import java.util.SortedSet; 024import java.util.TreeSet; 025 026import org.apache.log4j.Logger; 027import org.dllearner.kb.aquisitors.SparqlTupleAquisitorImproved; 028import org.dllearner.kb.aquisitors.TupleAquisitor; 029import org.dllearner.utilities.JamonMonitorLogger; 030import org.dllearner.utilities.statistics.SimpleClock; 031 032import com.jamonapi.Monitor; 033 034/** 035 * This class is used to extract the information . 036 * 037 * @author Sebastian Hellmann 038 */ 039public class ExtractionAlgorithm { 040 041 private Configuration configuration; 042 private SortedSet<String> alreadyQueriedSuperClasses = new TreeSet<>(); 043 private boolean stop = false; 044 045 046 private static Logger logger = Logger 047 .getLogger(ExtractionAlgorithm.class); 048 049 public ExtractionAlgorithm(Configuration configuration) { 050 this.configuration = configuration; 051 } 052 053 054 public void stop(){ 055 stop=true; 056 } 057 058 private boolean stopCondition(){ 059 return stop; 060 } 061 062 void reset(){ 063 stop = false; 064 } 065 066 private Node getFirstNode(String uri) { 067 return new InstanceNode(uri); 068 } 069 070 @SuppressWarnings("unused") 071 private List<Node> expandAll(String[] uris, TupleAquisitor tupelAquisitor) { 072 List<Node> nodeList = new ArrayList<>(); 073 for (String oneURI : uris) { 074 nodeList.add(expandNode(oneURI, tupelAquisitor)); 075 } 076 077 return nodeList; 078 } 079 080 /** 081 * most important function expands one example 082 * CAVE: the recursion is not a 083 * recursion anymore, it was transformed to an iteration 084 * 085 */ 086 public Node expandNode(String uri, TupleAquisitor tupleAquisitor) { 087 SimpleClock sc = new SimpleClock(); 088 if(tupleAquisitor instanceof SparqlTupleAquisitorImproved){ 089 ((SparqlTupleAquisitorImproved)tupleAquisitor).removeFromCache(uri); 090 } 091 092 Node seedNode = getFirstNode(uri); 093 List<Node> newNodes = new ArrayList<>(); 094 List<Node> collectNodes = new ArrayList<>(); 095 List<Node> tmp = new ArrayList<>(); 096 097 098 logger.info("Seed Node: "+seedNode); 099 newNodes.add(seedNode); 100 101 102 Monitor basic = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeBasicExtraction").start(); 103 for (int x = 1; x <= configuration.getRecursiondepth(); x++) { 104 105 sc.reset(); 106 while (!newNodes.isEmpty() && !stopCondition()) { 107 Node nextNode = newNodes.remove(0); 108 logger.info("Expanding " + nextNode); 109 110 // these are the new not expanded nodes 111 // the others are saved in connection with the original node 112 tupleAquisitor.setNextTaskToNormal(); 113 tmp.addAll(nextNode.expand(tupleAquisitor, 114 configuration.getManipulator())); 115 //.out.println(tmpVec); 116 117 } 118 collectNodes.addAll(tmp); 119 newNodes.addAll(tmp); 120 tmp.clear(); 121 122 logger.info("Recursion counter: " + x + " with " + newNodes.size() 123 + " Nodes remaining, " + sc.getAndSet("")); 124 } 125 basic.stop(); 126 127 if(configuration.isCloseAfterRecursion()&& !stopCondition()){ 128 Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeCloseAfterRecursion").start(); 129 List<InstanceNode> l = getInstanceNodes(newNodes); 130 logger.info("Getting classes for remaining instances: "+l.size() + " instances"); 131 tupleAquisitor.setNextTaskToClassesForInstances(); 132 collectNodes.addAll(expandCloseAfterRecursion(l, tupleAquisitor)); 133 m.stop(); 134 } 135 // gets All Class Nodes and expands them further 136 if (configuration.isGetAllSuperClasses()&& !stopCondition()) { 137 Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeGetAllSuperClasses").start(); 138 List<ClassNode> allClassNodes = getClassNodes(collectNodes); 139 tupleAquisitor.setNextTaskToClassInformation(); 140 logger.info("Get all superclasses for "+allClassNodes.size() + " classes"); 141 expandAllSuperClassesOfANode(allClassNodes, tupleAquisitor); 142 m.stop(); 143 } 144 145 146 if(configuration.isGetPropertyInformation()&& !stopCondition() ){ 147 collectNodes.add(seedNode); 148 Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeGetPropertyInformation").start(); 149 List<ObjectPropertyNode> objectProperties = getObjectPropertyNodes(collectNodes); 150 logger.info("Get info for "+objectProperties.size() + " objectProperties"); 151 for (ObjectPropertyNode node : objectProperties) { 152 if(stopCondition()){ 153 break; 154 } 155 collectNodes.addAll(node.expandProperties(tupleAquisitor, configuration.getManipulator(), configuration.isDissolveBlankNodes())); 156 } 157 List<DatatypePropertyNode> datatypeProperties = getDatatypeProperties(collectNodes); 158 logger.info("Get info for "+datatypeProperties.size() + " datatypeProperties"); 159 for (DatatypePropertyNode node : datatypeProperties) { 160 if(stopCondition()){ 161 break; 162 } 163 collectNodes.addAll(node.expandProperties(tupleAquisitor, configuration.getManipulator(), configuration.isDissolveBlankNodes())); 164 } 165 m.stop(); 166 } 167 168 Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeBlankNode").start(); 169 if( configuration.isDissolveBlankNodes() && !stopCondition()){ 170 expandBlankNodes(getBlankNodes(collectNodes),tupleAquisitor); 171 } 172 m.stop(); 173 174 175 return seedNode; 176 177 } 178 179 private List<Node> expandBlankNodes(List<BlankNode> blankNodes, TupleAquisitor tupelAquisitor) { 180 List<Node> newNodes = new ArrayList<>(); 181 while (!blankNodes.isEmpty()&& !stopCondition()) { 182 Node next = blankNodes.remove(0); 183 List<Node> l = next.expand(tupelAquisitor, configuration.getManipulator()); 184 for (Node node : l) { 185 blankNodes.add((BlankNode) node); 186 } 187 188 } 189 return newNodes; 190 } 191 192 193 private List<Node> expandCloseAfterRecursion(List<InstanceNode> instanceNodes, TupleAquisitor tupelAquisitor) { 194 195 List<Node> newNodes = new ArrayList<>(); 196 tupelAquisitor.setNextTaskToClassesForInstances(); 197 while (!instanceNodes.isEmpty() && !stopCondition()) { 198 logger.trace("Getting classes for remaining instances: " 199 + instanceNodes.size()); 200 Node next = instanceNodes.remove(0); 201 if(next.isExpanded()){ 202 JamonMonitorLogger.increaseCount(this.getClass(), "skipped nodes"); 203 continue; 204 } 205 logger.trace("Getting classes for: " + next); 206 newNodes.addAll(next.expand(tupelAquisitor, configuration.getManipulator())); 207 if (newNodes.size() >= configuration.getBreakSuperClassesAfter()) { 208 break; 209 }//endif 210 }//endwhile 211 212 return newNodes; 213 } 214 215 private void expandAllSuperClassesOfANode(List<ClassNode> allClassNodes, TupleAquisitor tupelAquisitor) { 216 217 218 List<Node> newClasses = new ArrayList<>(); 219 newClasses.addAll(allClassNodes); 220 //TODO LinkedData incompatibility 221 222 int i = 0; 223 224 while (!newClasses.isEmpty() && !stopCondition()) { 225 logger.trace("Remaining classes: " + newClasses.size()); 226 Node next = newClasses.remove(0); 227 228 logger.trace("Getting Superclasses for: " + next); 229 230 if (!alreadyQueriedSuperClasses.contains(next.getURIString())) { 231 logger.trace("" + next+" not in cache retrieving"); 232 alreadyQueriedSuperClasses.add(next.getURIString()); 233 tupelAquisitor.setNextTaskToClassInformation(); 234 235 newClasses.addAll(next.expand(tupelAquisitor, configuration.getManipulator())); 236 237 238 239 if (i > configuration.getBreakSuperClassesAfter()) { 240 break; 241 }//endinnerif 242 i++; 243 }//endouterif 244 else { 245 logger.trace("" + next+" in mem cache skipping"); 246 } 247 248 }//endwhile 249 if(!configuration.isOptimizeForDLLearner()){ 250 alreadyQueriedSuperClasses.clear(); 251 } 252 253 } 254 255 private static List<ClassNode> getClassNodes(List<Node> l ){ 256 List<ClassNode> retList = new ArrayList<>(); 257 for (Node node : l) { 258 if (node instanceof ClassNode) { 259 retList.add( (ClassNode) node); 260 261 } 262 263 } 264 return retList; 265 } 266 267 268 private static List<InstanceNode> getInstanceNodes(List<Node> l ){ 269 List<InstanceNode> retList = new ArrayList<>(); 270 for (Node node : l) { 271 if (node instanceof InstanceNode) { 272 retList.add( (InstanceNode) node); 273 274 } 275 276 } 277 return retList; 278 } 279 280 private static List<BlankNode> getBlankNodes(List<Node> l ){ 281 List<BlankNode> retList = new ArrayList<>(); 282 for (Node node : l) { 283 if (node instanceof BlankNode) { 284 retList.add( (BlankNode) node); 285 286 } 287 288 } 289 return retList; 290 } 291 292 private static List<ObjectPropertyNode> getObjectPropertyNodes(List<Node> l ){ 293 List<ObjectPropertyNode> properties = new ArrayList<>(); 294 for (Node node : l) { 295 if (node instanceof InstanceNode) { 296 properties.addAll(( (InstanceNode) node).getObjectProperties()); 297 298 } 299 300 } 301 return properties; 302 } 303 304 private static List<DatatypePropertyNode> getDatatypeProperties(List<Node> l ){ 305 List<DatatypePropertyNode> properties = new ArrayList<>(); 306 for (Node node : l) { 307 if (node instanceof InstanceNode) { 308 properties.addAll(( (InstanceNode) node).getDatatypePropertyNode()); 309 } 310 311 } 312 return properties; 313 } 314 315}