001/** 002 * Copyright (C) 2007-2011, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 * 019 */ 020package org.dllearner.cli; 021 022import org.apache.jena.query.QuerySolution; 023import org.apache.jena.query.ResultSet; 024import org.apache.jena.rdf.model.Model; 025import org.apache.log4j.ConsoleAppender; 026import org.apache.log4j.Level; 027import org.apache.log4j.Logger; 028import org.apache.log4j.SimpleLayout; 029import org.dllearner.cli.Enrichment.AlgorithmRun; 030import org.dllearner.core.ComponentInitException; 031import org.dllearner.core.LearningProblemUnsupportedException; 032import org.dllearner.kb.SparqlEndpointKS; 033import org.dllearner.kb.sparql.SparqlEndpoint; 034import org.dllearner.kb.sparql.SparqlQuery; 035import org.dllearner.utilities.Files; 036import org.semanticweb.owlapi.model.OWLAxiom; 037 038import java.io.*; 039import java.lang.reflect.InvocationTargetException; 040import java.net.MalformedURLException; 041import java.net.URL; 042import java.util.*; 043import java.util.Map.Entry; 044import java.util.concurrent.ArrayBlockingQueue; 045import java.util.concurrent.ThreadPoolExecutor; 046import java.util.concurrent.TimeUnit; 047 048/** 049 * Enriches all of the LOD cloud. 050 * 051 * @author Jens Lehmann 052 * 053 */ 054public class GlobalEnrichment { 055 056 //whether or not to skip endpoints which caused exceptions in a run before 057 private static boolean skipFailedEndpoints = true; 058 //whether or not to skip endpoints which returned no axioms during the learning process 059 private static boolean skipEmptyEndpoints = true; 060 //whether or not to skip endpoints on which we could learn something 061 private static boolean skipSuccessfulEndpoints = true; 062 063 // parameters 064 private static double threshold = 0.8; 065 private static int nrOfAxiomsToLearn = 10; 066 private static int queryChunkSize = 1000; 067 private static int maxExecutionTimeInSeconds = 10; 068 private static boolean useInference = true; 069 private static boolean omitExistingAxioms = false; 070 071 // directory for generated schemata 072 private static String baseDir = "log/lod-enriched/"; 073 074 075 //parameters for thread pool 076 //Parallel running Threads(Executor) on System 077 private static int corePoolSize = 1; 078 //Maximum Threads allowed in Pool 079 private static int maximumPoolSize = 20; 080 //Keep alive time for waiting threads for jobs(Runnable) 081 private static long keepAliveTime = 10; 082 083 /** 084 * @param args 085 * @throws MalformedURLException 086 * @throws LearningProblemUnsupportedException 087 * @throws NoSuchMethodException 088 * @throws InvocationTargetException 089 * @throws IllegalAccessException 090 * @throws InstantiationException 091 * @throws ComponentInitException 092 * @throws SecurityException 093 * @throws IllegalArgumentException 094 * @throws FileNotFoundException 095 */ 096 public static void main(String[] args) throws MalformedURLException, IllegalArgumentException, SecurityException, ComponentInitException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, LearningProblemUnsupportedException, FileNotFoundException { 097 new File(baseDir).mkdirs(); 098 099 new File(baseDir).mkdirs(); 100 101 SimpleLayout layout = new SimpleLayout(); 102 ConsoleAppender consoleAppender = new ConsoleAppender(layout); 103 Logger.getRootLogger().setLevel(Level.WARN); 104 Logger.getLogger("org.dllearner").setLevel(Level.WARN); // seems to be needed for some reason (?) 105 Logger.getRootLogger().removeAllAppenders(); 106 Logger.getRootLogger().addAppender(consoleAppender); 107 108 // get all SPARQL endpoints and their graphs - the key is a name-identifier 109 Map<String,SparqlEndpoint> endpoints = new TreeMap<>(); 110 111 String query = ""; 112 query += "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n"; 113 query += "PREFIX void: <http://rdfs.org/ns/void#> \n"; 114 query += "PREFIX dcterms: <http://purl.org/dc/terms/> \n"; 115 query += "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> \n"; 116 query += "PREFIX ov: <http://open.vocab.org/terms/> \n"; 117 query += "SELECT * \n"; 118 query += "WHERE { \n"; 119 query += " ?item rdf:type void:Dataset . \n"; 120 query += " ?item dcterms:isPartOf <http://ckan.net/group/lodcloud> . \n"; 121 query += " ?item void:sparqlEndpoint ?endpoint . \n"; 122// query += " ?item dcterms:subject ?subject . \n"; 123// query += " ?item rdfs:label ?label . \n"; 124 query += " ?item ov:shortName ?shortName . \n"; 125 query += "}"; 126// query += "LIMIT 20"; 127 System.out.println("Getting list of SPARQL endpoints from LATC DSI:"); 128 System.out.println(query); 129 130 // contact LATC DSI/MDS 131 SparqlEndpoint dsi = new SparqlEndpoint(new URL("http://api.talis.com/stores/latc-mds/services/sparql")); 132 SparqlQuery sq = new SparqlQuery(query, dsi); 133 ResultSet rs = sq.send(); 134 while(rs.hasNext()) { 135 QuerySolution qs = rs.next(); 136 String endpoint = qs.get("endpoint").toString(); 137 String shortName = qs.get("shortName").toString(); 138 endpoints.put(shortName, new SparqlEndpoint(new URL(endpoint))); 139 } 140 System.out.println(endpoints.size() + " endpoints detected."); 141 142 TreeSet<String> blacklist = new TreeSet<>(); 143 blacklist.add("rkb-explorer-crime"); // computation never completes 144 145 //remove endpoints which failed in a run before 146 if(skipFailedEndpoints){ 147 for(String name : getErrorList()){ 148 endpoints.remove(name); 149 } 150 } 151 if(skipEmptyEndpoints){ 152 for(String name : getEmptyList()){ 153 endpoints.remove(name); 154 } 155 } 156 if(skipSuccessfulEndpoints){ 157 for(String name : getSuccessList()){ 158 endpoints.remove(name); 159 } 160 } 161 162 ArrayBlockingQueue<Runnable> workQueue = new ArrayBlockingQueue<>(endpoints.size()); 163 ThreadPoolExecutor threadPool = new ThreadPoolExecutor(corePoolSize, maximumPoolSize, keepAliveTime, TimeUnit.SECONDS, workQueue); 164 165 166 // perform enrichment on endpoints 167 for(final Entry<String,SparqlEndpoint> endpoint : endpoints.entrySet()) { 168 169 threadPool.execute(new Runnable() { 170 171 @Override 172 public void run() { 173 // run enrichment 174 SparqlEndpoint se = endpoint.getValue(); 175 String name = endpoint.getKey(); 176 177 File f = new File(baseDir + File.separator + "success" + File.separator + name + ".ttl"); 178 File log = new File(baseDir + File.separator + "failed" + File.separator + name + ".log"); 179 180 System.out.println("Enriching " + name + " using " + se.getURL()); 181 Enrichment e = new Enrichment(se, null, threshold, nrOfAxiomsToLearn, useInference, 182 false, queryChunkSize, maxExecutionTimeInSeconds, omitExistingAxioms); 183 184 e.maxEntitiesPerType = 3; // hack for faster testing of endpoints 185 186// if(blacklist.contains(name)) { 187// continue; 188// } 189 190 boolean success = false; 191 // run enrichment script - we make a case distinguish to see which kind of problems we get 192 // (could be interesting for statistics later on) 193 try { 194 try { 195 e.start(); 196 success = true; 197 } catch (Exception ex){ 198 write2File(ex, se); 199 ex.printStackTrace(); 200 ex.printStackTrace(new PrintStream(log)); 201 } catch(StackOverflowError error) { 202 error.printStackTrace(new PrintStream(log)); 203 Files.appendToFile(log, "stack overflows could be caused by cycles in class hierarchies"); 204 error.printStackTrace(); 205 } 206 } catch (FileNotFoundException e2) { 207 e2.printStackTrace(); 208 } 209 /*catch(ResultSetException ex) { 210 try { 211 ex.printStackTrace(new PrintStream(log)); 212 } catch (FileNotFoundException e1) { 213 // TODO Auto-generated catch block 214 e1.printStackTrace(); 215 } 216 Files.appendToFile(log, ex.getMessage()); 217 ex.printStackTrace(); 218 } catch(QueryExceptionHTTP ex) { 219 try { 220 ex.printStackTrace(new PrintStream(log)); 221 } catch (FileNotFoundException e1) { 222 // TODO Auto-generated catch block 223 e1.printStackTrace(); 224 } 225 Files.appendToFile(log, ex.getMessage()); 226 ex.printStackTrace(); 227 } 228 catch(Exception ex) { 229 System.out.println("class of exception: " + ex.getClass()); 230 }*/ 231 232 // save results to a file (TODO: check if enrichment format 233 if(success) { 234 SparqlEndpointKS ks = new SparqlEndpointKS(se); 235 List<AlgorithmRun> runs = e.getAlgorithmRuns(); 236 List<OWLAxiom> axioms = new LinkedList<>(); 237 int axiomCnt = 0; 238 for(AlgorithmRun run : runs) { 239 axiomCnt += e.getGeneratedOntology().getLogicalAxiomCount(); 240 axioms.addAll(e.toRDF(run.getAxioms(), run.getAlgorithm(), run.getParameters(), ks)); 241 } 242 Model model = e.getModel(axioms); 243 try { 244 if(axiomCnt == 0){ 245 f = f = new File(baseDir + File.separator + "success/empty" + File.separator + name + ".ttl"); 246 } 247 model.write(new FileOutputStream(f), "TURTLE"); 248 } catch (FileNotFoundException e1) { 249 // TODO Auto-generated catch block 250 e1.printStackTrace(); 251 } 252 } 253 254 } 255 }); 256 257 } 258 threadPool.shutdown(); 259 } 260 261 public static void write2File(Exception e, SparqlEndpoint endpoint) { 262 try { 263 File file = new File(baseDir + File.separator + "errors" + File.separator + e.getClass().getName()); 264 if(!file.exists()){ 265 file.createNewFile(); 266 } 267 FileWriter fw = new FileWriter(file, true); 268 fw.append(endpoint.getURL().toString()).append("\n"); 269 fw.flush(); 270 fw.close(); 271 } catch (Exception ex) { 272 ex.printStackTrace(); 273 } 274 } 275 276 public static List<String> getErrorList(){ 277 List<String> errorNames = new ArrayList<>(); 278 File dir = new File(baseDir + "/failed/"); 279 dir.mkdirs(); 280 for(File file : dir.listFiles()){ 281 errorNames.add(file.getName().replace(".log", "")); 282 } 283 return errorNames; 284 } 285 286 public static List<String> getEmptyList(){ 287 List<String> errorNames = new ArrayList<>(); 288 File dir = new File(baseDir + "/success/empty/"); 289 dir.mkdirs(); 290 for(File file : dir.listFiles()){ 291 errorNames.add(file.getName().replace(".ttl", "")); 292 } 293 return errorNames; 294 } 295 296 public static List<String> getSuccessList(){ 297 List<String> errorNames = new ArrayList<>(); 298 File dir = new File(baseDir + "/success/"); 299 dir.mkdirs(); 300 for(File file : dir.listFiles()){ 301 errorNames.add(file.getName().replace(".ttl", "")); 302 } 303 return errorNames; 304 } 305 306}