001/**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 * 
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 *
019 */
020package org.dllearner.cli;
021
022import org.apache.jena.query.QuerySolution;
023import org.apache.jena.query.ResultSet;
024import org.apache.jena.rdf.model.Model;
025import org.apache.log4j.ConsoleAppender;
026import org.apache.log4j.Level;
027import org.apache.log4j.Logger;
028import org.apache.log4j.SimpleLayout;
029import org.dllearner.cli.Enrichment.AlgorithmRun;
030import org.dllearner.core.ComponentInitException;
031import org.dllearner.core.LearningProblemUnsupportedException;
032import org.dllearner.kb.SparqlEndpointKS;
033import org.dllearner.kb.sparql.SparqlEndpoint;
034import org.dllearner.kb.sparql.SparqlQuery;
035import org.dllearner.utilities.Files;
036import org.semanticweb.owlapi.model.OWLAxiom;
037
038import java.io.*;
039import java.lang.reflect.InvocationTargetException;
040import java.net.MalformedURLException;
041import java.net.URL;
042import java.util.*;
043import java.util.Map.Entry;
044import java.util.concurrent.ArrayBlockingQueue;
045import java.util.concurrent.ThreadPoolExecutor;
046import java.util.concurrent.TimeUnit;
047
048/**
049 * Enriches all of the LOD cloud.
050 * 
051 * @author Jens Lehmann
052 * 
053 */
054public class GlobalEnrichment {
055        
056        //whether or not to skip endpoints which caused exceptions in a run before
057        private static boolean skipFailedEndpoints = true;
058        //whether or not to skip endpoints which returned no axioms during the learning process
059        private static boolean skipEmptyEndpoints = true;
060        //whether or not to skip endpoints on which we could learn something
061        private static boolean skipSuccessfulEndpoints = true;
062
063        // parameters
064        private static double threshold = 0.8;
065        private static int nrOfAxiomsToLearn = 10;
066        private static int queryChunkSize = 1000;
067        private static int maxExecutionTimeInSeconds = 10;
068        private static boolean useInference = true;
069        private static boolean omitExistingAxioms = false;
070        
071        // directory for generated schemata
072        private static String baseDir = "log/lod-enriched/";
073        
074        
075        //parameters for thread pool
076        //Parallel running Threads(Executor) on System
077        private static int corePoolSize = 1;
078        //Maximum Threads allowed in Pool
079        private static int maximumPoolSize = 20;
080        //Keep alive time for waiting threads for jobs(Runnable)
081        private static long keepAliveTime = 10;
082        
083        /**
084         * @param args
085         * @throws MalformedURLException 
086         * @throws LearningProblemUnsupportedException 
087         * @throws NoSuchMethodException 
088         * @throws InvocationTargetException 
089         * @throws IllegalAccessException 
090         * @throws InstantiationException 
091         * @throws ComponentInitException 
092         * @throws SecurityException 
093         * @throws IllegalArgumentException 
094         * @throws FileNotFoundException 
095         */
096        public static void main(String[] args) throws MalformedURLException, IllegalArgumentException, SecurityException, ComponentInitException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, LearningProblemUnsupportedException, FileNotFoundException {
097                new File(baseDir).mkdirs();
098                
099                new File(baseDir).mkdirs();
100                
101                SimpleLayout layout = new SimpleLayout();
102                ConsoleAppender consoleAppender = new ConsoleAppender(layout);
103                Logger.getRootLogger().setLevel(Level.WARN);
104                Logger.getLogger("org.dllearner").setLevel(Level.WARN); // seems to be needed for some reason (?)
105                Logger.getRootLogger().removeAllAppenders();
106                Logger.getRootLogger().addAppender(consoleAppender);            
107                
108                // get all SPARQL endpoints and their graphs - the key is a name-identifier
109                Map<String,SparqlEndpoint> endpoints = new TreeMap<>();
110                
111                String query = "";
112                query += "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n";
113                query += "PREFIX void: <http://rdfs.org/ns/void#> \n";
114                query += "PREFIX dcterms: <http://purl.org/dc/terms/> \n";
115                query += "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> \n";
116                query += "PREFIX ov: <http://open.vocab.org/terms/> \n";
117                query += "SELECT * \n";
118                query += "WHERE { \n";
119                query += "   ?item rdf:type void:Dataset . \n";
120                query += "   ?item dcterms:isPartOf <http://ckan.net/group/lodcloud> . \n";
121                query += "   ?item void:sparqlEndpoint ?endpoint . \n";
122//              query += "   ?item dcterms:subject ?subject . \n";
123//              query += "   ?item rdfs:label ?label . \n";
124                query += "   ?item ov:shortName ?shortName . \n";
125                query += "}";
126//              query += "LIMIT 20";
127                System.out.println("Getting list of SPARQL endpoints from LATC DSI:");
128                System.out.println(query);
129                
130                // contact LATC DSI/MDS
131                SparqlEndpoint dsi = new SparqlEndpoint(new URL("http://api.talis.com/stores/latc-mds/services/sparql"));
132                SparqlQuery sq = new SparqlQuery(query, dsi);
133                ResultSet rs = sq.send();
134                while(rs.hasNext()) {
135                        QuerySolution qs = rs.next();
136                        String endpoint = qs.get("endpoint").toString();
137                        String shortName = qs.get("shortName").toString();
138                        endpoints.put(shortName, new SparqlEndpoint(new URL(endpoint)));
139                }
140                System.out.println(endpoints.size() + " endpoints detected.");
141                
142                TreeSet<String> blacklist = new TreeSet<>();
143                blacklist.add("rkb-explorer-crime"); // computation never completes
144                
145                //remove endpoints which failed in a run before
146                if(skipFailedEndpoints){
147                        for(String name : getErrorList()){
148                                endpoints.remove(name);
149                        }
150                }
151                if(skipEmptyEndpoints){
152                        for(String name : getEmptyList()){
153                                endpoints.remove(name);
154                        }
155                }
156                if(skipSuccessfulEndpoints){
157                        for(String name : getSuccessList()){
158                                endpoints.remove(name);
159                        }
160                }
161                
162                ArrayBlockingQueue<Runnable> workQueue = new ArrayBlockingQueue<>(endpoints.size());
163                ThreadPoolExecutor threadPool = new ThreadPoolExecutor(corePoolSize, maximumPoolSize, keepAliveTime, TimeUnit.SECONDS, workQueue);
164                
165                
166                // perform enrichment on endpoints
167                for(final Entry<String,SparqlEndpoint> endpoint : endpoints.entrySet()) {
168                        
169                        threadPool.execute(new Runnable() {
170                                
171                                @Override
172                                public void run() {
173                                        // run enrichment
174                                        SparqlEndpoint se = endpoint.getValue();
175                                        String name = endpoint.getKey();
176                                        
177                                        File f = new File(baseDir + File.separator + "success" + File.separator + name + ".ttl"); 
178                                        File log = new File(baseDir + File.separator + "failed" + File.separator + name + ".log");
179                                        
180                                        System.out.println("Enriching " + name + " using " + se.getURL());
181                                        Enrichment e = new Enrichment(se, null, threshold, nrOfAxiomsToLearn, useInference, 
182                                                        false, queryChunkSize, maxExecutionTimeInSeconds, omitExistingAxioms);
183                                        
184                                        e.maxEntitiesPerType = 3; // hack for faster testing of endpoints
185                                        
186//                                      if(blacklist.contains(name)) {
187//                                              continue;
188//                                      }
189                                        
190                                        boolean success = false;
191                                        // run enrichment script - we make a case distinguish to see which kind of problems we get
192                                        // (could be interesting for statistics later on)
193                                        try {
194                                                try {
195                                                        e.start();
196                                                        success = true;
197                                                } catch (Exception ex){
198                                                        write2File(ex, se);
199                                                        ex.printStackTrace();
200                                                        ex.printStackTrace(new PrintStream(log));
201                                                } catch(StackOverflowError error) {
202                                                        error.printStackTrace(new PrintStream(log));
203                                                        Files.appendToFile(log, "stack overflows could be caused by cycles in class hierarchies");
204                                                        error.printStackTrace();
205                                                }
206                                        } catch (FileNotFoundException e2) {
207                                                e2.printStackTrace();
208                                        } 
209                                        /*catch(ResultSetException ex) {
210                                                try {
211                                                        ex.printStackTrace(new PrintStream(log));
212                                                } catch (FileNotFoundException e1) {
213                                                        // TODO Auto-generated catch block
214                                                        e1.printStackTrace();
215                                                }
216                                                Files.appendToFile(log, ex.getMessage());
217                                                ex.printStackTrace();
218                                        } catch(QueryExceptionHTTP ex) {
219                                                try {
220                                                        ex.printStackTrace(new PrintStream(log));
221                                                } catch (FileNotFoundException e1) {
222                                                        // TODO Auto-generated catch block
223                                                        e1.printStackTrace();
224                                                }
225                                                Files.appendToFile(log, ex.getMessage());
226                                                ex.printStackTrace();                           
227                                        } 
228                                        catch(Exception ex) {
229                                                System.out.println("class of exception: " + ex.getClass());
230                                        }*/
231                                        
232                                        // save results to a file (TODO: check if enrichment format 
233                                        if(success) {
234                                                SparqlEndpointKS ks = new SparqlEndpointKS(se);
235                                                List<AlgorithmRun> runs = e.getAlgorithmRuns();
236                                                List<OWLAxiom> axioms = new LinkedList<>();
237                                                int axiomCnt = 0;
238                                                for(AlgorithmRun run : runs) {
239                                                        axiomCnt += e.getGeneratedOntology().getLogicalAxiomCount();
240                                                        axioms.addAll(e.toRDF(run.getAxioms(), run.getAlgorithm(), run.getParameters(), ks));
241                                                }
242                                                Model model = e.getModel(axioms);                       
243                                                try {
244                                                        if(axiomCnt == 0){
245                                                                f = f = new File(baseDir + File.separator + "success/empty" + File.separator + name + ".ttl"); 
246                                                        }
247                                                        model.write(new FileOutputStream(f), "TURTLE");
248                                                } catch (FileNotFoundException e1) {
249                                                        // TODO Auto-generated catch block
250                                                        e1.printStackTrace();
251                                                }                               
252                                        }
253                                        
254                                }
255                        });
256                        
257                }
258                threadPool.shutdown();
259        }
260        
261        public static void write2File(Exception e, SparqlEndpoint endpoint) {
262                try {
263                        File file = new File(baseDir + File.separator + "errors" + File.separator + e.getClass().getName());
264                        if(!file.exists()){
265                                file.createNewFile();
266                        }
267                        FileWriter fw = new FileWriter(file, true);
268                        fw.append(endpoint.getURL().toString()).append("\n");
269                        fw.flush();
270                        fw.close();
271                } catch (Exception ex) {
272                        ex.printStackTrace();
273                }
274        }
275        
276        public static List<String> getErrorList(){
277                List<String> errorNames = new ArrayList<>();
278                File dir = new File(baseDir + "/failed/");
279                dir.mkdirs();
280                for(File file : dir.listFiles()){
281                        errorNames.add(file.getName().replace(".log", ""));
282                }
283                return errorNames;
284        }
285        
286        public static List<String> getEmptyList(){
287                List<String> errorNames = new ArrayList<>();
288                File dir = new File(baseDir + "/success/empty/");
289                dir.mkdirs();
290                for(File file : dir.listFiles()){
291                        errorNames.add(file.getName().replace(".ttl", ""));
292                }
293                return errorNames;
294        }
295        
296        public static List<String> getSuccessList(){
297                List<String> errorNames = new ArrayList<>();
298                File dir = new File(baseDir + "/success/");
299                dir.mkdirs();
300                for(File file : dir.listFiles()){
301                        errorNames.add(file.getName().replace(".ttl", ""));
302                }
303                return errorNames;
304        }
305
306}