001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.kb.dataset;
020
021import com.clarkparsia.pellet.owlapiv3.PelletReasonerFactory;
022import com.google.common.io.Files;
023import org.semanticweb.HermiT.Configuration;
024import org.semanticweb.HermiT.Reasoner;
025import org.semanticweb.owlapi.apibinding.OWLManager;
026import org.semanticweb.owlapi.model.OWLClass;
027import org.semanticweb.owlapi.model.OWLOntology;
028import org.semanticweb.owlapi.model.OWLOntologyManager;
029import org.semanticweb.owlapi.reasoner.ConsoleProgressMonitor;
030import org.semanticweb.owlapi.reasoner.OWLReasoner;
031import org.semanticweb.owlapi.reasoner.OWLReasonerFactory;
032import org.semanticweb.owlapi.reasoner.SimpleConfiguration;
033
034import java.io.*;
035import java.net.MalformedURLException;
036import java.net.URL;
037import java.nio.charset.Charset;
038import java.util.*;
039import java.util.Map.Entry;
040import java.util.concurrent.ExecutorService;
041import java.util.concurrent.Executors;
042import java.util.concurrent.TimeUnit;
043
044public abstract class AbstractOWLOntologyDataset implements AnalyzedOWLOntologyDataset{
045        
046        protected Collection<OWLOntology> ontologies = new TreeSet<>();
047        protected Collection<OWLOntology> correctOntologies = new TreeSet<>();
048        protected Collection<OWLOntology> incoherentOntologies = new TreeSet<>();
049        protected Collection<OWLOntology> inconsistentOntologies = new TreeSet<>();
050        
051        protected String name;
052        
053        protected File directory;
054        protected File correctSubdirectory;
055        protected File inconsistentSubdirectory;
056        protected File incoherentSubdirectory;
057        protected File errorSubdirectory;
058        protected File tooLargeSubdirectory;
059        
060        protected OWLReasonerFactory reasonerFactory = PelletReasonerFactory.getInstance();
061        OWLOntologyManager man = OWLManager.createOWLOntologyManager();
062        
063        protected Map<URL, String> ontologyURLs = new HashMap<>();
064        
065        private static final int nrOfThreads = 1;
066        private boolean analyze = false;
067        
068        public AbstractOWLOntologyDataset(File datasetDirectory, String name, boolean analyze) {
069                this.name = name;
070                this.analyze = analyze;
071                //create file structure
072                directory = new File(datasetDirectory, name);
073                directory.mkdirs();
074                correctSubdirectory = new File(directory, "correct");
075                correctSubdirectory.mkdirs();
076                incoherentSubdirectory = new File(directory, "incoherent");
077                incoherentSubdirectory.mkdirs();
078                inconsistentSubdirectory = new File(directory, "inconsistent");
079                inconsistentSubdirectory.mkdirs();
080                tooLargeSubdirectory = new File(directory, "too_large");
081                tooLargeSubdirectory.mkdirs();
082                errorSubdirectory = new File(directory, "error");
083                errorSubdirectory.mkdirs();
084                addOntologyURLs();
085                initialize();
086        }
087        
088        public AbstractOWLOntologyDataset(File datasetDirectory, String name) {
089                this(datasetDirectory, name, false);
090        }
091        
092        private boolean analyzed(URL url){
093                String filename = getFilename(url);
094                for(File parent : Arrays.asList(tooLargeSubdirectory, correctSubdirectory, incoherentSubdirectory, inconsistentSubdirectory, errorSubdirectory)){
095                        File file = new File(parent, filename);
096                        if(file.exists()){
097                                return true;
098                        }
099                }
100                return false;
101        }
102        
103        private Set<String> load403Errors(){
104                Set<String> errors = new HashSet<>();
105                try {
106                        if(new File(directory, "403.txt").exists()){
107                                errors = new HashSet<>(Files.readLines(new File(directory, "403.txt"), Charset.defaultCharset()));
108                        }
109                } catch (IOException e) {
110                        e.printStackTrace();
111                }
112                return errors;
113        }
114        
115        private boolean analyzedDataset(){
116                return new File(directory + "/" + "analyzed").exists();
117        }
118
119        public void initialize(){
120                //check if dataset was already analyzed
121                boolean analyzed = analyzedDataset();
122                if(!analyzed){
123                        Set<String> errors = load403Errors();
124                        ExecutorService threadPool = Executors.newFixedThreadPool(nrOfThreads);
125                        List<Entry<URL, String>> urlList = new ArrayList<>(ontologyURLs.entrySet());
126                        Collections.shuffle(urlList);
127                        for (java.util.Map.Entry<URL, String> entry : urlList) {
128                                URL url = entry.getKey();
129                                if(!errors.contains(url.toString()) && !analyzed(url)){
130                                        threadPool.submit(new OntologyLoadingTask(url));
131                                }
132                        }
133                        threadPool.shutdown();
134                        try {
135                                threadPool.awaitTermination(100, TimeUnit.MINUTES);
136                        } catch (InterruptedException e) {
137                                e.printStackTrace();
138                        }
139                        try {
140                                new File(directory + "/" + "analyzed").createNewFile();
141                        } catch (IOException e) {
142                                e.printStackTrace();
143                        }
144                } else {
145                        
146                }
147        }
148        
149        protected abstract void addOntologyURLs();
150        
151        private void analyzeAndCategorizeOntology(OWLOntology ontology, String filename){
152                System.out.println("Analyzing ontology " + filename + "...");
153                OWLReasoner reasoner;
154                File from = new File(man.getOntologyDocumentIRI(ontology).toURI());
155                try {
156                        Configuration conf = new Configuration();
157                        conf.reasonerProgressMonitor = new ConsoleProgressMonitor();
158                        conf.ignoreUnsupportedDatatypes = true;
159                        reasoner = new Reasoner(conf, ontology);
160                        int logicalAxiomCount = ontology.getLogicalAxiomCount();
161                        boolean consistent = reasoner.isConsistent();
162                        Set<OWLClass> unsatisfiableClasses = null;
163                        
164                        if(consistent){
165                                unsatisfiableClasses = reasoner.getUnsatisfiableClasses().getEntitiesMinusBottom();
166                                if(!unsatisfiableClasses.isEmpty()){
167                                        File to = new File(incoherentSubdirectory, filename);
168                                        Files.move(from, to);
169                                } else {
170                                        File to = new File(correctSubdirectory, filename);
171                                        Files.move(from, to);
172                                }
173                        } else {
174                                File to = new File(inconsistentSubdirectory, filename);
175                                Files.move(from, to);
176                        }
177                        System.out.println(consistent + "\t" + logicalAxiomCount + "\t" + ((unsatisfiableClasses != null) ? unsatisfiableClasses.size() : "n/a"));
178                        reasoner.dispose();
179                } catch (Exception e){
180                        e.printStackTrace();
181                        try {
182                                reasoner = reasonerFactory.createNonBufferingReasoner(ontology, new SimpleConfiguration(new ConsoleProgressMonitor()));
183                                int logicalAxiomCount = ontology.getLogicalAxiomCount();
184                                boolean consistent = reasoner.isConsistent();
185                                Set<OWLClass> unsatisfiableClasses = null;
186                                if(consistent){
187                                        unsatisfiableClasses = reasoner.getUnsatisfiableClasses().getEntitiesMinusBottom();
188                                        if(!unsatisfiableClasses.isEmpty()){
189                                                File to = new File(incoherentSubdirectory, filename);
190                                                Files.move(from, to);
191                                        } else {
192                                                File to = new File(correctSubdirectory, filename);
193                                                Files.move(from, to);
194                                        }
195                                } else {
196                                        File to = new File(inconsistentSubdirectory, filename);
197                                        Files.move(from, to);
198                                }
199                                System.out.println(consistent + "\t" + logicalAxiomCount + "\t" + ((unsatisfiableClasses != null) ? unsatisfiableClasses.size() : "n/a"));
200                                reasoner.dispose();
201                        } catch (Exception e1){
202                                File to = new File(errorSubdirectory, filename);
203                                try {
204                                        Files.move(from, to);
205                                } catch (IOException e2) {
206                                        e2.printStackTrace();
207                                }
208                        }
209                }
210        }
211        
212        protected OWLOntology loadOWLOntology(URL url) {
213                OWLOntology ontology = loadFromLocal(url);
214                if(ontology == null){
215                        File file = null;
216                        try {
217                                file = downloadFile(url);
218                                if(file != null){
219                                        ontology = man.loadOntologyFromOntologyDocument(file);
220                                }
221                        } catch (Exception e) {
222                                e.printStackTrace();
223                                String filename = getFilename(url);
224                                File to = new File(errorSubdirectory, filename);
225                                try {
226                                        Files.move(file, to);
227                                } catch (IOException e2) {
228                                        e2.printStackTrace();
229                                }
230                        }
231                }
232                return ontology;
233        }
234        
235        private OWLOntology loadFromLocal(URL url){
236                String filename = getFilename(url);
237                for(File parent : Collections.singletonList(directory)){
238                        File file = new File(parent, filename);
239                        if(file.exists()){
240                                try {
241                                        return man.loadOntologyFromOntologyDocument(file);
242                                } catch(Exception e){
243                                        e.printStackTrace();
244                                        File to = new File(errorSubdirectory, filename);
245                                        try {
246                                                Files.move(file, to);
247                                        } catch (IOException e2) {
248                                                e2.printStackTrace();
249                                        }
250                                }
251                        }
252                }
253                return null;
254        }
255        
256        private String getFilename(URL url){
257                return ontologyURLs.get(url);
258//              String filename = url.toString().substring(url.toString().lastIndexOf("/"));
259//              return filename;
260        }
261        
262        /**
263         * Download the file such that later on we can load it from the local file system.
264         */
265        protected File downloadFile(URL url){
266                
267                String filename = getFilename(url);
268                File file = new File(directory + "/" + filename);
269                if(!file.exists()){
270                        System.out.print("Downloading file...");
271                        try {
272                                InputStream is = url.openConnection().getInputStream();
273                                OutputStream out = new FileOutputStream(file);
274                                int read = 0;
275                                byte[] bytes = new byte[1024];
276 
277                                while ((read = is.read(bytes)) != -1) {
278                                        out.write(bytes, 0, read);
279                                }
280 
281                                is.close();
282                                out.flush();
283                                out.close();
284                                System.out.println("done.");
285                                return file;
286                        } catch (MalformedURLException | FileNotFoundException e) {
287                                e.printStackTrace();
288                        } catch (IOException e) {
289                                e.printStackTrace();
290                                add403Error(url);
291                        }
292                }
293                return null;
294        }
295        
296        private void add403Error(URL url){
297                org.dllearner.utilities.Files.appendToFile(new File(directory, "403.txt"), url.toString() + "\n");
298        }
299        
300        @Override
301        public Collection<OWLOntology> loadOntologies() {
302                return ontologies;
303        }
304
305        @Override
306        public Collection<OWLOntology> loadIncoherentOntologies() {
307                return incoherentOntologies;
308        }
309
310        @Override
311        public Collection<OWLOntology> loadInconsistentOntologies() {
312                return inconsistentOntologies;
313        }
314        
315        class OntologyLoadingTask implements Runnable{
316                
317                private URL url;
318
319                public OntologyLoadingTask(URL url) {
320                        this.url = url;
321                }
322
323                @Override
324                public void run() {
325                        System.out.println("Processing " + ontologyURLs.get(url));
326                        OWLOntology ontology = loadOWLOntology(url);
327                        if(ontology != null){
328                                if(analyze){
329                                        analyzeAndCategorizeOntology(ontology, getFilename(url));
330                                } else {
331                                        ontologies.add(ontology);
332                                }
333                        }
334                }
335                
336        }
337
338}