001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.kb.dataset; 020 021import com.clarkparsia.pellet.owlapiv3.PelletReasonerFactory; 022import com.google.common.io.Files; 023import org.semanticweb.HermiT.Configuration; 024import org.semanticweb.HermiT.Reasoner; 025import org.semanticweb.owlapi.apibinding.OWLManager; 026import org.semanticweb.owlapi.model.OWLClass; 027import org.semanticweb.owlapi.model.OWLOntology; 028import org.semanticweb.owlapi.model.OWLOntologyManager; 029import org.semanticweb.owlapi.reasoner.ConsoleProgressMonitor; 030import org.semanticweb.owlapi.reasoner.OWLReasoner; 031import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; 032import org.semanticweb.owlapi.reasoner.SimpleConfiguration; 033 034import java.io.*; 035import java.net.MalformedURLException; 036import java.net.URL; 037import java.nio.charset.Charset; 038import java.util.*; 039import java.util.Map.Entry; 040import java.util.concurrent.ExecutorService; 041import java.util.concurrent.Executors; 042import java.util.concurrent.TimeUnit; 043 044public abstract class AbstractOWLOntologyDataset implements AnalyzedOWLOntologyDataset{ 045 046 protected Collection<OWLOntology> ontologies = new TreeSet<>(); 047 protected Collection<OWLOntology> correctOntologies = new TreeSet<>(); 048 protected Collection<OWLOntology> incoherentOntologies = new TreeSet<>(); 049 protected Collection<OWLOntology> inconsistentOntologies = new TreeSet<>(); 050 051 protected String name; 052 053 protected File directory; 054 protected File correctSubdirectory; 055 protected File inconsistentSubdirectory; 056 protected File incoherentSubdirectory; 057 protected File errorSubdirectory; 058 protected File tooLargeSubdirectory; 059 060 protected OWLReasonerFactory reasonerFactory = PelletReasonerFactory.getInstance(); 061 OWLOntologyManager man = OWLManager.createOWLOntologyManager(); 062 063 protected Map<URL, String> ontologyURLs = new HashMap<>(); 064 065 private static final int nrOfThreads = 1; 066 private boolean analyze = false; 067 068 public AbstractOWLOntologyDataset(File datasetDirectory, String name, boolean analyze) { 069 this.name = name; 070 this.analyze = analyze; 071 //create file structure 072 directory = new File(datasetDirectory, name); 073 directory.mkdirs(); 074 correctSubdirectory = new File(directory, "correct"); 075 correctSubdirectory.mkdirs(); 076 incoherentSubdirectory = new File(directory, "incoherent"); 077 incoherentSubdirectory.mkdirs(); 078 inconsistentSubdirectory = new File(directory, "inconsistent"); 079 inconsistentSubdirectory.mkdirs(); 080 tooLargeSubdirectory = new File(directory, "too_large"); 081 tooLargeSubdirectory.mkdirs(); 082 errorSubdirectory = new File(directory, "error"); 083 errorSubdirectory.mkdirs(); 084 addOntologyURLs(); 085 initialize(); 086 } 087 088 public AbstractOWLOntologyDataset(File datasetDirectory, String name) { 089 this(datasetDirectory, name, false); 090 } 091 092 private boolean analyzed(URL url){ 093 String filename = getFilename(url); 094 for(File parent : Arrays.asList(tooLargeSubdirectory, correctSubdirectory, incoherentSubdirectory, inconsistentSubdirectory, errorSubdirectory)){ 095 File file = new File(parent, filename); 096 if(file.exists()){ 097 return true; 098 } 099 } 100 return false; 101 } 102 103 private Set<String> load403Errors(){ 104 Set<String> errors = new HashSet<>(); 105 try { 106 if(new File(directory, "403.txt").exists()){ 107 errors = new HashSet<>(Files.readLines(new File(directory, "403.txt"), Charset.defaultCharset())); 108 } 109 } catch (IOException e) { 110 e.printStackTrace(); 111 } 112 return errors; 113 } 114 115 private boolean analyzedDataset(){ 116 return new File(directory + "/" + "analyzed").exists(); 117 } 118 119 public void initialize(){ 120 //check if dataset was already analyzed 121 boolean analyzed = analyzedDataset(); 122 if(!analyzed){ 123 Set<String> errors = load403Errors(); 124 ExecutorService threadPool = Executors.newFixedThreadPool(nrOfThreads); 125 List<Entry<URL, String>> urlList = new ArrayList<>(ontologyURLs.entrySet()); 126 Collections.shuffle(urlList); 127 for (java.util.Map.Entry<URL, String> entry : urlList) { 128 URL url = entry.getKey(); 129 if(!errors.contains(url.toString()) && !analyzed(url)){ 130 threadPool.submit(new OntologyLoadingTask(url)); 131 } 132 } 133 threadPool.shutdown(); 134 try { 135 threadPool.awaitTermination(100, TimeUnit.MINUTES); 136 } catch (InterruptedException e) { 137 e.printStackTrace(); 138 } 139 try { 140 new File(directory + "/" + "analyzed").createNewFile(); 141 } catch (IOException e) { 142 e.printStackTrace(); 143 } 144 } else { 145 146 } 147 } 148 149 protected abstract void addOntologyURLs(); 150 151 private void analyzeAndCategorizeOntology(OWLOntology ontology, String filename){ 152 System.out.println("Analyzing ontology " + filename + "..."); 153 OWLReasoner reasoner; 154 File from = new File(man.getOntologyDocumentIRI(ontology).toURI()); 155 try { 156 Configuration conf = new Configuration(); 157 conf.reasonerProgressMonitor = new ConsoleProgressMonitor(); 158 conf.ignoreUnsupportedDatatypes = true; 159 reasoner = new Reasoner(conf, ontology); 160 int logicalAxiomCount = ontology.getLogicalAxiomCount(); 161 boolean consistent = reasoner.isConsistent(); 162 Set<OWLClass> unsatisfiableClasses = null; 163 164 if(consistent){ 165 unsatisfiableClasses = reasoner.getUnsatisfiableClasses().getEntitiesMinusBottom(); 166 if(!unsatisfiableClasses.isEmpty()){ 167 File to = new File(incoherentSubdirectory, filename); 168 Files.move(from, to); 169 } else { 170 File to = new File(correctSubdirectory, filename); 171 Files.move(from, to); 172 } 173 } else { 174 File to = new File(inconsistentSubdirectory, filename); 175 Files.move(from, to); 176 } 177 System.out.println(consistent + "\t" + logicalAxiomCount + "\t" + ((unsatisfiableClasses != null) ? unsatisfiableClasses.size() : "n/a")); 178 reasoner.dispose(); 179 } catch (Exception e){ 180 e.printStackTrace(); 181 try { 182 reasoner = reasonerFactory.createNonBufferingReasoner(ontology, new SimpleConfiguration(new ConsoleProgressMonitor())); 183 int logicalAxiomCount = ontology.getLogicalAxiomCount(); 184 boolean consistent = reasoner.isConsistent(); 185 Set<OWLClass> unsatisfiableClasses = null; 186 if(consistent){ 187 unsatisfiableClasses = reasoner.getUnsatisfiableClasses().getEntitiesMinusBottom(); 188 if(!unsatisfiableClasses.isEmpty()){ 189 File to = new File(incoherentSubdirectory, filename); 190 Files.move(from, to); 191 } else { 192 File to = new File(correctSubdirectory, filename); 193 Files.move(from, to); 194 } 195 } else { 196 File to = new File(inconsistentSubdirectory, filename); 197 Files.move(from, to); 198 } 199 System.out.println(consistent + "\t" + logicalAxiomCount + "\t" + ((unsatisfiableClasses != null) ? unsatisfiableClasses.size() : "n/a")); 200 reasoner.dispose(); 201 } catch (Exception e1){ 202 File to = new File(errorSubdirectory, filename); 203 try { 204 Files.move(from, to); 205 } catch (IOException e2) { 206 e2.printStackTrace(); 207 } 208 } 209 } 210 } 211 212 protected OWLOntology loadOWLOntology(URL url) { 213 OWLOntology ontology = loadFromLocal(url); 214 if(ontology == null){ 215 File file = null; 216 try { 217 file = downloadFile(url); 218 if(file != null){ 219 ontology = man.loadOntologyFromOntologyDocument(file); 220 } 221 } catch (Exception e) { 222 e.printStackTrace(); 223 String filename = getFilename(url); 224 File to = new File(errorSubdirectory, filename); 225 try { 226 Files.move(file, to); 227 } catch (IOException e2) { 228 e2.printStackTrace(); 229 } 230 } 231 } 232 return ontology; 233 } 234 235 private OWLOntology loadFromLocal(URL url){ 236 String filename = getFilename(url); 237 for(File parent : Collections.singletonList(directory)){ 238 File file = new File(parent, filename); 239 if(file.exists()){ 240 try { 241 return man.loadOntologyFromOntologyDocument(file); 242 } catch(Exception e){ 243 e.printStackTrace(); 244 File to = new File(errorSubdirectory, filename); 245 try { 246 Files.move(file, to); 247 } catch (IOException e2) { 248 e2.printStackTrace(); 249 } 250 } 251 } 252 } 253 return null; 254 } 255 256 private String getFilename(URL url){ 257 return ontologyURLs.get(url); 258// String filename = url.toString().substring(url.toString().lastIndexOf("/")); 259// return filename; 260 } 261 262 /** 263 * Download the file such that later on we can load it from the local file system. 264 */ 265 protected File downloadFile(URL url){ 266 267 String filename = getFilename(url); 268 File file = new File(directory + "/" + filename); 269 if(!file.exists()){ 270 System.out.print("Downloading file..."); 271 try { 272 InputStream is = url.openConnection().getInputStream(); 273 OutputStream out = new FileOutputStream(file); 274 int read = 0; 275 byte[] bytes = new byte[1024]; 276 277 while ((read = is.read(bytes)) != -1) { 278 out.write(bytes, 0, read); 279 } 280 281 is.close(); 282 out.flush(); 283 out.close(); 284 System.out.println("done."); 285 return file; 286 } catch (MalformedURLException | FileNotFoundException e) { 287 e.printStackTrace(); 288 } catch (IOException e) { 289 e.printStackTrace(); 290 add403Error(url); 291 } 292 } 293 return null; 294 } 295 296 private void add403Error(URL url){ 297 org.dllearner.utilities.Files.appendToFile(new File(directory, "403.txt"), url.toString() + "\n"); 298 } 299 300 @Override 301 public Collection<OWLOntology> loadOntologies() { 302 return ontologies; 303 } 304 305 @Override 306 public Collection<OWLOntology> loadIncoherentOntologies() { 307 return incoherentOntologies; 308 } 309 310 @Override 311 public Collection<OWLOntology> loadInconsistentOntologies() { 312 return inconsistentOntologies; 313 } 314 315 class OntologyLoadingTask implements Runnable{ 316 317 private URL url; 318 319 public OntologyLoadingTask(URL url) { 320 this.url = url; 321 } 322 323 @Override 324 public void run() { 325 System.out.println("Processing " + ontologyURLs.get(url)); 326 OWLOntology ontology = loadOWLOntology(url); 327 if(ontology != null){ 328 if(analyze){ 329 analyzeAndCategorizeOntology(ontology, getFilename(url)); 330 } else { 331 ontologies.add(ontology); 332 } 333 } 334 } 335 336 } 337 338}