001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.kb.repository.bioportal; 020 021import com.google.common.base.Charsets; 022import com.google.common.base.StandardSystemProperty; 023import joptsimple.OptionParser; 024import joptsimple.OptionSet; 025import joptsimple.OptionSpec; 026import org.apache.commons.io.FileUtils; 027import org.apache.commons.io.IOUtils; 028import org.apache.commons.lang3.exception.ExceptionUtils; 029import org.dllearner.kb.repository.OntologyRepository; 030import org.dllearner.kb.repository.OntologyRepositoryEntry; 031import org.dllearner.utilities.Files; 032import org.semanticweb.owlapi.apibinding.OWLManager; 033import org.semanticweb.owlapi.model.*; 034import org.slf4j.Logger; 035import org.slf4j.LoggerFactory; 036 037import javax.json.Json; 038import javax.json.JsonArray; 039import javax.json.JsonObject; 040import javax.json.JsonReader; 041import java.io.File; 042import java.io.FileOutputStream; 043import java.io.IOException; 044import java.io.InputStream; 045import java.net.HttpURLConnection; 046import java.net.URI; 047import java.net.URL; 048import java.util.*; 049 050public class BioPortalRepository implements OntologyRepository { 051 052 private static final Logger log = LoggerFactory.getLogger(BioPortalRepository.class); 053 054 private static final String apiKey = "20caf25c-f140-4fef-be68-ff1a3936f405"; 055 private static final String serviceURL = "http://rest.bioontology.org/bioportal/ontologies"; 056 057 private static final String listOntologiesURL = "http://data.bioontology.org/ontologies"; 058 private static final String downloadURL = "http://data.bioontology.org/ontologies/%s/download"; 059 060 private boolean initialized = false; 061 062 private List<OntologyRepositoryEntry> entries = new ArrayList<>(); 063 064 @Override 065 public String getName() { 066 return "BioPortal"; 067 } 068 069 @Override 070 public String getLocation() { 071 return "http://www.bioontology.org/"; 072 } 073 074 @Override 075 public void initialize() { 076 refresh(); 077 initialized = true; 078 } 079 080 @Override 081 public void refresh() { 082 fillRepository(); 083 } 084 085 private void fillRepository(){ 086 087 try { 088 HttpURLConnection conn = (HttpURLConnection)new URL(listOntologiesURL).openConnection(); 089 conn.setRequestProperty("Authorization", "apikey token=" + apiKey); 090 conn.setRequestMethod("GET"); 091 conn.setRequestProperty("Accept", "application/json"); 092 093 // list all ontologies 094 try (InputStream is = conn.getInputStream()) { 095 096// JsonParser jp = new JsonParser(); 097// jp.parse(new InputStreamReader(is)); 098 099 100 JsonReader rdr = Json.createReader(is); 101 JsonArray array = rdr.readArray(); 102 103 // for each ontology get the download link 104 for (JsonObject obj : array.getValuesAs(JsonObject.class)) { 105 String acronym = obj.getString("acronym"); 106 String name = obj.getString("name"); 107 108 URI physicalURI = URI.create(obj.getJsonObject("links").getString("download")); 109 String shortName = acronym; 110 boolean add = false; 111 entries.add(new RepositoryEntry(physicalURI, physicalURI, shortName)); 112 } 113 } catch( Exception e){ 114 e.printStackTrace(); 115 } 116 } catch(Exception e) { 117 118 } 119 120 log.info("Loaded " + entries.size() + " ontology entries from BioPortal."); 121 } 122 123 @Override 124 public Collection<OntologyRepositoryEntry> getEntries() { 125 if(!initialized){ 126 initialize(); 127 } 128 return entries; 129 } 130 131 @Override 132 public List<Object> getMetaDataKeys() { 133 // TODO Auto-generated method stub 134 return null; 135 } 136 137 private static InputStream getInputStream(URL url) throws IOException { 138 if (url.getProtocol().equals("http")) { 139 HttpURLConnection conn = (HttpURLConnection) url.openConnection(); 140 conn.setRequestProperty("Authorization", "apikey token=" + apiKey); 141 conn.setRequestMethod("GET"); 142 conn.setRequestProperty("Accept", "application/rdf+xml"); 143 conn.addRequestProperty("Accept", "text/xml"); 144 conn.addRequestProperty("Accept", "*/*"); 145 return conn.getInputStream(); 146 } else { 147 return url.openStream(); 148 } 149 } 150 151 /** 152 * Download the ontologies and save them in the given directory. 153 * @param dir the directory 154 */ 155 public void download(File dir) { 156 157 } 158 159 /** 160 * Returns the ontology for the entry. 161 * @param entry the entry 162 * @return the OWL ontology 163 */ 164 public OWLOntology getOntology(OntologyRepositoryEntry entry) { 165 try(InputStream is = getInputStream(entry.getPhysicalURI().toURL())) { 166 OWLOntologyManager man = OWLManager.createOWLOntologyManager(); 167 man.addMissingImportListener(e -> { 168 log.warn("Missing import: " + e.getImportedOntologyURI()); 169 }); 170 171 // handle missing imports 172 OWLOntologyLoaderConfiguration conf = new OWLOntologyLoaderConfiguration(); 173 conf.setMissingImportHandlingStrategy(MissingImportHandlingStrategy.SILENT); 174 // the List ontology isn't online anymore, thus, we ignore it'S import 175 conf.addIgnoredImport(IRI.create("http://www.co-ode.org/ontologies/lists/2008/09/11/list.owl")); 176 man.setOntologyLoaderConfiguration(conf); 177 OWLOntology ont = man.loadOntologyFromOntologyDocument(is); 178 return ont; 179 } catch (Exception e) { 180 log.error("Failed to load ontology from " + entry.getPhysicalURI(), e); 181 } 182 return null; 183 } 184 185 public static void main(String[] args) throws Exception{ 186 187 // create Options object 188 OptionParser parser = new OptionParser(); 189 OptionSpec<File> baseDir = 190 parser.accepts( "basedir" ).withRequiredArg().ofType( File.class ).defaultsTo(new File(System.getProperty("java.io.tmpdir") + File.separator + "bioportal" + File.separator)); 191 OptionSpec<Void> downloadOption = 192 parser.accepts( "download" ); 193 OptionSpec<Void> parseOption = 194 parser.accepts( "parse" ); 195 196 OptionSet options = parser.parse(args); 197 198 File dir = options.valueOf(baseDir); 199 dir.mkdirs(); 200 201 File downloadDir = new File(dir, "download"); 202 File downloadSuccessfulDir = new File(downloadDir, "successful"); 203 File downloadFailedDir = new File(downloadDir, "failed"); 204 downloadSuccessfulDir.mkdirs(); 205 downloadFailedDir.mkdirs(); 206 File parsedDir = new File(dir, "parsed"); 207 File parsedSuccessfulDir = new File(parsedDir, "successful"); 208 File parsedFailedDir = new File(parsedDir, "failed"); 209 parsedSuccessfulDir.mkdirs(); 210 parsedFailedDir.mkdirs(); 211 212 BioPortalRepository repo = new BioPortalRepository(); 213 repo.initialize(); 214 215 Collection<OntologyRepositoryEntry> entries = repo.getEntries(); 216 System.out.println("BioPortal repository size: " + entries.size()); 217 218 boolean downloadEnabled = options.has(downloadOption); 219 boolean parseEnabled = options.has(parseOption); 220 221 final Map<String, String> map = Collections.synchronizedMap(new TreeMap<>()); 222 223 System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism", "4"); 224 225 entries.parallelStream().forEach(entry -> { 226 try { 227 228 File f = null; 229 long sizeInMb = 101; 230 if(downloadEnabled && !new File(downloadSuccessfulDir, entry.getOntologyShortName() + ".rdf").exists()) { 231 232 233 System.out.println("Loading " + entry.getOntologyShortName() + " from " + entry.getPhysicalURI()); 234 235 try(InputStream is = getInputStream(entry.getPhysicalURI().toURL())) { 236 f = new File(downloadSuccessfulDir, entry.getOntologyShortName() + ".rdf"); 237 238 IOUtils.copy(is, new FileOutputStream(f)); 239 240 sizeInMb = f.length() / (1024 * 1024); 241 242 System.out.println(entry.getOntologyShortName() + ": " + FileUtils.byteCountToDisplaySize(f.length())); 243 map.put(entry.getOntologyShortName(), FileUtils.byteCountToDisplaySize(f.length())); 244 } catch (Exception e) { 245 com.google.common.io.Files.asCharSink(new File(downloadFailedDir, entry.getOntologyShortName() + ".txt"), 246 Charsets.UTF_8).write(ExceptionUtils.getMessage(e)); 247 return; 248 } 249 } 250 251 if(f == null) { 252 System.out.println("Loading " + entry.getOntologyShortName() + " from disk"); 253 254 f = new File(downloadSuccessfulDir, entry.getOntologyShortName() + ".rdf"); 255 256 System.out.println(entry.getOntologyShortName() + ": " + FileUtils.byteCountToDisplaySize(f.length())); 257 258 sizeInMb = f.length() / (1024 * 1024); 259 } 260 261 if(f.exists() && parseEnabled && sizeInMb < 100) { 262 try { 263 OWLOntologyManager man = OWLManager.createOWLOntologyManager(); 264 man.addMissingImportListener(e -> { 265 System.out.println("Missing import: " + e.getImportedOntologyURI()); 266 }); 267 OWLOntologyLoaderConfiguration conf = new OWLOntologyLoaderConfiguration(); 268 conf.setMissingImportHandlingStrategy(MissingImportHandlingStrategy.SILENT); 269 conf.addIgnoredImport(IRI.create("http://www.co-ode.org/ontologies/lists/2008/09/11/list.owl")); 270 man.setOntologyLoaderConfiguration(conf); 271 OWLOntology ont = man.loadOntologyFromOntologyDocument(f); 272 System.out.println("#Axioms: " + ont.getLogicalAxiomCount()); 273 274 com.google.common.io.Files.asCharSink( 275 new File(parsedSuccessfulDir, entry.getOntologyShortName() + ".txt"), 276 Charsets.UTF_8).write( 277 ont.getLogicalAxiomCount() + "\t" + 278 ont.getClassesInSignature().size() + "\t" + 279 ont.getObjectPropertiesInSignature().size() + "\t" + 280 ont.getDataPropertiesInSignature().size() + "\t" + 281 ont.getIndividualsInSignature().size() 282 ); 283 284 map.replace(entry.getOntologyShortName(), map.get(entry.getOntologyShortName()) + "||#Axioms: " + ont.getLogicalAxiomCount()); 285 man.removeOntology(ont); 286 } catch (Exception e1) { 287 System.err.println("Failed to parse " + entry.getOntologyShortName()); 288 map.replace(entry.getOntologyShortName(), map.get(entry.getOntologyShortName()) + "||Parse Error"); 289 com.google.common.io.Files.asCharSink( 290 new File(parsedFailedDir, entry.getOntologyShortName() + ".txt"), 291 Charsets.UTF_8).write(ExceptionUtils.getMessage(e1)); 292 } 293 } 294 } catch (Exception e) { 295 System.err.println("Failed to load " + entry.getOntologyShortName() + ". Reason: " + e.getMessage()); 296// e.printStackTrace(); 297 map.put(entry.getOntologyShortName(), "Load error"); 298 } 299 }); 300 301 map.forEach((k, v) -> System.out.println(k + " -> " + v)); 302 } 303 304 private class RepositoryEntry implements OntologyRepositoryEntry { 305 306 private String shortName; 307 308 private URI ontologyURI; 309 310 private URI physicalURI; 311 312 public RepositoryEntry(URI ontologyURI, URI physicalURI, String shortName) { 313 this.ontologyURI = ontologyURI; 314 this.physicalURI = physicalURI; 315 this.shortName = shortName; 316 } 317 318 @Override 319 public String getOntologyShortName() { 320 return shortName; 321 } 322 323 @Override 324 public URI getOntologyURI() { 325 return ontologyURI; 326 } 327 328 @Override 329 public URI getPhysicalURI() { 330 return physicalURI; 331 } 332 333 @Override 334 public String getMetaData(Object key) { 335 return null; 336 } 337 338 } 339 340}