/**
 * Copyright (C) 2007-2008, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package org.dllearner.examples;

import org.dllearner.parser.KBParser;
import org.dllearner.parser.ParseException;
import org.dllearner.parser.PrologParser;
import org.dllearner.prolog.Atom;
import org.dllearner.prolog.Clause;
import org.dllearner.prolog.Program;
import org.dllearner.utilities.Files;
import org.dllearner.utilities.Helper;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.formats.RDFXMLDocumentFormat;
import org.semanticweb.owlapi.model.*;
import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.*;

/**
 * This class maps the carcinogenesis Prolog files to an OWL file. In a first
 * step, a Prolog parser is used to read all files. The main step involves
 * mapping Prolog clauses to OWL axioms through domain specific mapping
 * rules.
 *
 * The carcinogenesis Prolog files are available here:
 * http://web.comlab.ox.ac.uk/oucl/research/areas/machlearn/cancer.html
 *
 * .f files contain positive and .n files contain negative examples. pte1.n and
 * pte1.f contain the PTE-1 challenge examples. train.n and train.f contain other
 * examples which can be used to train for PTE-1.
 *
 * The PTE-2 directory contains PTE-2 files, i.e. all substances referred to in
 * those files are only those of the PTE-2 challenge.
 *
 * @author Jens Lehmann
 *
 */
public class Carcinogenesis {

    private static final IRI ontologyIRI = IRI.create("http://dl-learner.org/carcinogenesis");

    // directory of Prolog files
    private static final String prologDirectory = "../examples/carcinogenesis/prolog/";

    // mapping of symbols to names of chemical elements
    private static Map<String, String> chemElements;

    // structures in newgroups.pl
    private static Set<String> newGroups = new TreeSet<>();

    // types of atoms, bonds, and structures
    private static Set<String> atomTypes = new TreeSet<>();
    private static Set<String> bondTypes = new TreeSet<>();
    private static Set<String> structureTypes = new TreeSet<>();

    // we need a counter for bonds, because they are instances in OWL
    // but not in Prolog
    private static int bondNr = 0;
    private static int structureNr = 0;

    // list of all individuals in the knowledge base
//  private static Set<String> individuals = new TreeSet<String>();
    // list of all compounds
    private static Set<String> compounds = new TreeSet<>();
    // compounds with positive ames test
    private static Set<String> compoundsAmes = new TreeSet<>();
    // list of all bonds
    private static Set<String> bonds = new TreeSet<>();

    // list of all "has_property" tests for which a datatype property was already defined
    private static Set<String> tests = new TreeSet<>();

    // the ames test can be ignored, since its distribution in PTE-2 is so
    // different from the training substances that a different testing
    // strategy was probably in use
    private static boolean ignoreAmes = false;
    private static boolean ignoreSalmonella = false;
    private static boolean ignoreCytogenCa = false;
    private static boolean includeMutagenesis = true;
    // if true we learn carcinogenic, if false we learn non-carcinogenic
    private static boolean learnCarcinogenic = true;
    private static boolean useNewGroups = true;

    private static boolean createPTE1Conf = false;
    private static boolean createPTE2Conf = false;

    static OWLOntologyManager man = OWLManager.createOWLOntologyManager();
    static OWLDataFactory df = new OWLDataFactoryImpl();

    /**
     * Reads the carcinogenesis Prolog files, maps their clauses to OWL axioms,
     * writes the resulting ontology to /tmp/carcinogenesis.owl and generates
     * the DL-Learner conf file(s) with the training and test examples.
     *
     * @param args
     *            No arguments supported.
     * @throws Exception if reading, parsing or writing any of the files fails.
     */
    public static void main(String[] args) throws Exception {

        String[] files = new String[] { "newgroups.pl", "ames.pl", "atoms.pl", "bonds.pl", "gentoxprops.pl",
                "ind_nos.pl", "ind_pos.pl" };
        // "pte2/canc_nos.pl", "pte2/pte2ames.pl", "pte2/pte2atoms.pl",
        // "pte2/pte2bonds.pl", "pte2/pte2gentox.pl", "pte2/pte2ind_nos.pl", "pte2/pte2newgroups.pl"
        // "train.b" => not a pure Prolog file but Progol/Aleph specific
        // };
        File owlFile = new File("/tmp/carcinogenesis.owl");

        long startTime;

        // reading files
        System.out.print("Reading in carcinogenesis Prolog files ... ");
        startTime = System.nanoTime();
        StringBuilder contentBuilder = new StringBuilder();
        for (String file : files) {
            contentBuilder.append(Files.readFile(new File(prologDirectory + file)));
        }
        String content = contentBuilder.toString();
        printElapsed(startTime);

        // parsing files
        System.out.print("Parsing Prolog files ... ");
        startTime = System.nanoTime();
        PrologParser pp = new PrologParser();
        Program program = pp.parseProgram(content);
        printElapsed(startTime);

        // prepare mapping
        OWLOntology kb = man.createOntology();
        createChemElementsMapping();
        createNewGroups();
        // create subclasses of atom
        OWLClass atomClass = getAtomicConcept("Atom");
        for (String element : chemElements.values()) {
            OWLClass elClass = getAtomicConcept(element);
            man.addAxiom(kb, df.getOWLSubClassOfAxiom(elClass, atomClass));
        }
        // define properties including domain and range
        OWLOntology kb2 = KBParser.parseKBFile(createSchemaDefinition());
        man.addAxioms(kb, kb2.getAxioms());

        // mapping clauses to axioms
        System.out.print("Mapping clauses to axioms ... ");
        startTime = System.nanoTime();
        for (Clause clause : program.getClauses()) {
            for (OWLAxiom axiom : mapClause(clause)) {
                man.addAxiom(kb, axiom);
            }
        }

        if (includeMutagenesis) {
            addMutagenesis(kb);
        }

        // special handling for ames test (we assume the ames test
        // was performed on all compounds but only the positive ones
        // are in ames.pl [the rest is negative in Prolog by CWA], so
        // we add negative test results here)
        if (!ignoreAmes) {
            for (String compound : compounds) {
                if (!compoundsAmes.contains(compound)) {
                    man.addAxiom(kb, getBooleanDatatypePropertyAssertion(compound, "amesTestPositive", false));
                }
            }
        }

        // disjoint classes axioms
        // OWL API is also buggy here, it adds a strange unused prefix
        // and cannot parse its own generated file
//      DisjointClassesAxiom disjointAtomTypes = getDisjointClassesAxiom(atomTypes);
//      kb.addAxiom(disjointAtomTypes);
        String[] mainClasses = new String[] { "Compound", "Atom", "Bond", "Structure" };
        Set<String> mainClassesSet = new HashSet<>(Arrays.asList(mainClasses));
        man.addAxiom(kb, getDisjointClassesAxiom(mainClassesSet));

        // all different axiom (UNA)
        // exporting differentIndividuals axioms is broken in OWL API
//      individuals.addAll(compounds);
//      individuals.addAll(bonds);
//      DifferentIndividualsAxiom una = getDifferentIndividualsAxiom(individuals);
//      kb.addAxiom(una);

        printElapsed(startTime);

        // writing generated knowledge base
        System.out.print("Writing OWL file ... ");
        startTime = System.nanoTime();
        // close the stream even if serialisation fails
        try (FileOutputStream out = new FileOutputStream(owlFile)) {
            man.saveOntology(kb, new RDFXMLDocumentFormat(), out);
        }
        printElapsed(startTime);

        // generating conf files
        File confTrainFile = new File("examples/carcinogenesis/train.conf");
        Files.clearFile(confTrainFile);
        String confHeader = "import(\"carcinogenesis.owl\");\n\n"
                + "reasoner = fastInstanceChecker;\n"
                + "algorithm = refexamples;\n"
                + "refexamples.noisePercentage = 31;\n"
                + "refexamples.startClass = " + getURI2("Compound") + ";\n"
                + "refexamples.writeSearchTree = false;\n"
                + "refexamples.searchTreeFile = \"log/carcinogenesis/searchTree.log\";\n"
                + "\n";
        Files.appendToFile(confTrainFile, confHeader);

        // generating training examples
        List<OWLIndividual> posTrainExamples = getExamples(new File(prologDirectory + "train.f"));
        List<OWLIndividual> negTrainExamples = getExamples(new File(prologDirectory + "train.n"));
        appendPosExamples(confTrainFile, posTrainExamples);
        appendNegExamples(confTrainFile, negTrainExamples);

        // generating test examples for PTE-1
        // => put all in one file, because they were used as training for PTE-2
        List<OWLIndividual> posPTE1Examples = getExamples(new File(prologDirectory + "pte1.f"));
        List<OWLIndividual> negPTE1Examples = getExamples(new File(prologDirectory + "pte1.n"));
        appendPosExamples(confTrainFile, posPTE1Examples);
        appendNegExamples(confTrainFile, negPTE1Examples);
        if (createPTE1Conf) {
            File confPTE1File = new File("examples/carcinogenesis/testpte1.conf");
            Files.clearFile(confPTE1File);
            Files.appendToFile(confPTE1File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n");
            appendPosExamples(confPTE1File, posPTE1Examples);
            appendNegExamples(confPTE1File, negPTE1Examples);
        }

        // create a PTE-2 test file
        if (createPTE2Conf) {
            File confPTE2File = new File("examples/carcinogenesis/testpte2.conf");
            Files.clearFile(confPTE2File);
            Files.appendToFile(confPTE2File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n");
            Files.appendToFile(confPTE2File, getPTE2Examples());
        }

    }

    // prints the "OK (<elapsed time>)." suffix for a step started at startTime (System.nanoTime())
    private static void printElapsed(long startTime) {
        long duration = System.nanoTime() - startTime;
        String time = Helper.prettyPrintNanoSeconds(duration, false, false);
        System.out.println("OK (" + time + ").");
    }

    // builds the KB-syntax text defining all fixed properties (domain and range)
    // and the top-level structure classes
    private static String createSchemaDefinition() {
        StringBuilder kbString = new StringBuilder();
        kbString.append(dataPropertyDefinition("charge", "Atom", "DOUBLE"));
        if (!ignoreAmes) {
            kbString.append(dataPropertyDefinition("amesTestPositive", "Compound", "BOOLEAN"));
        }
        if (includeMutagenesis) {
            kbString.append(dataPropertyDefinition("isMutagenic", "Compound", "BOOLEAN"));
        }
        kbString.append(objectPropertyDefinition("hasAtom", "Compound", "Atom"));
        kbString.append(objectPropertyDefinition("hasBond", "Compound", "Bond"));
        kbString.append(objectPropertyDefinition("inBond", "Bond", "Atom"));
        kbString.append(objectPropertyDefinition("hasStructure", "Compound", "Structure"));
        kbString.append(getURI2("Di")).append(" SUB ").append(getURI2("Structure")).append(".\n");
        kbString.append(getURI2("Halide")).append(" SUB ").append(getURI2("Structure")).append(".\n");
        kbString.append(getURI2("Ring")).append(" SUB ").append(getURI2("Structure")).append(".\n");
        return kbString.toString();
    }

    // KB-syntax domain/range definition of a datatype property; rangeType is e.g. DOUBLE or BOOLEAN
    private static String dataPropertyDefinition(String property, String domainClass, String rangeType) {
        return "DPDOMAIN(" + getURI2(property) + ") = " + getURI2(domainClass) + ".\n"
                + "DPRANGE(" + getURI2(property) + ") = " + rangeType + ".\n";
    }

    // KB-syntax domain/range definition of an object property
    private static String objectPropertyDefinition(String property, String domainClass, String rangeClass) {
        return "OPDOMAIN(" + getURI2(property) + ") = " + getURI2(domainClass) + ".\n"
                + "OPRANGE(" + getURI2(property) + ") = " + getURI2(rangeClass) + ".\n";
    }

    /**
     * Maps a single Prolog clause to the OWL axioms representing it. Only the
     * head of the clause is inspected. Clauses with an unsupported predicate
     * are printed and terminate the program with a non-zero exit status.
     *
     * @param clause the Prolog clause to map.
     * @return the axioms generated for the clause (empty for ignored clauses).
     */
    private static List<OWLAxiom> mapClause(Clause clause) throws ParseException, OWLOntologyCreationException {
        List<OWLAxiom> axioms = new LinkedList<>();
        Atom head = clause.getHead();
        String headName = head.getName();

        if (headName.equals("ames")) {
            if (!ignoreAmes) {
                mapAmes(head, axioms);
            }
        } else if (headName.equals("atm")) {
            mapAtom(head, axioms);
        } else if (headName.equals("bond")) {
            mapBond(head, axioms);
        } else if (headName.equals("has_property")) {
            mapHasProperty(head, axioms);
            // either parse this or ashby_alert - not both - ashby_alert contains
            // all information in ind already
        } else if (headName.equals("ind") || headName.equals("ring_no")) {
            mapStructureCount(head, axioms);
        } else if (headName.equals("ashby_alert")) {
            // ... currently ignored ...
        } else if (newGroups.contains(headName)) {
            if (useNewGroups) {
                mapNewGroup(head, axioms);
            }
        } else {
            // print clauses which are not supported yet
            System.out.println("unsupported clause");
            System.out.println(clause.toPLString());
            System.out.println(clause);
            // unsupported input is an error, so do not report success to the caller
            System.exit(1);
        }
        return axioms;
    }

    // handles ames(compound)
    // Ames-Test: http://en.wikipedia.org/wiki/Ames_test
    // problem: the file apparently mentions only positive
    // tests (why is it different from the other tests e.g. in
    // gentoxprops.pl?) => we need to add negative axioms for the
    // remaining stuff or use closed world assumption in the
    // TBox dematerialisation later on
    private static void mapAmes(Atom head, List<OWLAxiom> axioms)
            throws ParseException, OWLOntologyCreationException {
        String compoundName = head.getArgument(0).toPLString();
        axioms.add(getBooleanDatatypePropertyAssertion(compoundName, "amesTestPositive", true));
        compoundsAmes.add(compoundName);
    }

    // handles atm(compound,atom,element,atomtype,charge)
    private static void mapAtom(Atom head, List<OWLAxiom> axioms)
            throws ParseException, OWLOntologyCreationException {
        String compoundName = head.getArgument(0).toPLString();
        String atomName = head.getArgument(1).toPLString();
        String elementName = head.getArgument(2).toPLString();
        String type = head.getArgument(3).toPLString();
        double charge = Double.parseDouble(head.getArgument(4).toPLString());
        // make the compound an instance of the Compound class
        axioms.add(getConceptAssertion("Compound", compoundName));
        compounds.add(compoundName);
        // relate compound and atom
        axioms.add(getRoleAssertion("hasAtom", compoundName, atomName));
        // atom is made instance of the correct class
        String atomClass = getAtomClass(elementName, type);
        axioms.add(getConceptAssertion(atomClass, atomName));
        // write subclass axiom if doesn't exist already
        if (!atomTypes.contains(atomClass)) {
            OWLClass subClass = getAtomicConcept(atomClass);
            OWLClass superClass = getAtomicConcept(getFullElementName(elementName));
            axioms.add(df.getOWLSubClassOfAxiom(subClass, superClass));
            atomTypes.add(atomClass);
        }
        // charge of atom
        axioms.add(getDoubleDatatypePropertyAssertion(atomName, "charge", charge));
    }

    // handles bond(compound,atom1,atom2,bondtype); a fresh bond individual is created per clause
    private static void mapBond(Atom head, List<OWLAxiom> axioms)
            throws ParseException, OWLOntologyCreationException {
        String compoundName = head.getArgument(0).toPLString();
        String atom1Name = head.getArgument(1).toPLString();
        String atom2Name = head.getArgument(2).toPLString();
        String bondType = head.getArgument(3).toPLString();
        String bondClass = "Bond-" + bondType;
        String bondInstance = "bond" + bondNr;
        bondNr++;
        bonds.add(bondInstance);
        axioms.add(getRoleAssertion("hasBond", compoundName, bondInstance));
        // make Bond-X subclass of Bond if that hasn't been done already
        if (!bondTypes.contains(bondClass)) {
            OWLClass subClass = getAtomicConcept(bondClass);
            axioms.add(df.getOWLSubClassOfAxiom(subClass, getAtomicConcept("Bond")));
            bondTypes.add(bondClass);
        }
        // make e.g. bond382 instance of Bond-3
        axioms.add(getConceptAssertion(bondClass, bondInstance));
        // connect atoms with bond
        axioms.add(getRoleAssertion("inBond", bondInstance, atom1Name));
        axioms.add(getRoleAssertion("inBond", bondInstance, atom2Name));
    }

    // handles has_property(compound,test,result); result "p" means positive
    private static void mapHasProperty(Atom head, List<OWLAxiom> axioms)
            throws ParseException, OWLOntologyCreationException {
        String compoundName = head.getArgument(0).toPLString();
        String testName = head.getArgument(1).toPLString();
        boolean ignored = (ignoreSalmonella && testName.equals("salmonella"))
                || (ignoreCytogenCa && testName.equals("cytogen_ca"));
        if (ignored) {
            return;
        }
        String resultStr = head.getArgument(2).toPLString();
        boolean testResult = resultStr.equals("p");

        // create a new datatype property if it does not exist already
        if (!tests.contains(testName)) {
            OWLOntology kb = KBParser.parseKBFile(dataPropertyDefinition(testName, "Compound", "BOOLEAN"));
            axioms.addAll(kb.getAxioms());
            // remember the test, so that the definition is parsed only once per test
            tests.add(testName);
        }
        // create an axiom with the test result
        axioms.add(getBooleanDatatypePropertyAssertion(compoundName, testName, testResult));
    }

    // handles ind(compound,structure,count) and ring_no(compound,structure,count):
    // the compound gets "count" distinct instances of the structure
    private static void mapStructureCount(Atom head, List<OWLAxiom> axioms)
            throws ParseException, OWLOntologyCreationException {
        String compoundName = head.getArgument(0).toPLString();
        String structureName = head.getArgument(1).toPLString();
        int count = Integer.parseInt(head.getArgument(2).toPLString());
        String structureClass = capitalize(structureName);

        addStructureSubclass(axioms, structureClass);

        for (int i = 0; i < count; i++) {
            // a new individual per iteration, e.g. halide10-382 as instance of Halide10
            addStructureInstance(axioms, compoundName, structureName, structureClass);
        }
    }

    // handles the unary predicates of newgroups.pl, e.g. ketone(compound,...):
    // the compound gets one instance of the structure named like the predicate
    private static void mapNewGroup(Atom head, List<OWLAxiom> axioms)
            throws ParseException, OWLOntologyCreationException {
        String compoundName = head.getArgument(0).toPLString();
        String structureName = head.getName();
        String structureClass = capitalize(structureName);

        addStructureSubclass(axioms, structureClass);
        addStructureInstance(axioms, compoundName, structureName, structureClass);
    }

    // creates the next structure individual (<structureName>-<structureNr>), links it to
    // the compound via hasStructure and makes it an instance of structureClass
    private static void addStructureInstance(List<OWLAxiom> axioms, String compoundName,
            String structureName, String structureClass) {
        String structureInstance = structureName + "-" + structureNr;
        structureNr++;
        axioms.add(getRoleAssertion("hasStructure", compoundName, structureInstance));
        axioms.add(getConceptAssertion(structureClass, structureInstance));
    }

    // upper cases the first letter; Locale.ROOT keeps the result independent of the
    // default locale (e.g. "imine" must become "Imine" under a Turkish locale as well)
    private static String capitalize(String name) {
        return name.substring(0, 1).toUpperCase(Locale.ROOT) + name.substring(1);
    }

    // declares structureClass as subclass of Structure (or of the more specific
    // Di, Ring or Halide class) unless this was already done
    private static void addStructureSubclass(List<OWLAxiom> axioms, String structureClass) {
        if (structureTypes.contains(structureClass)) {
            return;
        }
        // build in more fine-grained subclasses e.g. Di+number is subclass of Di
        OWLClass superClass;
        if (structureClass.contains("Di")) {
            superClass = getAtomicConcept("Di");
        } else if (structureClass.contains("ring") || structureClass.contains("Ring")) {
            superClass = getAtomicConcept("Ring");
        } else if (structureClass.contains("halide") || structureClass.contains("Halide")) {
            superClass = getAtomicConcept("Halide");
        } else {
            superClass = getAtomicConcept("Structure");
        }
        axioms.add(df.getOWLSubClassOfAxiom(getAtomicConcept(structureClass), superClass));
        structureTypes.add(structureClass);
    }

    // takes a *.f or *.n file as input and returns the
    // contained examples (first argument of each clause head)
    private static List<OWLIndividual> getExamples(File file) throws IOException, ParseException {
        String content = Files.readFile(file);
        Program examplesProgram = new PrologParser().parseProgram(content);
        List<OWLIndividual> ret = new LinkedList<>();
        for (Clause c : examplesProgram.getClauses()) {
            String example = c.getHead().getArgument(0).toPLString();
            ret.add(getIndividual(example));
        }
        return ret;
    }

    /**
     * Appends the given carcinogenic compounds to a conf file. They are written
     * as positive examples if carcinogenic is the learning target and as
     * negative examples otherwise.
     *
     * @param file the conf file to append to.
     * @param examples the carcinogenic compounds.
     */
    public static void appendPosExamples(File file, List<OWLIndividual> examples) {
        appendExamples(file, examples, learnCarcinogenic ? '+' : '-');
    }

    /**
     * Appends the given non-carcinogenic compounds to a conf file. They are
     * written as negative examples if carcinogenic is the learning target and
     * as positive examples otherwise.
     *
     * @param file the conf file to append to.
     * @param examples the non-carcinogenic compounds.
     */
    public static void appendNegExamples(File file, List<OWLIndividual> examples) {
        appendExamples(file, examples, learnCarcinogenic ? '-' : '+');
    }

    // writes one line <sign>"<example>" per example to the conf file
    private static void appendExamples(File file, List<OWLIndividual> examples, char sign) {
        StringBuilder content = new StringBuilder();
        for (OWLIndividual example : examples) {
            // NOTE(review): relies on toString() of the OWL API individual; whether that is the
            // bare IRI (as produced by getURI2 for the PTE-2 examples) depends on the
            // OWL API version - confirm, toStringID() may be intended here
            content.append(sign).append('"').append(example.toString()).append("\"\n");
        }
        Files.appendToFile(file, content.toString());
    }

    private static String getAtomClass(String element, String atomType) {
        return getFullElementName(element) + "-" + atomType;
    }

    private static OWLAxiom getConceptAssertion(String concept, String i) {
        return df.getOWLClassAssertionAxiom(getAtomicConcept(concept), getIndividual(i));
    }

    private static OWLAxiom getRoleAssertion(String role, String i1, String i2) {
        return df.getOWLObjectPropertyAssertionAxiom(getRole(role), getIndividual(i1), getIndividual(i2));
    }

    private static OWLAxiom getBooleanDatatypePropertyAssertion(
            String individual, String datatypeProperty, boolean value) {
        OWLIndividual ind = getIndividual(individual);
        OWLDataProperty dp = getDatatypeProperty(datatypeProperty);
        return df.getOWLDataPropertyAssertionAxiom(dp, ind, value);
    }

    private static OWLAxiom getDoubleDatatypePropertyAssertion(
            String individual, String datatypeProperty, double value) {
        OWLIndividual ind = getIndividual(individual);
        OWLDataProperty dp = getDatatypeProperty(datatypeProperty);
        return df.getOWLDataPropertyAssertionAxiom(dp, ind, value);
    }

    private static OWLAxiom getDisjointClassesAxiom(Set<String> classes) {
        Set<OWLClassExpression> descriptions = new HashSet<>();
        for (String namedClass : classes) {
            descriptions.add(getAtomicConcept(namedClass));
        }
        return df.getOWLDisjointClassesAxiom(descriptions);
    }

    @SuppressWarnings({"unused"})
    private static OWLAxiom getDifferentIndividualsAxiom(Set<String> individuals) {
        Set<OWLIndividual> inds = new HashSet<>();
        for (String i : individuals) {
            inds.add(getIndividual(i));
        }
        return df.getOWLDifferentIndividualsAxiom(inds);
    }

    private static OWLIndividual getIndividual(String name) {
        return df.getOWLNamedIndividual(IRI.create(getURI(name)));
    }

    private static OWLObjectProperty getRole(String name) {
        return df.getOWLObjectProperty(IRI.create(getURI(name)));
    }

    private static OWLDataProperty getDatatypeProperty(String name) {
        return df.getOWLDataProperty(IRI.create(getURI(name)));
    }

    private static OWLClass getAtomicConcept(String name) {
        return df.getOWLClass(IRI.create(getURI(name)));
    }

    // returns the full URI of an entity of the carcinogenesis ontology
    private static String getURI(String name) {
        return ontologyIRI + "#" + name;
    }

    // returns URI including quotation marks (needed for KBParser)
    private static String getURI2(String name) {
        return "\"" + getURI(name) + "\"";
    }

    // returns the element name for a (lower case) element symbol
    // and fails if the symbol is not in the list
    private static String getFullElementName(String abbreviation) {
        String result = chemElements.get(abbreviation);
        if (result == null) {
            throw new IllegalArgumentException("Unknown element " + abbreviation);
        }
        return result;
    }

    // create chemical element list
    private static void createChemElementsMapping() {
        chemElements = new HashMap<>();
        chemElements.put("as", "Arsenic");
        chemElements.put("ba", "Barium");
        chemElements.put("br", "Bromine");
        chemElements.put("c", "Carbon");
        chemElements.put("ca", "Calcium");
        chemElements.put("cl", "Chlorine");
        chemElements.put("cu", "Copper");
        chemElements.put("f", "Fluorine");
        chemElements.put("ga", "Gallium");
        chemElements.put("h", "Hydrogen");
        chemElements.put("hg", "Mercury");
        chemElements.put("i", "Iodine");
        // the symbol K denotes potassium (krypton is Kr)
        chemElements.put("k", "Potassium");
        chemElements.put("mn", "Manganese");
        chemElements.put("mo", "Molybdenum");
        chemElements.put("n", "Nitrogen");
        chemElements.put("na", "Sodium");
        chemElements.put("o", "Oxygen");
        chemElements.put("p", "Phosphorus");
        chemElements.put("pb", "Lead");
        chemElements.put("s", "Sulfur");
        chemElements.put("se", "Selenium");
        chemElements.put("sn", "Tin");
        chemElements.put("te", "Tellurium");
        chemElements.put("ti", "Titanium");
        chemElements.put("v", "Vanadium");
        chemElements.put("zn", "Zinc");
    }

    // registers the predicate names of newgroups.pl which are mapped to structures
    private static void createNewGroups() {
        String[] groups = new String[] { "six_ring", "non_ar_6c_ring",
                "ketone", "amine", "alcohol", "ether", "ar_halide",
                "five_ring", "non_ar_5c_ring", "alkyl_halide",
                "methyl", "non_ar_hetero_5_ring", "nitro", "sulfo",
                "methoxy", "aldehyde", "sulfide",
                "non_ar_hetero_6_ring", "phenol", "carboxylic_acid",
                "ester", "imine",
        };
        newGroups.addAll(Arrays.asList(groups));
    }

    /**
     * <p>To find out whether a substance is carcinogenic go to
     * "http://ntp-server.niehs.nih.gov/" and click
     * on "Testing Status of Agents at NTP".</p>
     *
     * Levels:
     * <ul>
     * <li>CE = clear evidence</li>
     * <li>SE = some evidence</li>
     * <li>E = equivocal evidence</li>
     * <li>NE = no evidence</li>
     * </ul>
     * Levels CE and SE are positive examples. E and NE negative examples.
     * Experiments are performed on rats and mice of both genders, so we
     * have four evidence values. An example is positive if at least one
     * value is SE or CE.
     *
     * <p>Some values are taken from the IJCAI-97 paper of Muggleton.</p>
     *
     * <p>Positives (20):</p>
     * <ul>
     * <li>t3 (SE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCACAFD4-123F-7908-7B521E4F665EFBD9</li>
     * <li>t4 (3CE+NE) - contradicts IJCAI-97 paper and should probably be case 75-52-5 instead of 75-52-8: http://ntp.niehs.nih.gov/index.cfm?objectid=BCE49084-123F-7908-7BE127F7AF1FFBB5</li>
     * <li>t5: paper</li>
     * <li>t7: paper</li>
     * <li>t8: paper</li>
     * <li>t9 (3CE+SE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD7C6869-123F-7908-7BDEA4CFAA55CEA8</li>
     * <li>t10: paper</li>
     * <li>t12 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCB0ADE0-123F-7908-7BEC101C7309C4DE</li>
     * <li>t14 (2CE+2NE) probably 111-42-2 instead of 11-42-2: http://ntp.niehs.nih.gov/index.cfm?objectid=BCC60FF1-123F-7908-7B2D579AA48DE90C</li>
     * <li>t15: paper</li>
     * <li>t16 (2CE+SE+E): http://ntp.niehs.nih.gov/index.cfm?objectid=BCC5D9CE-123F-7908-7B959CCE5262468A</li>
     * <li>t18 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCA087AA-123F-7908-7B79FDFDE3CDCF87</li>
     * <li>t19 (2CE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCAE5690-123F-7908-7B02E35E2BB57694</li>
     * <li>t20 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCF95607-123F-7908-7B0761D3C515CC12</li>
     * <li>t21 (CE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCFCB63C-123F-7908-7BF910C2783AE9FE</li>
     * <li>t22 (SE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD8345C2-123F-7908-7BC52FEF80F110E1</li>
     * <li>t23 (4CE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCADD2D9-123F-7908-7B5C8180FE80B22F</li>
     * <li>t24 (CE+E): http://ntp.niehs.nih.gov/index.cfm?objectid=BCFB19FF-123F-7908-7B845E176F13E6E1</li>
     * <li>t25 (3CE+SE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD2D2A62-123F-7908-7B0DA824E782754C</li>
     * <li>t30 (2CE+SE+E) : http://ntp.niehs.nih.gov/index.cfm?objectid=BCB13734-123F-7908-7BEBA533E35A48B7</li>
     * </ul>
     *
     * <p>Negatives (9):</p>
     * <ul>
     * <li>t1 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD9FF53C-123F-7908-7B123DAE0A25B122 </li>
     * <li>t2 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCF8651E-123F-7908-7B21DD5ED83CD0FF </li>
     * <li><strike>t4: paper</strike></li>
     * <li>t6: paper</li>
     * <li>t11: paper</li>
     * <li>t13 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD136ED6-123F-7908-7B619EE79F2FD062</li>
     * <li>t17: paper</li>
     * <li>t26 (2E+2NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD1E6209-123F-7908-7B95EB8BAE662CE7</li>
     * <li>t27 (E+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCAC5D00-123F-7908-7BC46ECB72A6C91B</li>
     * <li>t28 (E+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD34E02A-123F-7908-7BC6791917B591DF</li>
     * </ul>
     *
     * <p>Unclear (1), listed among the negatives in the returned examples:</p>
     * <ul>
     * <li>t29: probably a negative (see http://ntp.niehs.nih.gov/index.cfm?objectid=BD855EA1-123F-7908-7B573FC3C08188DC) but
     * no tests directly for this substance</li>
     * </ul>
     *
     * <p>The following examples are probably not part of the IJCAI PTE-2 challenge
     * (reports younger than 1998):</p>
     * <ul>
     * <li>pos: t21 (5/99), t25 (9/04), t30(10/01)</li>
     * <li>neg: t26 (5/99), t27 (05/01), t28 (05/00), t29 (09/02)</li>
     * </ul>
     *
     * @return A string for all examples as used in the conf file.
     */
    public static String getPTE2Examples() {
        String[] pos = new String[] { "t3", "t4", "t5", "t7", "t8",
                "t9",
                "t10", "t12",
                "t14", "t15", "t16", "t18", "t19", "t20",
                "t21",
                "t22",
                "t23",
                "t24",
                "t25",
                "t30" };
        String[] neg = new String[] { "t1", "t2",
                "t6", "t11", "t13",
                "t17", "t26", "t27",
                "t28", "t29"
        };

        // carcinogenic substances are the positive examples only if we learn "carcinogenic"
        String posSign = learnCarcinogenic ? "+" : "-";
        String negSign = learnCarcinogenic ? "-" : "+";

        StringBuilder ret = new StringBuilder();
        for (String posEx : pos) {
            ret.append(posSign).append(getURI2(posEx)).append("\n");
        }
        for (String negEx : neg) {
            ret.append(negSign).append(getURI2(negEx)).append("\n");
        }
        return ret.toString();
    }

    // adds an isMutagenic assertion for every compound seen so far: true for the
    // compounds listed below, false for all others
    private static void addMutagenesis(OWLOntology kb) {
        String[] mutagenicCompounds = new String[] {
                "d101", "d104", "d106", "d107", "d112", "d113", "d117",
                "d121", "d123", "d126", "d128", "d13", "d135", "d137",
                "d139", "d140", "d143", "d144", "d145", "d146", "d147",
                "d152", "d153", "d154", "d155", "d156", "d159", "d160",
                "d161", "d163", "d164", "d166", "d168", "d171", "d173",
                "d174", "d177", "d179", "d18", "d180", "d182", "d183",
                "d185", "d186", "d187", "d188", "d189", "d19", "d191",
                "d192", "d193", "d195", "d197", "d2", "d201", "d202",
                "d205", "d206", "d207", "d211", "d214", "d215", "d216",
                "d224", "d225", "d227", "d228", "d229", "d231", "d235",
                "d237", "d239", "d242", "d245", "d246", "d249", "d251",
                "d254", "d257", "d258", "d261", "d264", "d266", "d269",
                "d27", "d270", "d271", "d28", "d288", "d292", "d297",
                "d300", "d308", "d309", "d311", "d313", "d314", "d322",
                "d323", "d324", "d329", "d330", "d332", "d334", "d35",
                "d36", "d37", "d38", "d41", "d42", "d48", "d50", "d51",
                "d54", "d58", "d61", "d62", "d63", "d66", "d69", "d72",
                "d76", "d77", "d78", "d84", "d86", "d89", "d92", "d96" };
        Set<String> mutagenic = new TreeSet<>(Arrays.asList(mutagenicCompounds));

        for (String compound : compounds) {
            boolean isMutagenic = mutagenic.contains(compound);
            man.addAxiom(kb, getBooleanDatatypePropertyAssertion(compound, "isMutagenic", isMutagenic));
        }
    }
}