001/**
002 * Copyright (C) 2007-2008, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 * 
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 *
019 */
020package org.dllearner.examples;
021
022import org.dllearner.parser.KBParser;
023import org.dllearner.parser.ParseException;
024import org.dllearner.parser.PrologParser;
025import org.dllearner.prolog.Atom;
026import org.dllearner.prolog.Clause;
027import org.dllearner.prolog.Program;
028import org.dllearner.utilities.Files;
029import org.dllearner.utilities.Helper;
030import org.semanticweb.owlapi.apibinding.OWLManager;
031import org.semanticweb.owlapi.formats.RDFXMLDocumentFormat;
032import org.semanticweb.owlapi.model.*;
033import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
034
035import java.io.File;
036import java.io.FileOutputStream;
037import java.io.IOException;
038import java.util.*;
039
040/**
 * This class maps the carcinogenesis Prolog files to an OWL file. In a first
 * step, a Prolog parser is used to read all files. The main step then maps
 * the Prolog clauses to OWL axioms using domain-specific mapping rules.
045 * 
046 * The carcinogenesis Prolog files are available here:
047 * http://web.comlab.ox.ac.uk/oucl/research/areas/machlearn/cancer.html
048 * 
 * .f files contain positive and .n files contain negative examples. pte1.f and
 * pte1.n contain the PTE-1 challenge examples. train.f and train.n contain other
 * examples which can be used to train for PTE-1.
052 * 
053 * The PTE-2 directory contains PTE-2 files, i.e. all substances referred to in
054 * those files are only those of the PTE-2 challenge.
055 * 
056 * @author Jens Lehmann
057 * 
058 */
059public class Carcinogenesis {
060
        // base IRI of the generated ontology; all entity IRIs are built as
        // ontologyIRI + "#" + local name
        private static IRI ontologyIRI = IRI.create("http://dl-learner.org/carcinogenesis");

        // directory of the Prolog input files (relative path)
        private static final String prologDirectory = "../examples/carcinogenesis/prolog/";     
        
        // mapping of (lower case) element symbols to names of chemical elements,
        // filled by createChemElementsMapping()
        private static Map<String, String> chemElements;

        // structures in newgroups.pl; clause heads with one of these names are
        // mapped to structure instances
        private static Set<String> newGroups = new TreeSet<>();
        
        // types of atoms, bonds, and structures for which a subclass axiom
        // has already been generated
        private static Set<String> atomTypes = new TreeSet<>();
        private static Set<String> bondTypes = new TreeSet<>();
        private static Set<String> structureTypes = new TreeSet<>();

        // we need a counter for bonds, because they are instances in OWL
        // but not in Prolog; the same holds for structures
        private static int bondNr = 0;
        private static int structureNr = 0;
        
        // list of all individuals in the knowledge base (currently disabled)
//      private static Set<String> individuals = new TreeSet<String>(); 
        // list of all compounds
        private static Set<String> compounds = new TreeSet<>();
        // compounds with positive ames test
        private static Set<String> compoundsAmes = new TreeSet<>();
        // list of all bonds
        private static Set<String> bonds = new TreeSet<>();
        
        // names of the tests occurring in has_property clauses
        private static Set<String> tests = new TreeSet<>();
        
        // switches for leaving out individual test results; the ames test can
        // be ignored because its distribution in PTE-2 is so different from the
        // training substances that a different testing strategy was probably
        // in use (with the current setting it is NOT ignored)
        private static boolean ignoreAmes = false;
        private static boolean ignoreSalmonella = false;
        private static boolean ignoreCytogenCa = false;
        // if true, the isMutagenic property is declared and addMutagenesis()
        // is applied to the knowledge base
        private static boolean includeMutagenesis = true;
        // if true we learn carcinogenic, if false we learn non-carcinogenic
        private static boolean learnCarcinogenic = true;
        // if true, clauses for the structures in newGroups are mapped
        private static boolean useNewGroups = true;
        
        // whether separate conf files for the PTE-1 / PTE-2 test sets are written
        private static boolean createPTE1Conf = false;
        private static boolean createPTE2Conf = false;
        
        // OWL API entry points used to build and save the ontology
        static OWLOntologyManager man = OWLManager.createOWLOntologyManager();
        static OWLDataFactory df = new OWLDataFactoryImpl();
110        
111        /**
112         * @param args
113         *            No arguments supported.
114         */
115        public static void main(String[] args) throws Exception {
116                
117                String[] files = new String[] { "newgroups.pl", "ames.pl", "atoms.pl", "bonds.pl", "gentoxprops.pl",
118                                "ind_nos.pl", "ind_pos.pl"};
119                // "pte2/canc_nos.pl", "pte2/pte2ames.pl", "pte2/pte2atoms.pl",
120                //              "pte2/pte2bonds.pl", "pte2/pte2gentox.pl", "pte2/pte2ind_nos.pl", "pte2/pte2newgroups.pl"
121                // "train.b" => not a pure Prolog file but Progol/Aleph specific
122                // };
123                File owlFile = new File("/tmp/carcinogenesis.owl");
124
125                Program program = null;
126                long startTime, duration;
127                String time;
128
129                // reading files
130                System.out.print("Reading in carcinogenesis Prolog files ... ");
131                startTime = System.nanoTime();
132                String content = "";
133                for (String file : files) {
134                        content += Files.readFile(new File(prologDirectory + file));
135                }
136                duration = System.nanoTime() - startTime;
137                time = Helper.prettyPrintNanoSeconds(duration, false, false);
138                System.out.println("OK (" + time + ").");
139
140                // parsing files
141                System.out.print("Parsing Prolog files ... ");
142                startTime = System.nanoTime();
143                PrologParser pp = new PrologParser();
144                program = pp.parseProgram(content);
145                duration = System.nanoTime() - startTime;
146                time = Helper.prettyPrintNanoSeconds(duration, false, false);
147                System.out.println("OK (" + time + ").");
148
149                // prepare mapping
150                OWLOntology kb = man.createOntology();
151                createChemElementsMapping();
152                createNewGroups();
153                // create subclasses of atom
154                OWLClass atomClass = getAtomicConcept("Atom");
155                for (String element : chemElements.values()) {
156                        OWLClass elClass = getAtomicConcept(element);
157                        OWLSubClassOfAxiom sc = df.getOWLSubClassOfAxiom(elClass, atomClass);
158                        man.addAxiom(kb, sc);
159                }
160                // define properties including domain and range
161                String kbString = "DPDOMAIN(" + getURI2("charge") + ") = " + getURI2("Atom") + ".\n";
162                kbString += "DPRANGE(" + getURI2("charge") + ") = DOUBLE.\n";
163                if(!ignoreAmes) {
164                        kbString += "DPDOMAIN(" + getURI2("amesTestPositive") + ") = " + getURI2("Compound") + ".\n";
165                        kbString += "DPRANGE(" + getURI2("amesTestPositive") + ") = BOOLEAN.\n";
166                }
167                if(includeMutagenesis) {
168                        kbString += "DPDOMAIN(" + getURI2("isMutagenic") + ") = " + getURI2("Compound") + ".\n";
169                        kbString += "DPRANGE(" + getURI2("isMutagenic") + ") = BOOLEAN.\n";
170                }
171                kbString += "OPDOMAIN(" + getURI2("hasAtom") + ") = " + getURI2("Compound") + ".\n";
172                kbString += "OPRANGE(" + getURI2("hasAtom") + ") = " + getURI2("Atom") + ".\n";
173                kbString += "OPDOMAIN(" + getURI2("hasBond") + ") = " + getURI2("Compound") + ".\n";
174                kbString += "OPRANGE(" + getURI2("hasBond") + ") = " + getURI2("Bond") + ".\n";
175                kbString += "OPDOMAIN(" + getURI2("inBond") + ") = " + getURI2("Bond") + ".\n";
176                kbString += "OPRANGE(" + getURI2("inBond") + ") = " + getURI2("Atom") + ".\n";
177                kbString += "OPDOMAIN(" + getURI2("hasStructure") + ") = " + getURI2("Compound") + ".\n";
178                kbString += "OPRANGE(" + getURI2("hasStructure") + ") = " + getURI2("Structure") + ".\n";
179                kbString += getURI2("Di") + " SUB " + getURI2("Structure") + ".\n";
180                kbString += getURI2("Halide") + " SUB " + getURI2("Structure") + ".\n";
181                kbString += getURI2("Ring") + " SUB " + getURI2("Structure") + ".\n";
182                OWLOntology kb2 = KBParser.parseKBFile(kbString);
183                man.addAxioms(kb, kb2.getAxioms());
184
185                // mapping clauses to axioms
186                System.out.print("Mapping clauses to axioms ... ");
187                startTime = System.nanoTime();
188                ArrayList<Clause> clauses = program.getClauses();
189                for (Clause clause : clauses) {
190                        List<OWLAxiom> axioms = mapClause(clause);
191                        for (OWLAxiom axiom : axioms)
192                                man.addAxiom(kb, axiom);
193                }
194                
195                if(includeMutagenesis)
196                        addMutagenesis(kb);
197                
198                // special handling for ames test (we assume the ames test
199                // was performed on all compounds but only the positive ones
200                // are in ames.pl [the rest is negative in Prolog by CWA], so
201                // we add negative test results here)
202                for(String compound : compounds) {
203                        if(!ignoreAmes && !compoundsAmes.contains(compound)) {
204                                OWLAxiom ames = getBooleanDatatypePropertyAssertion(compound, "amesTestPositive", false);
205                                man.addAxiom(kb, ames);
206                        }
207                }
208                
209                // disjoint classes axioms
210                // OWL API is also buggy here, it adds a strange unused prefix
211                // and cannot parser its own generated file
212//              DisjointClassesAxiom disjointAtomTypes = getDisjointClassesAxiom(atomTypes);
213//              kb.addAxiom(disjointAtomTypes);
214                String[] mainClasses = new String[] {"Compound", "Atom", "Bond", "Structure"};
215                Set<String> mainClassesSet = new HashSet<>(Arrays.asList(mainClasses));
216                OWLAxiom disjointAtomTypes = getDisjointClassesAxiom(mainClassesSet);
217                man.addAxiom(kb, disjointAtomTypes);            
218                
219                // all different axiom (UNA)
220                // exporting differentIndividuals axioms is broken in OWL API
221//              individuals.addAll(compounds);
222//              individuals.addAll(bonds);
223//              DifferentIndividualsAxiom una = getDifferentIndividualsAxiom(individuals);
224//              kb.addAxiom(una);
225                
226                duration = System.nanoTime() - startTime;
227                time = Helper.prettyPrintNanoSeconds(duration, false, false);
228                System.out.println("OK (" + time + ").");
229
230                // writing generated knowledge base
231                System.out.print("Writing OWL file ... ");
232                startTime = System.nanoTime();
233                man.saveOntology(kb, new RDFXMLDocumentFormat(), new FileOutputStream(owlFile));
234                duration = System.nanoTime() - startTime;
235                time = Helper.prettyPrintNanoSeconds(duration, false, false);
236                System.out.println("OK (" + time + ").");
237
238                // generating conf files
239                File confTrainFile = new File("examples/carcinogenesis/train.conf");
240                Files.clearFile(confTrainFile);
241                String confHeader = "import(\"carcinogenesis.owl\");\n\n";
242                confHeader += "reasoner = fastInstanceChecker;\n";
243                confHeader += "algorithm = refexamples;\n";
244                confHeader += "refexamples.noisePercentage = 31;\n";
245                confHeader += "refexamples.startClass = " + getURI2("Compound") + ";\n";
246                confHeader += "refexamples.writeSearchTree = false;\n";
247                confHeader += "refexamples.searchTreeFile = \"log/carcinogenesis/searchTree.log\";\n";
248                confHeader += "\n";
249                Files.appendToFile(confTrainFile, confHeader);
250                
251                // generating training examples
252                File trainingFilePositives = new File(prologDirectory + "train.f");
253                File trainingFileNegatives = new File(prologDirectory + "train.n");
254
255                List<OWLIndividual> posTrainExamples = getExamples(trainingFilePositives);
256                List<OWLIndividual> negTrainExamples = getExamples(trainingFileNegatives);
257                appendPosExamples(confTrainFile, posTrainExamples);
258                appendNegExamples(confTrainFile, negTrainExamples);
259                
260                // generating test examples for PTE-1
261                // => put all in one file, because they were used as training for PTE-2
262                File confPTE1File = new File("examples/carcinogenesis/testpte1.conf");
263                File testPTE1Positives = new File(prologDirectory + "pte1.f");
264                File testPTE1Negatives = new File(prologDirectory + "pte1.n");
265                
266                List<OWLIndividual> posPTE1Examples = getExamples(testPTE1Positives);
267                List<OWLIndividual> negPTE1Examples = getExamples(testPTE1Negatives);
268                appendPosExamples(confTrainFile, posPTE1Examples);
269                appendNegExamples(confTrainFile, negPTE1Examples);
270                if(createPTE1Conf) {
271                        Files.clearFile(confPTE1File);
272                        Files.appendToFile(confPTE1File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n");
273                        appendPosExamples(confPTE1File, posPTE1Examples);
274                        appendNegExamples(confPTE1File, negPTE1Examples);
275                }
276                
277                // create a PTE-2 test file
278                if(createPTE2Conf) {
279                        File confPTE2File = new File("examples/carcinogenesis/testpte2.conf");
280                        Files.clearFile(confPTE2File);
281                        Files.appendToFile(confPTE2File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n");
282                        Files.appendToFile(confPTE2File, getPTE2Examples());
283                }
284
285        }
286
287        private static List<OWLAxiom> mapClause(Clause clause) throws ParseException, OWLOntologyCreationException {
288                List<OWLAxiom> axioms = new LinkedList<>();
289                Atom head = clause.getHead();
290                String headName = head.getName();
291                // Body body = clause.getBody();
292                // ArrayList<Literal> literals = body.getLiterals();
293                // handle: atm(compound,atom,element,atomtype,charge)
294                
295                // Ames-Test: http://en.wikipedia.org/wiki/Ames_test
296                // problem: the file apparently mentions only positive
297                // tests (why is it different from the other tests e.g. in
298                // gentoxprops.pl?) => we need to add negative axioms for the
299                // remaining stuff or use closed world assumption in the 
300                // TBox dematerialisation later on
301                if(headName.equals("ames")) {
302                        if(!ignoreAmes) {
303                        String compoundName = head.getArgument(0).toPLString();
304                        OWLAxiom ames = getBooleanDatatypePropertyAssertion(compoundName, "amesTestPositive", true);
305                        axioms.add(ames);
306                        compoundsAmes.add(compoundName);
307                        }
308                } else if (headName.equals("atm")) {
309                        String compoundName = head.getArgument(0).toPLString();
310                        String atomName = head.getArgument(1).toPLString();
311                        String elementName = head.getArgument(2).toPLString();
312                        String type = head.getArgument(3).toPLString();
313                        double charge = Double.parseDouble(head.getArgument(4).toPLString());
314                        // make the compound an instance of the Compound class
315                        OWLAxiom cmpAxiom = getConceptAssertion("Compound", compoundName);
316                        axioms.add(cmpAxiom);
317                        compounds.add(compoundName);
318                        // relate compound and atom
319                        OWLAxiom ra = getRoleAssertion("hasAtom", compoundName, atomName);
320                        axioms.add(ra);
321                        // atom is made instance of the correct class
322                        String atomClass = getAtomClass(elementName, type);
323                        OWLAxiom ca = getConceptAssertion(atomClass, atomName);
324                        axioms.add(ca);
325                        // write subclass axiom if doesn't exist already
326                        if (!atomTypes.contains(atomClass)) {
327                                OWLClass subClass = getAtomicConcept(atomClass);
328                                OWLClass superClass = getAtomicConcept(getFullElementName(elementName));
329                                OWLAxiom sc = df.getOWLSubClassOfAxiom(subClass, superClass);
330                                axioms.add(sc);
331                                atomTypes.add(atomClass);
332                        }
333                        // charge of atom
334                        OWLAxiom dpa = getDoubleDatatypePropertyAssertion(atomName, "charge",
335                                        charge);
336                        axioms.add(dpa);
337                } else if (headName.equals("bond")) {
338                        String compoundName = head.getArgument(0).toPLString();
339                        String atom1Name = head.getArgument(1).toPLString();
340                        String atom2Name = head.getArgument(2).toPLString();
341                        String bondType = head.getArgument(3).toPLString();
342                        String bondClass = "Bond-" + bondType;
343                        String bondInstance = "bond" + bondNr;
344                        bonds.add(bondInstance);
345                        OWLAxiom op = getRoleAssertion("hasBond", compoundName, "bond" + bondNr);
346                        axioms.add(op);
347                        // make Bond-X subclass of Bond if that hasn't been done already
348                        if (!bondTypes.contains(bondClass)) {
349                                OWLClass subClass = getAtomicConcept(bondClass);
350                                OWLAxiom sc = df.getOWLSubClassOfAxiom(subClass, getAtomicConcept("Bond"));
351                                axioms.add(sc);
352                                bondTypes.add(bondClass);
353                        }
354                        // make e.g. bond382 instance of Bond-3
355                        OWLAxiom ca = getConceptAssertion(bondClass, bondInstance);
356                        axioms.add(ca);
357                        bondNr++;
358                        // connect atoms with bond
359                        OWLAxiom op1 = getRoleAssertion("inBond", bondInstance, atom1Name);
360                        OWLAxiom op2 = getRoleAssertion("inBond", bondInstance, atom2Name);
361                        axioms.add(op1);
362                        axioms.add(op2);
363                } else if (headName.equals("has_property")) {
364                        String compoundName = head.getArgument(0).toPLString();
365                        String testName = head.getArgument(1).toPLString();
366                        if(!(ignoreSalmonella && testName.equals("salmonella"))
367                                && !(ignoreCytogenCa && testName.equals("cytogen_ca"))) {
368                                String resultStr = head.getArgument(2).toPLString();
369                                boolean testResult = (resultStr.equals("p"));
370                                        
371                                // create a new datatype property if it does not exist already
372                                if(!tests.contains(testName)) {
373                                        String axiom1 = "DPDOMAIN(" + getURI2(testName) + ") = " + getURI2("Compound") + ".\n";
374                                        String axiom2 = "DPRANGE(" + getURI2(testName) + ") = BOOLEAN.\n";
375                                        OWLOntology kb = KBParser.parseKBFile(axiom1 + axiom2);
376                                        axioms.addAll(kb.getAxioms());
377                                }
378                                // create an axiom with the test result
379                                OWLAxiom dpa = getBooleanDatatypePropertyAssertion(compoundName, testName,
380                                                testResult);
381                                axioms.add(dpa);
382                        }
383                // either parse this or ashby_alert - not both - ashby_alert contains
384                // all information in ind already
385                } else if (headName.equals("ind") || headName.equals("ring_no")) {
386                        // parse this only if the new groups are not parsed
387//                      if(!useNewGroups) {
388                        String compoundName = head.getArgument(0).toPLString();
389                        String structureName = head.getArgument(1).toPLString();
390                        int count = Integer.parseInt(head.getArgument(2).toPLString());
391                        // upper case first letter
392                        String structureClass = structureName.substring(0,1).toUpperCase() + structureName.substring(1);
393                        String structureInstance = structureName + "-" + structureNr;
394                        
395                        addStructureSubclass(axioms, structureClass);   
396                        
397                        for(int i=0; i<count; i++) {
398                                OWLAxiom op = getRoleAssertion("hasStructure", compoundName, structureInstance);
399                                axioms.add(op);
400                                // make e.g. halide10-382 instance of Bond-3
401                                OWLAxiom ca = getConceptAssertion(structureClass, structureInstance);
402                                axioms.add(ca);
403                                structureNr++;
404                        }
405//                      }
406                } else if (headName.equals("ashby_alert")) {
407                        // ... currently ignored ...
408                } else if (newGroups.contains(headName)) {
409                        if(useNewGroups) {
410                        String compoundName = head.getArgument(0).toPLString();
411                        String structureName = headName;
412                        // upper case first letter
413                        String structureClass = structureName.substring(0,1).toUpperCase() + structureName.substring(1);
414                                String structureInstance = structureName + "-" + structureNr;
415                        
416                        addStructureSubclass(axioms, structureClass);
417                        
418                                OWLAxiom op = getRoleAssertion("hasStructure", compoundName, structureInstance);
419                                axioms.add(op);
420                                OWLAxiom ca = getConceptAssertion(structureClass, structureInstance);
421                                axioms.add(ca);
422                                structureNr++;
423                        }
424                } else {
425                        // print clauses which are not supported yet
426                        System.out.println("unsupported clause");
427                        System.out.println(clause.toPLString());
428                        System.out.println(clause);
429                        System.exit(0);
430                }
431                return axioms;
432        }
433
434        private static void addStructureSubclass(List<OWLAxiom> axioms, String structureClass) {
435                // build in more fine-grained subclasses e.g. Di+number is subclass of Di
436                if (!structureTypes.contains(structureClass)) {
437                        OWLClass nc = getAtomicConcept("Structure");
438                        if(structureClass.contains("Di"))
439                                nc = getAtomicConcept("Di");
440                        else if(structureClass.contains("ring") || structureClass.contains("Ring"))
441                                nc = getAtomicConcept("Ring");
442                        else if(structureClass.contains("halide") || structureClass.contains("Halide"))
443                                nc = getAtomicConcept("Halide");
444                        OWLClass subClass = getAtomicConcept(structureClass);
445                        OWLAxiom sc = df.getOWLSubClassOfAxiom(subClass, nc);
446                        axioms.add(sc);
447                        structureTypes.add(structureClass);
448                }                       
449        }
450        
451        // takes a *.f or *.n file as input and returns the 
452        // contained examples
453        private static List<OWLIndividual> getExamples(File file) throws IOException, ParseException {
454                String content = Files.readFile(file);
455                PrologParser pp = new PrologParser();
456                Program programPos = pp.parseProgram(content);
457                List<OWLIndividual> ret = new LinkedList<>();
458                for(Clause c : programPos.getClauses()) {
459                        String example = c.getHead().getArgument(0).toPLString();
460                        ret.add(getIndividual(example));
461                }
462                return ret;
463        }
464        
465        public static void appendPosExamples(File file, List<OWLIndividual> examples) {
466                StringBuffer content = new StringBuffer();
467                for(OWLIndividual example : examples) {
468                        if(learnCarcinogenic)
469                                content.append("+\"").append(example.toString()).append("\"\n");
470                        else
471                                content.append("-\"").append(example.toString()).append("\"\n");
472                }
473                Files.appendToFile(file, content.toString());
474        }
475        
476        public static void appendNegExamples(File file, List<OWLIndividual> examples) {
477                StringBuffer content = new StringBuffer();
478                for(OWLIndividual example : examples) {
479                        if(learnCarcinogenic)
480                                content.append("-\"").append(example.toString()).append("\"\n");
481                        else
482                                content.append("+\"").append(example.toString()).append("\"\n");
483                }
484                Files.appendToFile(file, content.toString());
485        }       
486        
487        private static String getAtomClass(String element, String atomType) {
488                return getFullElementName(element) + "-" + atomType;
489        }
490
491        private static OWLAxiom getConceptAssertion(String concept, String i) {
492                OWLIndividual ind = getIndividual(i);
493                OWLClass c = getAtomicConcept(concept);
494                return df.getOWLClassAssertionAxiom(c, ind);
495        }
496
497        private static OWLAxiom getRoleAssertion(String role, String i1, String i2) {
498                OWLIndividual ind1 = getIndividual(i1);
499                OWLIndividual ind2 = getIndividual(i2);
500                OWLObjectProperty ar = getRole(role);
501                return df.getOWLObjectPropertyAssertionAxiom(ar, ind1, ind2);
502        }
503
504        private static OWLAxiom getBooleanDatatypePropertyAssertion(
505                        String individual, String datatypeProperty, boolean value) {
506                OWLIndividual ind = getIndividual(individual);
507                OWLDataProperty dp = getDatatypeProperty(datatypeProperty);
508                return  df.getOWLDataPropertyAssertionAxiom(dp, ind, value);
509        }       
510        
511        private static OWLAxiom getDoubleDatatypePropertyAssertion(
512                        String individual, String datatypeProperty, double value) {
513                OWLIndividual ind = getIndividual(individual);
514                OWLDataProperty dp = getDatatypeProperty(datatypeProperty);
515                return df.getOWLDataPropertyAssertionAxiom(dp, ind, value);
516        }
517
518        private static OWLAxiom getDisjointClassesAxiom(Set<String> classes) {
519                Set<OWLClassExpression> descriptions = new HashSet<>();
520                for(String namedClass : classes)
521                        descriptions.add(df.getOWLClass(IRI.create(getURI(namedClass))));
522                return df.getOWLDisjointClassesAxiom(descriptions);
523        }
524        
525        @SuppressWarnings({"unused"})
526        private static OWLAxiom getDifferentIndividualsAxiom(Set<String> individuals) {
527                Set<OWLIndividual> inds = new HashSet<>();
528                for(String i : individuals)
529                        inds.add(getIndividual(i));
530                return df.getOWLDifferentIndividualsAxiom(inds);
531        }       
532        
533        private static OWLIndividual getIndividual(String name) {
534                return df.getOWLNamedIndividual(IRI.create(ontologyIRI + "#" + name));
535        }
536
537        private static OWLObjectProperty getRole(String name) {
538                return df.getOWLObjectProperty(IRI.create(ontologyIRI + "#" + name));
539        }
540
541        private static OWLDataProperty getDatatypeProperty(String name) {
542                return df.getOWLDataProperty(IRI.create(ontologyIRI + "#" + name));
543        }
544
545        private static OWLClass getAtomicConcept(String name) {
546                return df.getOWLClass(IRI.create(ontologyIRI + "#" + name));
547        }
548
549        private static String getURI(String name) {
550                return ontologyIRI + "#" + name;
551        }
552        
553        // returns URI including quotationsmark (need for KBparser)
554        private static String getURI2(String name) {
555                return "\"" + getURI(name) + "\"";
556        }       
557        
558        private static String getFullElementName(String abbreviation) {
559                // return corresponding element or throw an error if it
560                // is not in the list
561                String result = chemElements.get(abbreviation);
562                if (result == null)
563                        throw new Error("Unknown element " + abbreviation);
564                else
565                        return result;
566        }
567
568        // create chemical element list
569        private static void createChemElementsMapping() {
570                chemElements = new HashMap<>();
571                chemElements.put("as", "Arsenic");
572                chemElements.put("ba", "Barium");
573                chemElements.put("br", "Bromine");
574                chemElements.put("c", "Carbon");
575                chemElements.put("ca", "Calcium");
576                chemElements.put("cl", "Chlorine");
577                chemElements.put("cu", "Copper");
578                chemElements.put("f", "Fluorine");
579                chemElements.put("ga", "Gallium");
580                chemElements.put("h", "Hydrogen");
581                chemElements.put("hg", "Mercury");
582                chemElements.put("i", "Iodine");
583                chemElements.put("k", "Krypton");
584                chemElements.put("mn", "Manganese");
585                chemElements.put("mo", "Molybdenum");
586                chemElements.put("n", "Nitrogen");
587                chemElements.put("na", "Sodium");
588                chemElements.put("o", "Oxygen");
589                chemElements.put("p", "Phosphorus");
590                chemElements.put("pb", "Lead");
591                chemElements.put("s", "Sulfur");
592                chemElements.put("se", "Selenium");
593                chemElements.put("sn", "Tin");
594                chemElements.put("te", "Tellurium");
595                chemElements.put("ti", "Titanium");
596                chemElements.put("v", "Vanadium");
597                chemElements.put("zn", "Zinc");
598        }
599        
600        private static void createNewGroups() {         
601                String[] groups = new String[] {"six_ring", "non_ar_6c_ring",
602                                "ketone", "amine", "alcohol", "ether", "ar_halide",
603                                "five_ring", "non_ar_5c_ring", "alkyl_halide",
604                                "methyl", "non_ar_hetero_5_ring", "nitro", "sulfo",
605                                "methoxy", "amine", "aldehyde", "sulfide",
606                                "non_ar_hetero_6_ring", "phenol", "carboxylic_acid",
607                                "ester", "imine", 
608                };
609                
610                List<String> list = Arrays.asList(groups);
611                newGroups.addAll(list);
612        }
613
614        /**
         * <p>To find out whether a substance is carcinogenic, go to
         * "http://ntp-server.niehs.nih.gov/" and click
         * on "Testing Status of Agents at NTP".</p>
618         * 
619         * Levels:
620         * <ul>
621         *      <li>CE = clear evidence</li>
622         *  <li>SE = some evidence</li>
623         *  <li>E = equivocal evidence</li>
624         *  <li>NE = no evidence</li>
625         * </ul>
626         * Levels CE and SE are positive examples. E and NE negative examples.
627         * Experiments are performed on rats and mice of both genders, so we
628         * have four evidence values. An example is positive if at least one
629         * value is SE or CE.
630         * 
631         * <p>Some values are taken from the IJCAI-97 paper of Muggleton.</p>
632         * 
         * <p>Positives (20): <br />
634         * <ul>
635         * <li>t3 (SE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCACAFD4-123F-7908-7B521E4F665EFBD9</li>
636         * <li>t4 (3CE+NE) - contradicts IJCAI-97 paper and should probably be case 75-52-5 instead of 75-52-8: http://ntp.niehs.nih.gov/index.cfm?objectid=BCE49084-123F-7908-7BE127F7AF1FFBB5</li>
637         * <li>t5: paper</li>
638         * <li>t7: paper</li>
639         * <li>t8: paper</li>
640         * <li>t9 (3CE+SE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD7C6869-123F-7908-7BDEA4CFAA55CEA8</li>
641         * <li>t10: paper</li>
642         * <li>t12 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCB0ADE0-123F-7908-7BEC101C7309C4DE</li>
643         * <li>t14 (2CE+2NE) probably 111-42-2 instead of 11-42-2: http://ntp.niehs.nih.gov/index.cfm?objectid=BCC60FF1-123F-7908-7B2D579AA48DE90C</li>
644         * <li>t15: paper</li>
645         * <li>t16 (2CE+SE+E): http://ntp.niehs.nih.gov/index.cfm?objectid=BCC5D9CE-123F-7908-7B959CCE5262468A</li>
646         * <li>t18 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCA087AA-123F-7908-7B79FDFDE3CDCF87</li>
647         * <li>t19 (2CE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCAE5690-123F-7908-7B02E35E2BB57694</li>
648         * <li>t20 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCF95607-123F-7908-7B0761D3C515CC12</li>
649         * <li>t21 (CE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCFCB63C-123F-7908-7BF910C2783AE9FE</li>
650         * <li>t22 (SE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD8345C2-123F-7908-7BC52FEF80F110E1</li>
651         * <li>t23 (4CE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCADD2D9-123F-7908-7B5C8180FE80B22F</li>
652         * <li>t24 (CE+E): http://ntp.niehs.nih.gov/index.cfm?objectid=BCFB19FF-123F-7908-7B845E176F13E6E1</li>
653         * <li>t25 (3CE+SE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD2D2A62-123F-7908-7B0DA824E782754C</li>
654         * <li>t30 (2CE+SE+E) : http://ntp.niehs.nih.gov/index.cfm?objectid=BCB13734-123F-7908-7BEBA533E35A48B7</li>
655         * </ul>
656         * </p>
657         * 
         * <p>Negatives (10, counting t29 which is listed as unclear below):
659         * <ul>
660         * <li>t1 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD9FF53C-123F-7908-7B123DAE0A25B122 </li>
661         * <li>t2 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCF8651E-123F-7908-7B21DD5ED83CD0FF </li>
662         * <li><strike>t4: paper</strike></li>
663         * <li>t6: paper</li>
664         * <li>t11: paper</li>
665         * <li>t13 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD136ED6-123F-7908-7B619EE79F2FD062</li>
666         * <li>t17: paper</li>
667         * <li>t26 (2E+2NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD1E6209-123F-7908-7B95EB8BAE662CE7</li>
668         * <li>t27 (E+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCAC5D00-123F-7908-7BC46ECB72A6C91B</li>
669         * <li>t28 (E+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD34E02A-123F-7908-7BC6791917B591DF</li>
670         * </ul>
671         * </p>
672         * 
         * <p>Unclear (1):
         * <ul>
         * <li>t29: probably a negative (see http://ntp.niehs.nih.gov/index.cfm?objectid=BD855EA1-123F-7908-7B573FC3C08188DC) but
         * no tests directly for this substance; this method treats it as a negative example</li>
         * </ul>
         * </p>
         * 
         * <p>The following examples are probably not part of the IJCAI PTE-2 challenge
         * (their reports are more recent than 1998):
         * <ul>
         * <li>pos: t21 (5/99), t25 (9/04), t30(10/01)</li>
         * <li>neg: t26 (5/99), t27 (05/01), t28 (05/00), t29 (09/02)</li>
         * </ul>
         * </p>
687         * @return A string for all examples as used in the conf file.
688         */
689        public static String getPTE2Examples() {
690                String[] pos = new String[] {"t3","t4","t5","t7","t8",
691                                "t9",
692                                "t10","t12",
693                                "t14","t15","t16","t18","t19","t20",
694                                "t21",
695                                "t22",
696                                "t23",
697                                "t24",
698                                "t25",
699                                "t30"};
700                String[] neg = new String[] {"t1", "t2",
701                                "t6", "t11", "t13",
702                                "t17","t26","t27",
703                                "t28","t29"
704                                };
705
706                String ret = "";
707                for(String posEx : pos) {
708                        if(learnCarcinogenic)
709                                ret += "+" + getURI2(posEx) + "\n";
710                        else
711                                ret += "-" + getURI2(posEx) + "\n";
712                }
713                for(String negEx : neg) {
714                        if(learnCarcinogenic)
715                                ret += "-" + getURI2(negEx) + "\n";
716                        else
717                                ret += "+" + getURI2(negEx) + "\n";
718                }
719                
720                return ret;
721        }
722        
723        private static void addMutagenesis(OWLOntology kb) {
724                String[] mutagenicCompounds = new String[] {
725                        "d101", "d104", "d106", "d107", "d112", "d113", "d117", 
726                        "d121", "d123", "d126", "d128", "d13", "d135", "d137", 
727                        "d139", "d140", "d143", "d144", "d145", "d146", "d147",
728                        "d152", "d153", "d154", "d155", "d156", "d159", "d160",
729                        "d161", "d163", "d164", "d166", "d168", "d171", "d173",
730                        "d174", "d177", "d179", "d18", "d180", "d182", "d183",
731                        "d185", "d186", "d187", "d188", "d189", "d19", "d191",
732                        "d192", "d193", "d195", "d197", "d2", "d201", "d202", 
733                        "d205", "d206", "d207", "d211", "d214", "d215", "d216",
734                        "d224", "d225", "d227", "d228", "d229", "d231", "d235",
735                        "d237", "d239", "d242", "d245", "d246", "d249", "d251",
736                        "d254", "d257", "d258", "d261", "d264", "d266", "d269",
737                        "d27", "d270", "d271", "d28", "d288", "d292", "d297",
738                        "d300", "d308", "d309", "d311", "d313", "d314", "d322",
739                        "d323", "d324", "d329", "d330", "d332", "d334", "d35",
740                        "d36", "d37", "d38", "d41", "d42", "d48", "d50", "d51",
741                        "d54", "d58", "d61", "d62", "d63", "d66", "d69", "d72",
742                        "d76", "d77", "d78", "d84", "d86", "d89", "d92", "d96"};
743                TreeSet<String> mutagenic = new TreeSet<>(Arrays.asList(mutagenicCompounds));
744        
745                for(String compound : compounds) {
746                        if(mutagenic.contains(compound)) {
747                                OWLAxiom muta = getBooleanDatatypePropertyAssertion(compound, "isMutagenic", true);
748                                man.addAxiom(kb, muta);
749                        } else {
750                                OWLAxiom muta = getBooleanDatatypePropertyAssertion(compound, "isMutagenic", false);
751                                man.addAxiom(kb, muta);
752                        }
753                }
754        }
755}