001/** 002 * This file is part of LEAP. 003 * 004 * LEAP was implemented as a plugin of DL-Learner http://dl-learner.org, but 005 * some components can be used as stand-alone. 006 * 007 * LEAP is free software; you can redistribute it and/or modify it under the 008 * terms of the GNU General Public License as published by the Free Software 009 * Foundation; either version 3 of the License, or (at your option) any later 010 * version. 011 * 012 * LEAP is distributed in the hope that it will be useful, but WITHOUT ANY 013 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 014 * A PARTICULAR PURPOSE. See the GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License along with 017 * this program. If not, see <http://www.gnu.org/licenses/>. 018 * 019 */ 020package org.dllearner.cli.unife; 021 022import java.text.DecimalFormat; 023import java.util.Collections; 024import java.util.HashSet; 025import java.util.LinkedList; 026import java.util.List; 027import java.util.Random; 028import java.util.Set; 029import java.util.TreeSet; 030import mpi.MPI; 031import org.apache.log4j.Logger; 032//import org.dllearner.core.AbstractCELA; 033import org.dllearner.core.AbstractLearningProblem; 034import org.dllearner.core.AbstractReasonerComponent; 035import org.dllearner.core.ComponentInitException; 036import org.dllearner.learningproblems.ClassLearningProblem; 037//import org.dllearner.learningproblems.Heuristics; 038import org.dllearner.learningproblems.PosNegLP; 039import org.dllearner.learningproblems.PosOnlyLP; 040import org.apache.commons.io.FilenameUtils; 041import org.dllearner.cli.CrossValidation; 042import org.dllearner.core.probabilistic.unife.AbstractPSLA; 043import org.dllearner.algorithms.probabilistic.parameter.unife.edge.AbstractEDGE; 044import org.dllearner.algorithms.probabilistic.structure.unife.leap.AbstractLEAP; 045import org.dllearner.utils.unife.OWLUtils; 046import org.dllearner.utils.unife.ReflectionHelper; 047import org.semanticweb.owlapi.apibinding.OWLManager; 048import org.semanticweb.owlapi.model.AxiomType; 049import org.semanticweb.owlapi.model.OWLAxiom; 050import org.semanticweb.owlapi.model.OWLClass; 051import org.semanticweb.owlapi.model.OWLClassAssertionAxiom; 052import org.semanticweb.owlapi.model.OWLDataFactory; 053import org.semanticweb.owlapi.model.OWLIndividual; 054import org.semanticweb.owlapi.model.OWLOntology; 055import org.semanticweb.owlapi.model.OWLOntologyCreationException; 056import org.semanticweb.owlapi.model.OWLOntologyStorageException; 057import unife.bundle.utilities.BundleUtilities; 058import unife.edge.mpi.MPIUtilities; 059 060/** 061 * Performs a pseudo cross validation for the given problem using the LEAP 062 * system. It is not a real k-fold cross validation, because this class executes 063 * only a k-fold training. It produces k output file which must be submitted to 064 * testing 065 * 066 * @author Jens Lehmann 067 * @author Giuseppe Cota <giuseppe.cota@unife.it> 068 */ 069public class LEAPCrossValidation extends CrossValidation { 070 071 private static final Logger logger = Logger.getLogger(LEAPCrossValidation.class); 072 073 public LEAPCrossValidation(AbstractPSLA psla, int folds, boolean leaveOneOut, boolean parallel) throws OWLOntologyStorageException, OWLOntologyCreationException { 074 075 boolean master = true; 076 077 if (parallel) { 078 master = MPIUtilities.isMaster(MPI.COMM_WORLD); 079 } 080 081 AbstractLearningProblem lp = psla.getLearningProblem(); 082 083 DecimalFormat df = new DecimalFormat(); 084 085 // the training sets used later on 086 List<Set<OWLIndividual>> trainingSetsPos = new LinkedList<>(); 087 List<Set<OWLIndividual>> trainingSetsNeg = new LinkedList<>(); 088 List<Set<OWLIndividual>> testSetsPos = new LinkedList<>(); 089 List<Set<OWLIndividual>> testSetsNeg = new LinkedList<>(); 090 091 // get individuals and shuffle them too 092 Set<OWLIndividual> posExamples = new HashSet(); 093 Set<OWLIndividual> negExamples = new HashSet(); 094 logger.debug("Setting cross validation"); 095 if (lp instanceof PosNegLP) { 096 posExamples = ((PosNegLP) lp).getPositiveExamples(); 097 negExamples = ((PosNegLP) lp).getNegativeExamples(); 098 } else if (lp instanceof PosOnlyLP) { 099 posExamples = ((PosNegLP) lp).getPositiveExamples(); 100 //negExamples = Helper.difference(lp.getReasoner().getIndividuals(), posExamples); 101 negExamples = new HashSet<>(); 102 } else if (lp instanceof ClassLearningProblem) { 103 try { 104 posExamples = new HashSet((List<OWLIndividual>) ReflectionHelper.getPrivateField(lp, "classInstances")); 105 negExamples = new HashSet((List<OWLIndividual>) ReflectionHelper.getPrivateField(lp, "superClassInstances")); 106 // if the number of negative examples is lower than the number of folds 107 // get as negative examples all the individuals that are not instances of ClassToDescribe 108 if (negExamples.size() < folds) { 109 logger.info("The number of folds is higher than the number of " 110 + "negative examples. Selecting the instances of Thing which " 111 + "are non instances of ClasstoDescribe as negative Examples"); 112 AbstractReasonerComponent reasoner = lp.getReasoner(); 113 // get as negative examples all the individuals which belong to the class Thing 114 // but not to the ClassToDescribe 115 negExamples = reasoner.getIndividuals(OWLManager.getOWLDataFactory().getOWLThing()); 116 negExamples.removeAll(posExamples); 117 } 118 } catch (Exception e) { 119 logger.error("Cannot get positive and negative individuals for the cross validation"); 120 logger.error(e); 121 System.exit(-2); 122 } 123 } else { 124 throw new IllegalArgumentException("Only ClassLearningProblem, PosNeg and PosOnly learning problems are supported"); 125 } 126 List<OWLIndividual> posExamplesList = new LinkedList<>(posExamples); 127 List<OWLIndividual> negExamplesList = new LinkedList<>(negExamples); 128 Collections.shuffle(posExamplesList, new Random(1)); 129 Collections.shuffle(negExamplesList, new Random(2)); 130 131 // sanity check whether nr. of folds makes sense for this benchmark 132 if (!leaveOneOut && (posExamples.size() < folds || negExamples.size() < folds)) { 133 logger.error("The number of folds is higher than the number of " 134 + "positive/negative examples. This can result in empty test sets. Exiting."); 135 System.exit(0); 136 } 137 138 if (leaveOneOut) { 139 // note that leave-one-out is not identical to k-fold with 140 // k = nr. of examples in the current implementation, because 141 // with n folds and n examples there is no guarantee that a fold 142 // is never empty (this is an implementation issue) 143 int nrOfExamples = posExamples.size() + negExamples.size(); 144 for (int i = 0; i < nrOfExamples; i++) { 145 // ... 146 } 147 logger.error("Leave-one-out not supported yet."); 148 System.exit(1); 149 } else { 150 // calculating where to split the sets, ; note that we split 151 // positive and negative examples separately such that the 152 // distribution of positive and negative examples remains similar 153 // (note that there are better but more complex ways to implement this, 154 // which guarantee that the sum of the elements of a fold for pos 155 // and neg differs by at most 1 - it can differ by 2 in our implementation, 156 // e.g. with 3 folds, 4 pos. examples, 4 neg. examples) 157 int[] splitsPos = calculateSplits(posExamples.size(), folds); 158 int[] splitsNeg = calculateSplits(negExamples.size(), folds); 159 160// System.out.println(splitsPos[0]); 161// System.out.println(splitsNeg[0]); 162 // calculating training and test sets 163 for (int i = 0; i < folds; i++) { 164 Set<OWLIndividual> testPos = getTestingSet(posExamplesList, splitsPos, i); 165 Set<OWLIndividual> testNeg = getTestingSet(negExamplesList, splitsNeg, i); 166 testSetsPos.add(i, testPos); 167 testSetsNeg.add(i, testNeg); 168 trainingSetsPos.add(i, getTrainingSet(posExamples, testPos)); 169 trainingSetsNeg.add(i, getTrainingSet(negExamples, testNeg)); 170 } 171 172 } 173 174 String completeLearnedOntology = psla.getOutputFile(); 175 String cloBase = FilenameUtils.removeExtension(completeLearnedOntology); 176 String cloExt = FilenameUtils.getExtension(completeLearnedOntology); 177 178 String positiveFile = "posExamples.owl"; 179 String pfBase = FilenameUtils.removeExtension(positiveFile); 180 String pfExt = FilenameUtils.getExtension(positiveFile); 181 String negativeFile = "negExamples.owl"; 182 String nfBase = FilenameUtils.removeExtension(negativeFile); 183 String nfExt = FilenameUtils.getExtension(negativeFile); 184 185 logger.debug("Performing Cross Validation"); 186 // run the algorithm 187 for (int currFold = 0; currFold < folds; currFold++) { 188 logger.debug("Current Fold: " + (currFold + 1)); 189 // setting positive and negative individuals 190 final Set<OWLIndividual> trainPos = trainingSetsPos.get(currFold); 191 final Set<OWLIndividual> trainNeg = trainingSetsNeg.get(currFold); 192 final Set<OWLIndividual> testPos = testSetsPos.get(currFold); 193 final Set<OWLIndividual> testNeg = testSetsNeg.get(currFold); 194 if (lp instanceof PosNegLP) { 195 ((PosNegLP) lp).setPositiveExamples(trainPos); 196 ((PosNegLP) lp).setNegativeExamples(trainNeg); 197 try { 198 lp.init(); 199 } catch (ComponentInitException e) { 200 logger.error(e); 201 logger.error(e.getLocalizedMessage()); 202 System.exit(-2); 203 } 204 } else if (lp instanceof PosOnlyLP) { 205 // il cross training viene fatto solo per gli esempi/individui positivi 206 ((PosOnlyLP) lp).setPositiveExamples(new TreeSet<OWLIndividual>(trainPos)); 207 try { 208 lp.init(); 209 } catch (ComponentInitException e) { 210 logger.error(e); 211 logger.error(e.getLocalizedMessage()); 212 System.exit(-2); 213 } 214 // set negative f 215 } else if (lp instanceof ClassLearningProblem) { 216 try { 217 // Initialize the ClassLearningProblem object first and then 218 // modify his private fields 219 //lp.init(); 220 ReflectionHelper.setPrivateField(lp, "classInstances", trainPos); 221 ReflectionHelper.setPrivateField(lp, "superClassInstances", trainNeg); 222 ReflectionHelper.setPrivateField(lp, "negatedClassInstances", trainNeg); 223 } catch (Exception e) { 224 logger.error("Cannot set positive and negative individuals for the cross validation"); 225 logger.error(e); 226 System.exit(-2); 227 } 228 } 229 230 AbstractEDGE edge = (AbstractEDGE) psla.getLearningParameterAlgorithm(); 231 OWLOntology startOntology = null; 232 try { 233 startOntology = BundleUtilities.copyOntology(edge.getSourcesOntology()); 234 235 } catch (OWLOntologyCreationException e) { 236 e.printStackTrace(); 237 } 238 239 psla.setOutputFile(cloBase + (currFold + 1) + "." + cloExt); 240 try { 241 //rs.init(); 242 edge.init(); 243 psla.init(); 244 //edge.setPositiveFile(pfBase + (currFold + 1) + "." + pfExt); 245 //edge.setNegativeFile(nfBase + (currFold + 1) + "." + nfExt); 246 //edge.init(); 247 } catch (ComponentInitException e) { 248 // TODO Auto-generated catch block 249 e.printStackTrace(); 250 } 251 252 psla.start(); 253 254 if (master) { 255 Set<OWLAxiom> posExamplesAxioms = edge.getPositiveExampleAxioms(); 256 Set<OWLAxiom> negExamplesAxioms = edge.getNegativeExampleAxioms(); 257 OWLDataFactory odf = OWLManager.getOWLDataFactory(); 258 // in the case replace superClass 259 if (lp instanceof ClassLearningProblem) { 260 ClassLearningProblem clp = (ClassLearningProblem) lp; 261 Set<OWLAxiom> tempPos = new HashSet<>(); 262 Set<OWLAxiom> tempNeg = new HashSet<>(); 263 264 for (OWLAxiom ax : posExamplesAxioms) { 265 if (ax.isOfType(AxiomType.CLASS_ASSERTION)) { 266 OWLClassAssertionAxiom ax1 = (OWLClassAssertionAxiom) ax; 267 tempPos.add(odf.getOWLClassAssertionAxiom(clp.getClassToDescribe(), ax1.getIndividual())); 268 } 269 } 270 for (OWLAxiom ax : negExamplesAxioms) { 271 if (ax.isOfType(AxiomType.CLASS_ASSERTION)) { 272 OWLClassAssertionAxiom ax1 = (OWLClassAssertionAxiom) ax; 273 tempNeg.add(odf.getOWLClassAssertionAxiom(clp.getClassToDescribe(), ax1.getIndividual())); 274 } 275 } 276 posExamplesAxioms = tempPos; 277 negExamplesAxioms = tempNeg; 278 } 279 // convert test set into axioms 280 Set<OWLAxiom> testAxiomsPos = new HashSet<>(); 281 Set<OWLAxiom> testAxiomsNeg = new HashSet<>(); 282 OWLClass clazz = ((AbstractLEAP) psla).getDummyClass(); 283 if (lp instanceof ClassLearningProblem) { 284 clazz = ((ClassLearningProblem) lp).getClassToDescribe(); 285 } 286 for (OWLIndividual ind : testPos) { 287 testAxiomsPos.add(odf.getOWLClassAssertionAxiom(clazz, ind)); 288 } 289 for (OWLIndividual ind : testNeg) { 290 testAxiomsNeg.add(odf.getOWLClassAssertionAxiom(clazz, ind)); 291 } 292 293 OWLUtils.saveAxioms(testAxiomsPos, "posTestExamples" + (currFold + 1) + "." + pfExt, "OWLXML"); 294 OWLUtils.saveAxioms(testAxiomsNeg, "negTestExamples" + (currFold + 1) + "." + nfExt, "OWLXML"); 295 OWLUtils.saveAxioms(posExamplesAxioms, pfBase + (currFold + 1) + "." + pfExt, "OWLXML"); 296 OWLUtils.saveAxioms(negExamplesAxioms, nfBase + (currFold + 1) + "." + nfExt, "OWLXML"); 297 } 298 } 299 } 300 301 protected int getCorrectPosClassified(AbstractReasonerComponent rs, OWLClass concept, Set<OWLIndividual> testSetPos) { 302 return rs.hasType(concept, testSetPos).size(); 303 } 304 305 protected int getCorrectNegClassified(AbstractReasonerComponent rs, OWLClass concept, Set<OWLIndividual> testSetNeg) { 306 return testSetNeg.size() - rs.hasType(concept, testSetNeg).size(); 307 } 308 309 public static Set<OWLIndividual> getTestingSet(List<OWLIndividual> examples, int[] splits, int fold) { 310 int fromIndex; 311 // we either start from 0 or after the last fold ended 312 if (fold == 0) { 313 fromIndex = 0; 314 } else { 315 fromIndex = splits[fold - 1]; 316 } 317 // the split corresponds to the ends of the folds 318 int toIndex = splits[fold]; 319 320// System.out.println("from " + fromIndex + " to " + toIndex); 321 Set<OWLIndividual> testingSet = new HashSet<>(); 322 // +1 because 2nd element is exclusive in subList method 323 testingSet.addAll(examples.subList(fromIndex, toIndex)); 324 return testingSet; 325 } 326 327}