001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.decisiontrees.tdt; 020 021import java.util.ArrayList; 022import java.util.Set; 023import java.util.SortedSet; 024import java.util.Stack; 025import java.util.TreeSet; 026 027//import org.dllearner.algorithms.celoe.CELOE; 028import org.dllearner.core.AbstractClassExpressionLearningProblem; 029import org.dllearner.core.AbstractReasonerComponent; 030import org.dllearner.core.ComponentAnn; 031import org.dllearner.core.ComponentInitException; 032import org.dllearner.core.EvaluatedDescription; 033import org.dllearner.refinementoperators.RefinementOperator; 034import org.semanticweb.owlapi.model.OWLClassExpression; 035import org.semanticweb.owlapi.model.OWLIndividual; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038import org.dllearner.algorithms.decisiontrees.refinementoperators.DLTreesRefinementOperator; 039import org.dllearner.algorithms.decisiontrees.tdt.model.DLTree; 040import org.dllearner.algorithms.decisiontrees.utils.Couple; 041import org.dllearner.algorithms.decisiontrees.utils.Npla; 042import org.dllearner.algorithms.decisiontrees.utils.Split; 043import org.dllearner.learningproblems.PosNegUndLP; 044//import evaluation.Parameters; 045//import knowledgeBasesHandler.KnowledgeBase; 046@ComponentAnn(name="TDT", shortName="tdt", version=1.0, description="A Terminological Decision Tree") 047 048public class TDTClassifier extends AbstractTDTClassifier { 049 private static Logger logger = LoggerFactory.getLogger(TDTClassifier.class); 050 private DLTree currentmodel; 051 052 053 //private RefinementOperator op; 054 055 /** 056 * Empty constructor for Spring 057 */ 058 public TDTClassifier(){ 059 super(); 060 061 } 062 063 064 // public TDTClassifier(KnowledgeBase k){ 065 // 066 // super(k); 067 // 068 // } 069 // 070 071 public TDTClassifier(AbstractClassExpressionLearningProblem problem, AbstractReasonerComponent reasoner, RefinementOperator op){ 072 super(problem, reasoner,op); 073 } 074 075 @Override 076 public void init() throws ComponentInitException{ 077 078 super.init(); 079// if(operator==null){ 080// operator=new TDTRefinementOperatorWrapper(super.learningProblem,reasoner,10); 081// 082// } 083// else{ 084// ((TDTRefinementOperatorWrapper)operator).setBeam(10); 085// ((TDTRefinementOperatorWrapper)operator).setLp(learningProblem); 086// ((TDTRefinementOperatorWrapper)operator).setRs(reasoner); 087// } 088 initialized = true; 089 } 090 091 092 @Override 093 public DLTree induceDLTree(SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs) { 094 logger.info("Learning problem\t p:"+posExs.size()+"\t n:"+negExs.size()+"\t u:"+undExs.size()+"\t prPos:"+prPos+"\t prNeg:"+prNeg+"\n"); 095 //ArrayList<OWLIndividual> truePos= posExs; 096 //ArrayList<OWLIndividual> trueNeg= negExs; 097 098 DLTreesRefinementOperator dlTreesRefinementOperator = (DLTreesRefinementOperator)operator; 099 100 Npla<SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, Integer, Double, Double> examples = new Npla<>(posExs, negExs, undExs, 10, prPos, prNeg); 101 DLTree tree = new DLTree(); // new (sub)tree 102 Stack<Couple<DLTree,Npla<SortedSet<OWLIndividual>,SortedSet<OWLIndividual>,SortedSet<OWLIndividual>, Integer, Double, Double>>> stack= new Stack<>(); 103 Couple<DLTree,Npla<SortedSet<OWLIndividual>,SortedSet<OWLIndividual>,SortedSet<OWLIndividual>, Integer, Double, Double>> toInduce= new Couple<>(); 104 toInduce.setFirstElement(tree); 105 toInduce.setSecondElement(examples); 106 stack.push(toInduce); 107 108 Stack<DLTree> lastTrees= new Stack<>(); // for refine hierarchically a concept 109 110 while(!stack.isEmpty()){ 111 //System.out.printf("Stack: %d \n",stack.size()); 112 Couple<DLTree, Npla<SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, Integer, Double, Double>> current= stack.pop(); // extract the next element 113 DLTree currentTree= current.getFirstElement(); 114 Npla<SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, Integer, Double, Double> currentExamples= current.getSecondElement(); 115 // set of negative, positive and undefined example 116 posExs=currentExamples.getFirst(); 117 negExs=currentExamples.getSecond(); 118 undExs=currentExamples.getThird(); 119 if (posExs.size() == 0 && negExs.size() == 0) // no exs 120 if (prPos >= prNeg) { // prior majority 121 currentTree.setRoot(OWL_THING); // set positive leaf 122 } 123 else { // prior majority of negatives 124 currentTree.setRoot(OWL_NOTHING); // set negative leaf 125 } 126 127 // double numPos = posExs.size() + undExs.size()*prPos; 128 // double numNeg = negExs.size() + undExs.size()*prNeg; 129 else{ 130 double numPos = posExs.size(); 131 double numNeg = negExs.size(); 132 double perPos = numPos/(numPos+numNeg); 133 double perNeg = numNeg/(numPos+numNeg); 134 // prPos=perPos; 135 // prNeg=perNeg; 136 137 if (perNeg==0 && perPos > puritythreshold) { // no negative 138 currentTree.setRoot(dataFactory.getOWLThing()); // set positive leaf 139 140 } 141 else{ 142 if (perPos==0 && perNeg > puritythreshold) { // no positive 143 currentTree.setRoot(dataFactory.getOWLNothing()); // set negative leaf 144 145 } 146 // else (a non-leaf node) ... 147 else{ 148 OWLClassExpression[] cConcepts= new OWLClassExpression[0]; 149 150 Set<OWLClassExpression> refine = null; //dlTreesRefinementOperator.refine(dataFactory.getOWLThing(), posExs, negExs); 151 if (lastTrees.isEmpty()){ 152 refine = dlTreesRefinementOperator.refine(dataFactory.getOWLThing(), posExs, negExs); 153 } 154 else 155 refine = dlTreesRefinementOperator.refine(lastTrees.pop().getRoot(), posExs, negExs); 156 157 158 159 ArrayList<OWLClassExpression> cConceptsL = new ArrayList<>(refine); 160 // cConceptsL= getRandomSelection(cConceptsL); // random selection of feature set 161 162 163 cConcepts = cConceptsL.toArray(cConcepts); 164 165 // select node concept 166 //OWLClassExpression newRootConcept = //Parameters.CCP?(h 167 OWLClassExpression newRootConcept = null; 168 if (dlTreesRefinementOperator.getRo()==DLTreesRefinementOperator.ORIGINAL) 169 newRootConcept= ccp?heuristic.selectBestConceptCCP(cConcepts, posExs, negExs, undExs, prPos, prNeg):(heuristic.selectBestConcept(cConcepts, posExs, negExs, undExs, prPos, prNeg)); 170 else 171 newRootConcept= heuristic.selectWorstConcept(cConcepts, posExs, negExs, undExs, perPos, perNeg); 172 SortedSet<OWLIndividual> posExsT = new TreeSet<>(); 173 SortedSet<OWLIndividual> negExsT = new TreeSet<>(); 174 SortedSet<OWLIndividual> undExsT = new TreeSet<>(); 175 SortedSet<OWLIndividual> posExsF = new TreeSet<>(); 176 SortedSet<OWLIndividual> negExsF = new TreeSet<>(); 177 SortedSet<OWLIndividual> undExsF = new TreeSet<>(); 178 179 Split.split(newRootConcept, dataFactory, reasoner, posExs, negExs, undExs, posExsT, negExsT, undExsT, posExsF, negExsF, undExsF); 180 // select node concept 181 currentTree.setRoot(newRootConcept); 182 // build subtrees 183 184 185 DLTree posTree= new DLTree(); 186 DLTree negTree= new DLTree(); // recursive calls simulation 187 currentTree.setPosTree(posTree); 188 currentTree.setNegTree(negTree); 189 Npla<SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, Integer, Double, Double> npla1 = new Npla<>(posExsT, negExsT, undExsT, 10, perPos, perNeg); 190 Npla<SortedSet<OWLIndividual>,SortedSet<OWLIndividual>,SortedSet<OWLIndividual>, Integer, Double, Double> npla2 = new Npla<>(posExsF, negExsF, undExsF, 10, perPos, perNeg); 191 Couple<DLTree,Npla<SortedSet<OWLIndividual>,SortedSet<OWLIndividual>,SortedSet<OWLIndividual>, Integer, Double, Double>> pos= new Couple<>(); 192 pos.setFirstElement(posTree); 193 pos.setSecondElement(npla1); 194 195 // negative branch 196 Couple<DLTree,Npla<SortedSet<OWLIndividual>,SortedSet<OWLIndividual>,SortedSet<OWLIndividual>, Integer, Double, Double>> neg= new Couple<>(); 197 neg.setFirstElement(negTree); 198 neg.setSecondElement(npla2); 199 stack.push(neg); 200 stack.push(pos); 201 lastTrees.push(currentTree); 202 } 203 } 204 } 205 } 206 stop= true; 207 208 return tree; 209 210 } 211 212/** 213 * Procedure for deriving a concept description from a TDT classifier 214 * @param model 215 * @return 216 */ 217public OWLClassExpression deriveDefinition(DLTree model){ 218 OWLClassExpression definition = DLTree.deriveDefinition(model, true); 219 if (definition.isOWLThing()) 220 return dataFactory.getOWLObjectComplementOf(DLTree.deriveDefinition(model, false)); // derive concept definition from the positive instances 221 else return definition; 222 //it is possible to derive the complement concept description by setting the flag to false 223 224} 225 226 227@Override 228public void start() { 229 230 // TODO Auto-generated method stub 231 232 PosNegUndLP posNegUndLP = (PosNegUndLP)learningProblem; 233 SortedSet<OWLIndividual> posExs = (SortedSet<OWLIndividual>)(posNegUndLP.getPositiveExamples()); 234 SortedSet<OWLIndividual> negExs = (SortedSet<OWLIndividual>)posNegUndLP.getNegativeExamples(); 235 SortedSet<OWLIndividual> undExs = (SortedSet<OWLIndividual>)posNegUndLP.getUncertainExamples(); 236 237 System.out.println(posExs.size()); 238 System.out.println(negExs.size()); 239 System.out.println(undExs.size()); 240 if (binaryClassification){ 241 SortedSet<OWLIndividual> allExamples= new TreeSet<>(); 242 allExamples.addAll(posExs); 243 allExamples.addAll(negExs); 244 allExamples.addAll(undExs); 245 //System.out.printf("--- Query Concept #%d \n",c); 246 247 248 249 // the individuals of the ABox are the training individuals 250 251 OWLIndividual[] trainingExs= allExamples.toArray(new OWLIndividual[allExamples.size()]); 252 Split.splitting(dataFactory, reasoner, trainingExs, posExs, negExs, undExs, classToDescribe, binaryClassification); 253 } 254 255 prPos = (double)posExs.size()/(posExs.size()+ negExs.size()+ undExs.size()); 256 prNeg = (double)negExs.size()/(posExs.size()+ negExs.size()+ undExs.size()); 257 258 logger.debug("Training set composition: "+ posExs.size()+" - "+ negExs.size()+"-"+undExs.size()); 259 260 double normSum = prPos+prNeg; 261 if (normSum==0) { prPos=.5; prNeg=.5; } 262 else { prPos=prPos/normSum; prNeg=prNeg/normSum; } 263 264 logger.info("New learning problem prepared.\n"); 265 logger.info("Learning a tree "); 266 267 268 DLTree tree= this.induceDLTree(posExs, negExs, undExs); //tree induction 269 currentmodel=tree; 270 271 stop(); 272 273 274} 275 276 @Override 277 public OWLClassExpression getCurrentlyBestDescription() { 278 // TODO Auto-generated method stub 279 return DLTree.deriveDefinition(currentmodel, false); 280 } 281 282 @Override 283 public EvaluatedDescription getCurrentlyBestEvaluatedDescription() { 284 // TODO Auto-generated method stub 285 return null; 286 } 287 288 public DLTree getCurrentmodel() { 289 // TODO Auto-generated method stub 290 return currentmodel; 291 } 292 293}