001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.decisiontrees.tdt; 020 021import java.util.ArrayList; 022import java.util.Collections; 023import java.util.List; 024import java.util.SortedSet; 025import java.util.Stack; 026 027import org.dllearner.algorithms.decisiontrees.heuristics.TreeInductionHeuristics; 028import org.dllearner.algorithms.decisiontrees.refinementoperators.DLTreesRefinementOperator; 029import org.dllearner.algorithms.decisiontrees.tdt.model.DLTree; 030import org.dllearner.core.AbstractCELA; 031import org.dllearner.core.AbstractClassExpressionLearningProblem; 032import org.dllearner.core.AbstractReasonerComponent; 033import org.dllearner.core.ComponentAnn; 034import org.dllearner.core.ComponentInitException; 035import org.dllearner.core.config.ConfigOption; 036import org.dllearner.refinementoperators.RefinementOperator; 037import org.semanticweb.owlapi.model.OWLClassExpression; 038import org.semanticweb.owlapi.model.OWLIndividual; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041 042@ComponentAnn(name="ATDT", shortName="atdt", version=1.0, description="An abstract Terminological Decision Tree") 043public abstract class AbstractTDTClassifier extends AbstractCELA { 044 045 private static Logger logger = LoggerFactory.getLogger(AbstractTDTClassifier.class); 046 047 protected boolean stop; 048 049 public double getPuritythreshold() { 050 return puritythreshold; 051 } 052 053 public void setPuritythreshold(double puritythreshold) { 054 this.puritythreshold = puritythreshold; 055 } 056 057// public int getBeam() { 058// return beam; 059// } 060 061// public void setBeam(int beam) { 062// this.beam = beam; 063// } 064 065 public boolean isBinaryClassification() { 066 return binaryClassification; 067 } 068 069 public void setBinaryClassification(boolean binaryClassification) { 070 this.binaryClassification = binaryClassification; 071 } 072 073 public OWLClassExpression getClassToDescribe() { 074 return classToDescribe; 075 } 076 077 public void setClassToDescribe(OWLClassExpression classToDescribe) { 078 this.classToDescribe = classToDescribe; 079 } 080 081 public TreeInductionHeuristics getHeuristic() { 082 return heuristic; 083 } 084 085 public void setHeuristic(TreeInductionHeuristics heuristic) { 086 this.heuristic = heuristic; 087 } 088 089 public RefinementOperator getOperator() { 090 return operator; 091 } 092 093 public void setOperator(RefinementOperator operator) { 094 this.operator = operator; 095 } 096 097 @ConfigOption(defaultValue = "0.05", description = "Purity threshold for setting a leaf") 098 protected double puritythreshold; 099 100 //@ConfigOption(defaultValue = "4") 101 //protected int beam; 102 103 @ConfigOption(defaultValue = "false", description = "if it is a binary classification problem") 104 protected boolean binaryClassification; 105 106 @ConfigOption(defaultValue = "false", description = "value for limiting the number of generated concepts") 107 protected boolean ccp; 108 109 public boolean isCcp() { 110 return ccp; 111 } 112 113 public void setCcp(boolean ccp) { 114 this.ccp = ccp; 115 } 116 117 @ConfigOption(defaultValue = "false", description = "for overcoming the problem of missing values in tree algorithms.tree.models") 118 protected boolean missingValueTreatmentForTDT; 119 protected double prPos; 120 protected double prNeg; 121 @ConfigOption(description = "concept for splitting undefined examples into positive and negative for binary classification problems") 122 protected OWLClassExpression classToDescribe; //target concept 123 @ConfigOption(description = "the heuristic instance to use", defaultValue = "TreeInductionHeuristics") 124 protected TreeInductionHeuristics heuristic; // heuristic 125 //protected LengthLimitedRefinementOperator operator ;// refinement operator 126 127 @ConfigOption(description = "the refinement operator instance to use", defaultValue = "DLTreesRefinementOperator") 128 protected RefinementOperator operator; 129 130 public boolean isMissingValueTreatmentForTDT() { 131 return missingValueTreatmentForTDT; 132 } 133 134 public void setMissingValueTreatmentForTDT(boolean missingValueTreatmentForTDT) { 135 this.missingValueTreatmentForTDT = missingValueTreatmentForTDT; 136 } 137 138 139 140 public AbstractTDTClassifier(AbstractClassExpressionLearningProblem problem, AbstractReasonerComponent reasoner, RefinementOperator op) { 141 super(problem, reasoner); 142 // configurator = new CELOEConfigurator(this); 143 144 this.operator=op; 145 System.out.println(operator==null); 146 } 147 148 149 @Override 150 public void start() { 151 152 // TODO Auto-generated method stub 153 154 155 156 } 157 158 159 @Override 160 public void init() throws ComponentInitException { 161 // TODO Auto-generated method stub 162 baseURI = reasoner.getBaseURI(); 163 prefixes = reasoner.getPrefixes(); 164 165 // if no one injected a heuristic, we use a default one 166 if(heuristic == null) { 167 heuristic = new TreeInductionHeuristics(); 168 heuristic.setProblem(learningProblem); 169 heuristic.setReasoner(reasoner); 170 heuristic.init(); 171 } 172 173 174 175 176 if(operator == null) { 177 System.out.println("OPERATOR:"+operator==null); 178// // default operator 179 operator = new DLTreesRefinementOperator(); 180 ((DLTreesRefinementOperator)operator).setReasoner(reasoner); 181 ((DLTreesRefinementOperator)operator).setBeam(10); // default value 182//// 183//// if(operator instanceof CustomStartRefinementOperator) { 184//// ((CustomStartRefinementOperator)operator).setStartClass(startClass); 185//// } 186//// if(operator instanceof ReasoningBasedRefinementOperator) { 187//// ((ReasoningBasedRefinementOperator)operator).setReasoner(reasoner); 188//// } 189 operator.init(); 190 191 //System.out.println(operator==null); 192// 193// 194 } 195 196 //start to learn the new current concept description 197 198 199 initialized = true; 200 } 201 202 @Override 203 public void stop() { 204 // TODO Auto-generated method stub 205 stop = true; 206 207 } 208 209 @Override 210 public boolean isRunning() { 211 // TODO Auto-generated method stub 212 return (!stop); 213 } 214 215 public abstract DLTree induceDLTree(SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs); 216 217 public int classify(OWLIndividual indTestEx, DLTree trees) { 218 219 //int length = testConcepts!=null?testConcepts.length:1; 220 //for (int c=0; c < length; c++) { 221 if (missingValueTreatmentForTDT){ 222 ArrayList<Integer> list= new ArrayList<>(); 223 return classifyExample(list,indTestEx, trees); 224 225 } 226 else 227 return classifyExample(indTestEx, trees); 228 229 //} // for c 230 231 } 232 233 234 public int classifyExample(OWLIndividual indTestEx, DLTree tree) { 235 236 Stack<DLTree> stack= new Stack<>(); 237 //OWLDataFactory dataFactory = kb.getDataFactory(); 238 stack.add(tree); 239 int result=0; 240 boolean stop=false; 241 242 if (!binaryClassification){ 243 while(!stack.isEmpty() && !stop){ 244 DLTree currentTree= stack.pop(); 245 246 OWLClassExpression rootClass = currentTree.getRoot(); 247 // System.out.println("Root class: "+ rootClass); 248 if (rootClass.equals(dataFactory.getOWLThing())){ 249 stop=true; 250 result=+1; 251 252 } 253 else if (rootClass.equals(dataFactory.getOWLNothing())){ 254 stop=true; 255 result=-1; 256 257 }else if (reasoner.hasType(rootClass, indTestEx)) 258 stack.push(currentTree.getPosSubTree()); 259 else if (reasoner.hasType(dataFactory.getOWLObjectComplementOf(rootClass), indTestEx)) 260 stack.push(currentTree.getNegSubTree()); 261 else { 262 stop=true; 263 result=0; 264 265 } 266 267 } 268 }else{ 269 while(!stack.isEmpty() && !stop){ 270 DLTree currentTree= stack.pop(); 271 272 OWLClassExpression rootClass = currentTree.getRoot(); 273 // System.out.println("Root class: "+ rootClass); 274 if (rootClass.equals(dataFactory.getOWLThing())){ 275 stop=true; 276 result=+1; 277 278 } 279 else if (rootClass.equals(dataFactory.getOWLNothing())){ 280 stop=true; 281 result=-1; 282 283 }else if (reasoner.hasType(rootClass, indTestEx)) 284 stack.push(currentTree.getPosSubTree()); 285 else 286 stack.push(currentTree.getNegSubTree()); // for those kb having no full complement 287 288 } 289 } 290 291 292 return result; 293 294} 295 296/** 297 * Alternative exploration of a Tree classifier for DL ontology 298 * @param list 299 * @param indTestEx 300 * @param tree 301 * @return 302 */ 303public int classifyExample(List<Integer> list, OWLIndividual indTestEx, DLTree tree) { 304 Stack<DLTree> stack= new Stack<>(); 305 //OWLDataFactory dataFactory = kb.getDataFactory(); 306 stack.add(tree); 307 int result=0; 308 boolean stop=false; 309 while(!stack.isEmpty() && !stop){ 310 DLTree currentTree= stack.pop(); 311 312 OWLClassExpression rootClass = currentTree.getRoot(); 313 // System.out.println("Root class: "+ rootClass); 314 if (rootClass.equals(dataFactory.getOWLThing())){ 315 // stop=true; 316 result=+1; 317 list.add(result); 318 319 } 320 else if (rootClass.equals(dataFactory.getOWLNothing())){ 321 // stop=true; 322 result=-1; 323 list.add(result); 324 325 }else if (reasoner.hasType(rootClass, indTestEx)) 326 stack.push(currentTree.getPosSubTree()); 327 else if (reasoner.hasType(dataFactory.getOWLObjectComplementOf(rootClass), indTestEx)) 328 stack.push(currentTree.getNegSubTree()); 329 else { 330 // stop=true; 331 result=0; 332 stack.push(currentTree.getPosSubTree()); 333 stack.push(currentTree.getNegSubTree()); 334 335 } 336 } 337 338 int posFr= Collections.frequency(list, +1); 339 int negFr= Collections.frequency(list, -1); 340 341 if (posFr>negFr) 342 return +1; 343 else 344 return -1; 345 346} 347 348/*protected OWLClassExpression selectBestConcept(OWLClassExpression[] concepts, ArrayList<OWLIndividual> posExs, ArrayList<OWLIndividual> negExs, 349 ArrayList<OWLIndividual> undExs, double prPos, double prNeg) { 350 351 int[] counts; 352 353 int bestConceptIndex = 0; 354 355 counts = getSplitCounts(concepts[0], posExs, negExs, undExs); 356 System.out.printf("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 357 "#"+0, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]); 358 359 double bestGain = gain(counts, prPos, prNeg); 360 361 System.out.printf("%+10e\n",bestGain); 362 363 System.out.println(concepts[0]); 364 365 for (int c=1; c<concepts.length; c++) { 366 367 counts = getSplitCounts(concepts[c], posExs, negExs, undExs); 368 System.out.printf("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 369 "#"+c, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]); 370 371 double thisGain = gain(counts, prPos, prNeg); 372 System.out.printf("%+10e\n",thisGain); 373 System.out.println(concepts[c]); 374 if(thisGain < bestGain) { 375 bestConceptIndex = c; 376 bestGain = thisGain; 377 } 378 } 379 380 System.out.printf("best gain: %f \t split #%d\n", bestGain, bestConceptIndex); 381 return concepts[bestConceptIndex]; 382}*/ 383 384public AbstractTDTClassifier() { 385 super(); 386} 387 388/*private int[] getSplitCounts(OWLClassExpression concept, ArrayList<OWLIndividual> posExs, ArrayList<OWLIndividual> negExs, 389 ArrayList<OWLIndividual> undExs) { 390 391 int[] counts = new int[9]; 392 ArrayList<OWLIndividual> posExsT = new ArrayList<OWLIndividual>(); 393 ArrayList<OWLIndividual> negExsT = new ArrayList<OWLIndividual>(); 394 ArrayList<OWLIndividual> undExsT = new ArrayList<OWLIndividual>(); 395 396 ArrayList<OWLIndividual> posExsF = new ArrayList<OWLIndividual>(); 397 ArrayList<OWLIndividual> negExsF = new ArrayList<OWLIndividual>(); 398 ArrayList<OWLIndividual> undExsF = new ArrayList<OWLIndividual>(); 399 400 ArrayList<OWLIndividual> posExsU = new ArrayList<OWLIndividual>(); 401 ArrayList<OWLIndividual> negExsU = new ArrayList<OWLIndividual>(); 402 ArrayList<OWLIndividual> undExsU = new ArrayList<OWLIndividual>(); 403 404 splitGroup(concept,posExs,posExsT,posExsF,posExsU); 405 splitGroup(concept,negExs,negExsT,negExsF,negExsU); 406 splitGroup(concept,undExs,undExsT,undExsF,undExsU); 407 408 counts[POSITIVE_INSTANCE_CHECK_TRUE] = posExsT.size(); 409 counts[NEGATIVE_INSTANCE_CHECK_TRUE] = negExsT.size(); 410 counts[UNCERTAIN_INSTANCE_CHECK_TRUE] = undExsT.size(); 411 counts[POSITIVE_INSTANCE_CHECK_FALSE] = posExsF.size(); 412 counts[NEGATIVE_INSTANCE_CHECK_FALSE] = negExsF.size(); 413 counts[UNCERTAIN_INSTANCE_CHECK_FALSE] = undExsF.size(); 414 counts[POSITIVE_INSTANCE_CHECK_UNC] = posExsU.size(); 415 counts[NEGATIVE_INSTANCE_CHECK_UNC] = negExsU.size(); 416 counts[UNCERTAIN_INSTANCE_CHECK_UNC] = undExsU.size(); 417 // for(int i=0; i<counts.length;i++) 418 // System.out.println(counts[i]); 419 420 return counts; 421 422}*/ 423 424//public abstract void prune(Integer[] pruningSet, AbstractTree tree, AbstractTree subtree); 425}