001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.decisiontrees.heuristics; 020 021import java.util.SortedSet; 022import java.util.TreeSet; 023 024import org.dllearner.learningproblems.PosNegUndLP; 025import org.dllearner.core.AbstractClassExpressionLearningProblem; 026import org.dllearner.core.AbstractReasonerComponent; 027import org.semanticweb.owlapi.model.OWLClassExpression; 028import org.semanticweb.owlapi.model.OWLDataFactory; 029import org.semanticweb.owlapi.model.OWLIndividual; 030import org.slf4j.Logger; 031import org.slf4j.LoggerFactory; 032import org.dllearner.algorithms.decisiontrees.dsttdt.dst.DSTUtils; 033import org.dllearner.algorithms.decisiontrees.dsttdt.dst.MassFunction; 034import org.dllearner.algorithms.decisiontrees.utils.Couple; 035 036import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; 037 038//import evaluation.Parameters; 039 040public class TreeInductionHeuristics { 041 042 private AbstractReasonerComponent reasoner; 043 private PosNegUndLP problem; 044 private OWLDataFactory dataFactory= new OWLDataFactoryImpl(); 045 private static Logger logger= LoggerFactory.getLogger(TreeInductionHeuristics.class); 046 047 protected static final int UNCERTAIN_INSTANCE_CHECK_UNC = 8; 048 049 protected static final int NEGATIVE_INSTANCE_CHECK_UNC = 7; 050 051 protected static final int POSITIVE_INSTANCE_CHECK_UNC = 6; 052 053 protected static final int UNCERTAIN_INSTANCE_CHECK_FALSE = 5; 054 055 protected static final int NEGATIVE_INSTANCE_CHECK_FALSE = 4; 056 057 protected static final int POSITIVE_INSTANCE_CHECK_FALSE = 3; 058 059 protected static final int UNCERTAIN_INSTANCE_CHECK_TRUE = 2; 060 061 protected static final int NEGATIVE_INSTANCE_CHECK_TRUE = 1; 062 063 protected static final int POSITIVE_INSTANCE_CHECK_TRUE = 0; 064 065 public TreeInductionHeuristics() { 066 067 } 068 069 public AbstractClassExpressionLearningProblem getProblem() { 070 return problem; 071 } 072 073 public void setProblem(AbstractClassExpressionLearningProblem problem) { 074 if (problem instanceof PosNegUndLP) 075 this.problem = (PosNegUndLP)problem; 076 077 078 } 079 080 public AbstractReasonerComponent getReasoner() { 081 return reasoner; 082 } 083 084 public void setReasoner(AbstractReasonerComponent reasoner) { 085 this.reasoner = reasoner; 086 //this.problem=problem; //learning problem 087 } 088 089 090 091 public void setProblem(PosNegUndLP problem) { 092 this.problem = problem; 093 } 094 095 public void init(){ 096 097 } 098 099 100 public OWLClassExpression selectBestConcept(OWLClassExpression[] concepts, SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, 101 SortedSet<OWLIndividual> undExs, double prPos, double prNeg) { 102 103 104 int[] counts; 105 106 int bestConceptIndex = 0; 107 108 counts = getSplitCounts(concepts[0], posExs, negExs, undExs); 109 //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 110 // "#"+0, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]); 111 logger.debug("#"+ 0+" "+concepts[0]+"\t p:"+counts[0]+"n:"+counts[1]+"u:"+counts[2] +"\t p:"+counts[3] +" n:"+counts[4] +" u:"+ counts[5]+"\t p:"+counts[6] +" n:"+counts[7] +" u:"+counts[8] +"\t "); 112 double bestGain = gain(counts, prPos, prNeg); 113 114 System.out.printf("%+10e\n",bestGain); 115 116 System.out.println(concepts[0]); 117 118 for (int c=1; c<concepts.length; c++) { 119 120 counts = getSplitCounts(concepts[c], posExs, negExs, undExs); 121 logger.debug("#"+c+" "+concepts[c]+" p: "+counts[0]+"n:"+counts[1]+"u:"+counts[2] +"\t p:"+counts[3] +" n:"+counts[4] +" u:"+ counts[5]+"\t p:"+counts[6] +" n:"+counts[7] +" u:"+counts[8] +"\t "); 122 123 double thisGain = gain(counts, prPos, prNeg); 124 logger.debug(thisGain+"\n"); 125 logger.debug(concepts[c].toString()); 126 if(thisGain < bestGain) { 127 bestConceptIndex = c; 128 bestGain = thisGain; 129 } 130 } 131 132 System.out.printf("best gain: "+ bestGain+" \t split "+ concepts[bestConceptIndex]); 133 return concepts[bestConceptIndex]; 134 } 135 136/* Confidence-based evaluation (for tackling the imbalance problem) */ 137public OWLClassExpression selectBestConceptCCP(OWLClassExpression[] concepts, SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, 138 SortedSet<OWLIndividual> undExs, double prPos, double prNeg) { 139 140 int[] counts; 141 142 int bestConceptIndex = 0; 143 144 counts = getSplitCounts(concepts[0], posExs, negExs, undExs); 145 146 logger.debug("#"+0+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t "); 147 148 //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 149 // "#"+0, counts[, counts[], counts[], counts[], counts[], counts[], counts[], counts[UNCERTAIN_INSTANCE_CHECK_FALSE], counts[UNCERTAIN_INSTANCE_CHECK_UNC]); 150 151// SortedSet<OWLIndividual> truePositiveExample = problem.getPositiveExamples(); 152// SortedSet<OWLIndividual> trueNegativeExample = problem.getNegativeExamples(); 153 double minEntropy = CCP(counts, prPos, prNeg); // recall improvement 154 155 logger.debug("%+10e\n",minEntropy); 156 157 logger.debug(concepts[0].toString()); 158 159 for (int c=1; c<concepts.length; c++) { 160 161 counts = getSplitCounts(concepts[0], posExs, negExs, undExs); 162// System.out.printf("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 163// "#"+c, counts[POSITIVE_INSTANCE_CHECK_TRUE], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]); 164 165 //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 166 //"#"+c, counts[POSITIVE_INSTANCE_CHECK_TRUE], counts[POSITIVE_INSTANCE_CHECK_FALSE], counts[POSITIVE_INSTANCE_CHECK_UNC], counts[NEGATIVE_INSTANCE_CHECK_TRUE], counts[NEGATIVE_INSTANCE_CHECK_FALSE], counts[NEGATIVE_INSTANCE_CHECK_UNC], counts[UNCERTAIN_INSTANCE_CHECK_TRUE], counts[UNCERTAIN_INSTANCE_CHECK_FALSE], counts[UNCERTAIN_INSTANCE_CHECK_UNC]); 167 168 logger.debug("#"+c+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t "); 169 double thisEntropy = CCP(counts, prPos, prNeg); 170 logger.debug(thisEntropy+"\n"); 171 logger.debug(concepts[c].toString()); 172 if(thisEntropy < minEntropy) { 173 bestConceptIndex = c; 174 minEntropy = thisEntropy; 175 } 176 } 177 178 logger.debug("best gain:"+minEntropy+" \t split #" +bestConceptIndex); 179 return concepts[bestConceptIndex]; 180} 181 182 private double CCP(int[] counts, double prPos, double prNeg) { 183 // TODO Auto-generated method stub 184 185 double cP = counts[POSITIVE_INSTANCE_CHECK_TRUE] + counts[POSITIVE_INSTANCE_CHECK_FALSE]; 186 double cN = counts[NEGATIVE_INSTANCE_CHECK_TRUE] + counts[NEGATIVE_INSTANCE_CHECK_FALSE]; 187 double cU = counts[POSITIVE_INSTANCE_CHECK_UNC] + counts[NEGATIVE_INSTANCE_CHECK_UNC] + counts[UNCERTAIN_INSTANCE_CHECK_TRUE] + counts[UNCERTAIN_INSTANCE_CHECK_FALSE]; 188 double sum= cP+cN+cU; 189 double c= sum!=0?cP+cN/sum:0; 190 191 double sizeTP = counts[0]+1; 192 double sizeFP = counts[1]+1; 193 double sizeFN= counts[3]+1; 194 double sizeTN= counts[4]+1; 195 196 197 double tpr= (sizeTP+sizeFP)!=0?((sizeTP)/(sizeTP+sizeFP)):1; 198 double fpr= (sizeFP+sizeTN)!=0?((sizeFP+0.5)/(sizeFP+sizeTN)):1; 199 200 double p1=(2-tpr-fpr)!=0?(1-tpr)/(2-tpr-fpr):1; 201 double p2=(2-tpr-fpr)!=0?(1-fpr)/(2-tpr-fpr):1; 202 //System.out.println( "TPR:"+tpr+"--"+" FPR:"+ fpr+ " p1: "+ p1+" p2:"+p2); 203 double entropyCCP= (-(tpr+fpr)*((tpr/(tpr+fpr))*Math.log(tpr/(tpr+fpr))-(fpr/(tpr+fpr))*Math.log(fpr/(tpr+fpr))) 204 -(2-p1-p2)*(p1*Math.log(p1)-p2*Math.log(p2))); 205 206 return entropyCCP; 207 } 208 209 /* Gain in terms of gini?*/ 210 private double gain(int[] counts, double prPos, double prNeg) { 211 212 double sizeT = counts[POSITIVE_INSTANCE_CHECK_TRUE] + counts[POSITIVE_INSTANCE_CHECK_FALSE]; 213 double sizeF = counts[NEGATIVE_INSTANCE_CHECK_TRUE] + counts[NEGATIVE_INSTANCE_CHECK_FALSE]; 214 double sizeU = counts[POSITIVE_INSTANCE_CHECK_UNC] + counts[NEGATIVE_INSTANCE_CHECK_UNC ] + counts[UNCERTAIN_INSTANCE_CHECK_TRUE] + counts[UNCERTAIN_INSTANCE_CHECK_FALSE]; 215 double sum = sizeT+sizeF+sizeU; 216 217 double startImpurity = gini(counts[POSITIVE_INSTANCE_CHECK_TRUE]+counts[POSITIVE_INSTANCE_CHECK_FALSE], counts[NEGATIVE_INSTANCE_CHECK_TRUE]+counts[NEGATIVE_INSTANCE_CHECK_FALSE], prPos, prNeg); 218 double tImpurity = gini(counts[POSITIVE_INSTANCE_CHECK_TRUE], counts[NEGATIVE_INSTANCE_CHECK_TRUE], prPos, prNeg); 219 double fImpurity = gini(counts[POSITIVE_INSTANCE_CHECK_FALSE], counts[NEGATIVE_INSTANCE_CHECK_FALSE], prPos, prNeg); 220 double uImpurity = gini(counts[POSITIVE_INSTANCE_CHECK_UNC]+counts[UNCERTAIN_INSTANCE_CHECK_TRUE], counts[NEGATIVE_INSTANCE_CHECK_UNC]+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] , prPos, prNeg); 221 222 return (startImpurity - (sizeT/sum)*tImpurity - (sizeF/sum)*fImpurity - -(sizeU/sum)*uImpurity); 223 } 224 225 226 static double gini(double numPos, double numNeg, double prPos, 227 double prNeg) { 228 229 double sum = numPos+numNeg; 230 int M=3; 231 232 double p1 = (numPos*M*prPos)/(sum+M); //m-estimate probability 233 double p2 = (numNeg* M*prNeg)/(sum+M); 234 235 return (1.0-p1*p1-p2*p2); 236 // return (1-Math.pow(p1,2)-Math.pow(p2,2))/2; 237 } 238 239 private int[] getSplitCounts(OWLClassExpression concept, SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, 240 SortedSet<OWLIndividual> undExs) { 241 242 int[] counts = new int[9]; 243 SortedSet<OWLIndividual> posExsT = new TreeSet<>(); 244 SortedSet<OWLIndividual> negExsT = new TreeSet<>(); 245 SortedSet<OWLIndividual> undExsT = new TreeSet<>(); 246 247 SortedSet<OWLIndividual> posExsF = new TreeSet<>(); 248 SortedSet<OWLIndividual> negExsF = new TreeSet<>(); 249 SortedSet<OWLIndividual> undExsF = new TreeSet<>(); 250 251 SortedSet<OWLIndividual> posExsU = new TreeSet<>(); 252 SortedSet<OWLIndividual> negExsU = new TreeSet<>(); 253 SortedSet<OWLIndividual> undExsU = new TreeSet<>(); 254 255 splitGroup(concept,posExs,posExsT,posExsF,posExsU); 256 splitGroup(concept,negExs,negExsT,negExsF,negExsU); 257 splitGroup(concept,undExs,undExsT,undExsF,undExsU); 258 259 counts[0] = posExsT.size(); 260 counts[1] = negExsT.size(); 261 counts[2] = undExsT.size(); 262 counts[3] = posExsF.size(); 263 counts[4] = negExsF.size(); 264 counts[5] = undExsF.size(); 265 counts[6] = posExsU.size(); 266 counts[7] = negExsU.size(); 267 counts[8] = undExsU.size(); 268 // for(int i=0; i<counts.length;i++) 269 // System.out.println(counts[i]); 270 271 return counts; 272 273 } 274 275 protected void split(OWLClassExpression concept, SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs, 276 SortedSet<OWLIndividual> posExsT, SortedSet<OWLIndividual> negExsT, SortedSet<OWLIndividual> undExsT, SortedSet<OWLIndividual> posExsF, SortedSet<OWLIndividual> negExsF, 277 SortedSet<OWLIndividual> undExsF) { 278 279 SortedSet<OWLIndividual> posExsU = new TreeSet<>(); 280 SortedSet<OWLIndividual> negExsU = new TreeSet<>(); 281 SortedSet<OWLIndividual> undExsU = new TreeSet<>(); 282 283 splitGroup(concept,posExs,posExsT,posExsF,posExsU); 284 splitGroup(concept,negExs,negExsT,negExsF,negExsU); 285 splitGroup(concept,undExs,undExsT,undExsF,undExsU); 286 287 } 288 289 private void splitGroup(OWLClassExpression concept, SortedSet<OWLIndividual> nodeExamples, SortedSet<OWLIndividual> posExsT, 290 SortedSet<OWLIndividual> falseExs, SortedSet<OWLIndividual> posExsU) { 291 OWLClassExpression negConcept = dataFactory.getOWLObjectComplementOf(concept); 292 293 for ( OWLIndividual individual :nodeExamples ){//int e=0; e<nodeExamples.size(); e++) { 294 295// int exIndex = nodeExamples.get(e); 296 if (reasoner.hasType(concept, individual)) 297 posExsT.add(individual); 298 else if (reasoner.hasType(negConcept, individual)) 299 falseExs.add(individual); 300 else 301 posExsU.add(individual); 302 } 303 304 305 306} 307 /** 308 * Returns the best pair with the lowest non specificity measure. To be used with the original refinement operator for DL 309 * @param concepts 310 * @param posExs 311 * @param negExs 312 * @param undExs 313 * @param prPos 314 * @param prNeg 315 * @return 316 */ 317 318 public Couple<OWLClassExpression, MassFunction> selectBestConceptDST(OWLClassExpression[] concepts, 319 SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs, 320 double prPos, double prNeg) { 321 322 int[] counts; 323 324 int bestConceptIndex = 0; 325 326 counts = getSplitCounts(concepts[0], posExs, negExs, undExs); 327 //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 328 // "#"+0, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]); 329 logger.debug("#"+0+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t "); 330 // double bestGain = gain(counts, prPos, prNeg); 331 332 int posExs2 = counts[0] + counts[1]; 333 int negExs2 = counts[3] + counts[4]; 334 int undExs2 = counts[6] + counts[7] + counts[2] + counts[5]; 335 //System.out.println("Split: "+posExs2 +"---"+negExs2+"--"+undExs2); 336 MassFunction<Integer> bestBba = DSTUtils.getBBA(posExs2,negExs2,undExs2); 337 338 double bestNonSpecificity = bestBba.getNonSpecificityMeasureValue(); 339 bestBba.getConfusionMeasure(); 340 logger.debug("%+10e\n",bestNonSpecificity); 341 342 System.out.println(concepts[0]); 343 344 for (int c=1; c<concepts.length; c++) { 345 346 counts = getSplitCounts(concepts[c], posExs, negExs, undExs); 347// logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 348// "#"+c, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]); 349 350 logger.debug("#"+c+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t "); 351 MassFunction<Integer> thisbba = DSTUtils.getBBA(counts[0] + counts[1],counts[3] + counts[4],counts[6] + counts[7] + counts[2] + counts[5]); 352 double thisNonSpecificity = thisbba.getNonSpecificityMeasureValue(); 353 thisbba.getGlobalUncertaintyMeasure(); 354 logger.debug("%+10e\n",thisNonSpecificity); 355 logger.debug("%+10e\n",thisNonSpecificity); 356 logger.debug(concepts[c].toString()); 357 //select the worst concept 358 if(thisNonSpecificity <= bestNonSpecificity) { 359 // if(thisGlobalUncMeasure < bestTotaluncertaintyMeasure) { 360 bestConceptIndex = c; 361 bestNonSpecificity = thisNonSpecificity; 362 bestBba= thisbba; 363 } 364 } 365 366 logger.debug("best gain: %f \t split #%d\n", bestNonSpecificity, bestConceptIndex); 367 Couple<OWLClassExpression,MassFunction> name = new Couple<>(); 368 name.setFirstElement(concepts[bestConceptIndex]); 369 name.setSecondElement(bestBba); 370 return name; 371 } 372 373 374 /** 375 * A method which select the worst pair in terms of non-specificity measure. To be used jointly with the original refinement operators of DL-LEarner 376 * @param concepts 377 * @param posExs 378 * @param negExs 379 * @param undExs 380 * @param prPos 381 * @param prNeg 382 * @return 383 */ 384 public Couple<OWLClassExpression, MassFunction> selectWorstConceptDST(OWLClassExpression[] concepts, 385 SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs, 386 double prPos, double prNeg) { 387 388 int[] counts; 389 390 int bestConceptIndex = 0; 391 392 counts = getSplitCounts(concepts[0], posExs, negExs, undExs); 393 //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 394 // "#"+0, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]); 395 logger.debug("#"+0+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t "); 396 // double bestGain = gain(counts, prPos, prNeg); 397 // introduzione della mbisura di non specificit� 398 int posExs2 = counts[0] + counts[1]; 399 int negExs2 = counts[3] + counts[4]; 400 int undExs2 = counts[6] + counts[7] + counts[2] + counts[5]; 401 //System.out.println("Split: "+posExs2 +"---"+negExs2+"--"+undExs2); 402 MassFunction<Integer> bestBba = DSTUtils.getBBA(posExs2,negExs2,undExs2); 403 404 double bestNonSpecificity = bestBba.getNonSpecificityMeasureValue(); 405 bestBba.getConfusionMeasure(); 406 logger.debug("%+10e\n",bestNonSpecificity); 407 408 System.out.println(concepts[0]); 409 410 for (int c=1; c<concepts.length; c++) { 411 412 counts = getSplitCounts(concepts[c], posExs, negExs, undExs); 413// logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 414// "#"+c, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]); 415 416 logger.debug("#"+c+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t "); 417 MassFunction<Integer> thisbba = DSTUtils.getBBA(counts[0] + counts[1],counts[3] + counts[4],counts[6] + counts[7] + counts[2] + counts[5]); 418 double thisNonSpecificity = thisbba.getNonSpecificityMeasureValue(); 419 thisbba.getGlobalUncertaintyMeasure(); 420 logger.debug("%+10e\n",thisNonSpecificity); 421 logger.debug("%+10e\n",thisNonSpecificity); 422 logger.debug(concepts[c].toString()); 423 //select the worst concept 424 if(thisNonSpecificity >= bestNonSpecificity) { 425 // if(thisGlobalUncMeasure < bestTotaluncertaintyMeasure) { 426 bestConceptIndex = c; 427 bestNonSpecificity = thisNonSpecificity; 428 bestBba= thisbba; 429 } 430 } 431 432 logger.debug("best gain: %f \t split #%d\n", bestNonSpecificity, bestConceptIndex); 433 Couple<OWLClassExpression,MassFunction> name = new Couple<>(); 434 name.setFirstElement(concepts[bestConceptIndex]); 435 name.setSecondElement(bestBba); 436 return name; 437 } 438 439/** 440 * Selct the worst concept in terms of information gain. To be used jointly with 441 * @param concepts 442 * @param posExs 443 * @param negExs 444 * @param undExs 445 * @param perPos 446 * @param perNeg 447 * @return 448 */ 449 public OWLClassExpression selectWorstConcept(OWLClassExpression[] concepts, SortedSet<OWLIndividual> posExs, 450 SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs, double perPos, double perNeg) { 451 // TODO Auto-generated method stub 452 int[] counts; 453 454 int bestConceptIndex = 0; 455 456 counts = getSplitCounts(concepts[0], posExs, negExs, undExs); 457 //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 458 // "#"+0, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]); 459 logger.debug("#"+ 0+" "+concepts[0]+"\t p:"+counts[0]+"n:"+counts[1]+"u:"+counts[2] +"\t p:"+counts[3] +" n:"+counts[4] +" u:"+ counts[5]+"\t p:"+counts[6] +" n:"+counts[7] +" u:"+counts[8] +"\t "); 460 double bestGain = gain(counts, perPos, perNeg); 461 462 System.out.printf("%+10e\n",bestGain); 463 464 System.out.println(concepts[0]); 465 466 for (int c=1; c<concepts.length; c++) { 467 468 counts = getSplitCounts(concepts[c], posExs, negExs, undExs); 469 logger.debug("#"+c+" "+concepts[c]+" p: "+counts[0]+"n:"+counts[1]+"u:"+counts[2] +"\t p:"+counts[3] +" n:"+counts[4] +" u:"+ counts[5]+"\t p:"+counts[6] +" n:"+counts[7] +" u:"+counts[8] +"\t "); 470 471 double thisGain = gain(counts, perPos, perNeg); 472 logger.debug(thisGain+"\n"); 473 logger.debug(concepts[c].toString()); 474 if(thisGain > bestGain) { 475 bestConceptIndex = c; 476 bestGain = thisGain; 477 } 478 } 479 480 System.out.printf("best gain: "+ bestGain+" \t split "+ concepts[bestConceptIndex]); 481 return concepts[bestConceptIndex]; 482 483 } 484 485 486 487}