Source code

001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.decisiontrees.heuristics;
020
021import java.util.SortedSet;
022import java.util.TreeSet;
023
024import org.dllearner.learningproblems.PosNegUndLP;
025import org.dllearner.core.AbstractClassExpressionLearningProblem;
026import org.dllearner.core.AbstractReasonerComponent;
027import org.semanticweb.owlapi.model.OWLClassExpression;
028import org.semanticweb.owlapi.model.OWLDataFactory;
029import org.semanticweb.owlapi.model.OWLIndividual;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032import org.dllearner.algorithms.decisiontrees.dsttdt.dst.DSTUtils;
033import org.dllearner.algorithms.decisiontrees.dsttdt.dst.MassFunction;
034import org.dllearner.algorithms.decisiontrees.utils.Couple;
035
036import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
037
038//import evaluation.Parameters;
039
040public class TreeInductionHeuristics {
041        
042        private AbstractReasonerComponent reasoner;
043        private PosNegUndLP problem;
044        private OWLDataFactory dataFactory= new OWLDataFactoryImpl();
045        private static Logger logger= LoggerFactory.getLogger(TreeInductionHeuristics.class);
046
047        protected static final int UNCERTAIN_INSTANCE_CHECK_UNC = 8;
048
049        protected static final int NEGATIVE_INSTANCE_CHECK_UNC = 7;
050
051        protected static final int POSITIVE_INSTANCE_CHECK_UNC = 6;
052
053        protected static final int UNCERTAIN_INSTANCE_CHECK_FALSE = 5;
054
055        protected static final int NEGATIVE_INSTANCE_CHECK_FALSE = 4;
056
057        protected static final int POSITIVE_INSTANCE_CHECK_FALSE = 3;
058
059        protected static final int UNCERTAIN_INSTANCE_CHECK_TRUE = 2;
060
061        protected static final int NEGATIVE_INSTANCE_CHECK_TRUE = 1;
062
063        protected static final int POSITIVE_INSTANCE_CHECK_TRUE = 0;
064        
065        public TreeInductionHeuristics() {
066                
067        }
068
069        public AbstractClassExpressionLearningProblem getProblem() {
070                return problem;
071        }
072
073        public void setProblem(AbstractClassExpressionLearningProblem problem) {
074                if (problem instanceof PosNegUndLP)
075                        this.problem = (PosNegUndLP)problem;
076                
077                        
078        }
079
080        public AbstractReasonerComponent getReasoner() {
081                return reasoner;
082        }
083
084        public void setReasoner(AbstractReasonerComponent reasoner) {
085                this.reasoner = reasoner;
086                //this.problem=problem; //learning problem      
087        }
088        
089        
090        
091        public void setProblem(PosNegUndLP problem) {
092                this.problem = problem;
093        }
094
095        public void init(){
096                
097        }
098        
099
100        public OWLClassExpression selectBestConcept(OWLClassExpression[] concepts, SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs,
101                        SortedSet<OWLIndividual> undExs, double prPos, double prNeg) {
102                
103
104                int[] counts;
105
106                int bestConceptIndex = 0;
107
108                counts = getSplitCounts(concepts[0], posExs, negExs, undExs);
109                //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 
110                        //      "#"+0, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]);
111                logger.debug("#"+ 0+"  "+concepts[0]+"\t p:"+counts[0]+"n:"+counts[1]+"u:"+counts[2] +"\t p:"+counts[3] +" n:"+counts[4] +" u:"+ counts[5]+"\t p:"+counts[6] +" n:"+counts[7] +" u:"+counts[8] +"\t ");
112                double bestGain = gain(counts, prPos, prNeg);
113
114                System.out.printf("%+10e\n",bestGain);
115
116                System.out.println(concepts[0]);
117
118                for (int c=1; c<concepts.length; c++) {
119
120                        counts = getSplitCounts(concepts[c], posExs, negExs, undExs);
121                        logger.debug("#"+c+"   "+concepts[c]+"   p: "+counts[0]+"n:"+counts[1]+"u:"+counts[2] +"\t p:"+counts[3] +" n:"+counts[4] +" u:"+ counts[5]+"\t p:"+counts[6] +" n:"+counts[7] +" u:"+counts[8] +"\t ");
122
123                        double thisGain = gain(counts, prPos, prNeg);
124                        logger.debug(thisGain+"\n");
125                        logger.debug(concepts[c].toString());
126                        if(thisGain < bestGain) {
127                                bestConceptIndex = c;
128                                bestGain = thisGain;
129                        }
130                }
131
132                System.out.printf("best gain: "+ bestGain+" \t split "+ concepts[bestConceptIndex]);
133                return concepts[bestConceptIndex];
134        }
135        
136/*  Confidence-based evaluation (for tackling the imbalance problem) */ 
137public OWLClassExpression selectBestConceptCCP(OWLClassExpression[] concepts, SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs,
138                        SortedSet<OWLIndividual> undExs, double prPos, double prNeg) {
139
140                int[] counts;
141
142                int bestConceptIndex = 0;
143
144                counts = getSplitCounts(concepts[0], posExs, negExs, undExs);
145                
146                logger.debug("#"+0+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t ");
147                
148                //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 
149                        //      "#"+0, counts[, counts[], counts[], counts[], counts[], counts[], counts[], counts[UNCERTAIN_INSTANCE_CHECK_FALSE], counts[UNCERTAIN_INSTANCE_CHECK_UNC]);
150
151//              SortedSet<OWLIndividual> truePositiveExample = problem.getPositiveExamples();
152//              SortedSet<OWLIndividual> trueNegativeExample = problem.getNegativeExamples();
153                double minEntropy = CCP(counts, prPos, prNeg); // recall improvement
154
155                logger.debug("%+10e\n",minEntropy);
156
157                logger.debug(concepts[0].toString());
158
159                for (int c=1; c<concepts.length; c++) {
160
161                        counts = getSplitCounts(concepts[0], posExs, negExs, undExs);
162//                      System.out.printf("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 
163//                                      "#"+c, counts[POSITIVE_INSTANCE_CHECK_TRUE], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]);
164                        
165                        //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 
166                                        //"#"+c, counts[POSITIVE_INSTANCE_CHECK_TRUE], counts[POSITIVE_INSTANCE_CHECK_FALSE], counts[POSITIVE_INSTANCE_CHECK_UNC], counts[NEGATIVE_INSTANCE_CHECK_TRUE], counts[NEGATIVE_INSTANCE_CHECK_FALSE], counts[NEGATIVE_INSTANCE_CHECK_UNC], counts[UNCERTAIN_INSTANCE_CHECK_TRUE], counts[UNCERTAIN_INSTANCE_CHECK_FALSE], counts[UNCERTAIN_INSTANCE_CHECK_UNC]);
167
168                        logger.debug("#"+c+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t ");
169                        double thisEntropy = CCP(counts, prPos, prNeg);
170                        logger.debug(thisEntropy+"\n");
171                        logger.debug(concepts[c].toString());
172                        if(thisEntropy < minEntropy) {
173                                bestConceptIndex = c;
174                                minEntropy = thisEntropy;
175                        }
176                }
177
178                logger.debug("best gain:"+minEntropy+" \t split #" +bestConceptIndex);
179                return concepts[bestConceptIndex];
180}
181
182        private double CCP(int[] counts, double prPos, double prNeg) {
183                // TODO Auto-generated method stub
184                
185                double cP = counts[POSITIVE_INSTANCE_CHECK_TRUE] + counts[POSITIVE_INSTANCE_CHECK_FALSE];
186                double cN = counts[NEGATIVE_INSTANCE_CHECK_TRUE] + counts[NEGATIVE_INSTANCE_CHECK_FALSE];
187                double cU = counts[POSITIVE_INSTANCE_CHECK_UNC] + counts[NEGATIVE_INSTANCE_CHECK_UNC] + counts[UNCERTAIN_INSTANCE_CHECK_TRUE] + counts[UNCERTAIN_INSTANCE_CHECK_FALSE];
188                double sum= cP+cN+cU;
189                double c= sum!=0?cP+cN/sum:0;
190                
191                double sizeTP = counts[0]+1;
192                double sizeFP = counts[1]+1;
193                double sizeFN= counts[3]+1;
194                double sizeTN= counts[4]+1;
195                
196                
197                double tpr= (sizeTP+sizeFP)!=0?((sizeTP)/(sizeTP+sizeFP)):1;
198                double fpr= (sizeFP+sizeTN)!=0?((sizeFP+0.5)/(sizeFP+sizeTN)):1;
199
200                   double p1=(2-tpr-fpr)!=0?(1-tpr)/(2-tpr-fpr):1;
201                   double p2=(2-tpr-fpr)!=0?(1-fpr)/(2-tpr-fpr):1;
202                   //System.out.println( "TPR:"+tpr+"--"+" FPR:"+ fpr+ " p1: "+ p1+" p2:"+p2);
203                   double entropyCCP= (-(tpr+fpr)*((tpr/(tpr+fpr))*Math.log(tpr/(tpr+fpr))-(fpr/(tpr+fpr))*Math.log(fpr/(tpr+fpr)))
204                                   -(2-p1-p2)*(p1*Math.log(p1)-p2*Math.log(p2)));
205
206                return entropyCCP;
207        }
208
209        /* Gain in terms of gini?*/
210        private double gain(int[] counts, double prPos, double prNeg) {
211
212                double sizeT = counts[POSITIVE_INSTANCE_CHECK_TRUE] + counts[POSITIVE_INSTANCE_CHECK_FALSE];
213                double sizeF = counts[NEGATIVE_INSTANCE_CHECK_TRUE] + counts[NEGATIVE_INSTANCE_CHECK_FALSE];
214                double sizeU = counts[POSITIVE_INSTANCE_CHECK_UNC] + counts[NEGATIVE_INSTANCE_CHECK_UNC ] + counts[UNCERTAIN_INSTANCE_CHECK_TRUE] + counts[UNCERTAIN_INSTANCE_CHECK_FALSE];
215                double sum = sizeT+sizeF+sizeU;
216
217                double startImpurity = gini(counts[POSITIVE_INSTANCE_CHECK_TRUE]+counts[POSITIVE_INSTANCE_CHECK_FALSE], counts[NEGATIVE_INSTANCE_CHECK_TRUE]+counts[NEGATIVE_INSTANCE_CHECK_FALSE], prPos, prNeg);
218                double tImpurity = gini(counts[POSITIVE_INSTANCE_CHECK_TRUE], counts[NEGATIVE_INSTANCE_CHECK_TRUE], prPos, prNeg);
219                double fImpurity = gini(counts[POSITIVE_INSTANCE_CHECK_FALSE], counts[NEGATIVE_INSTANCE_CHECK_FALSE], prPos, prNeg);
220                double uImpurity = gini(counts[POSITIVE_INSTANCE_CHECK_UNC]+counts[UNCERTAIN_INSTANCE_CHECK_TRUE], counts[NEGATIVE_INSTANCE_CHECK_UNC]+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] , prPos, prNeg);          
221
222                return (startImpurity - (sizeT/sum)*tImpurity - (sizeF/sum)*fImpurity - -(sizeU/sum)*uImpurity);
223        }
224        
225        
226        static double gini(double numPos, double numNeg, double prPos,
227                        double prNeg) {
228
229                double sum = numPos+numNeg;
230                int M=3;
231
232                double p1 = (numPos*M*prPos)/(sum+M); //m-estimate probability
233                double p2 = (numNeg* M*prNeg)/(sum+M);
234
235                return (1.0-p1*p1-p2*p2);
236                //              return (1-Math.pow(p1,2)-Math.pow(p2,2))/2;
237        }
238
239        private int[] getSplitCounts(OWLClassExpression concept, SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs,
240                        SortedSet<OWLIndividual> undExs) {
241                
242                int[] counts = new int[9];
243                SortedSet<OWLIndividual> posExsT = new TreeSet<>();
244                SortedSet<OWLIndividual> negExsT = new TreeSet<>();
245                SortedSet<OWLIndividual> undExsT = new TreeSet<>();
246
247                SortedSet<OWLIndividual> posExsF = new TreeSet<>();
248                SortedSet<OWLIndividual> negExsF = new TreeSet<>();
249                SortedSet<OWLIndividual> undExsF = new TreeSet<>();
250
251                SortedSet<OWLIndividual> posExsU = new TreeSet<>();
252                SortedSet<OWLIndividual> negExsU = new TreeSet<>();
253                SortedSet<OWLIndividual> undExsU = new TreeSet<>();
254
255                splitGroup(concept,posExs,posExsT,posExsF,posExsU);
256                splitGroup(concept,negExs,negExsT,negExsF,negExsU);     
257                splitGroup(concept,undExs,undExsT,undExsF,undExsU);     
258
259                counts[0] = posExsT.size(); 
260                counts[1] = negExsT.size(); 
261                counts[2] = undExsT.size(); 
262                counts[3] = posExsF.size(); 
263                counts[4] = negExsF.size();
264                counts[5] = undExsF.size();
265                counts[6] = posExsU.size(); 
266                counts[7] = negExsU.size();
267                counts[8] = undExsU.size();
268                //              for(int i=0; i<counts.length;i++)
269                //                      System.out.println(counts[i]);
270
271                return counts;
272
273        }
274
275        protected void split(OWLClassExpression concept, SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs,
276                        SortedSet<OWLIndividual> posExsT, SortedSet<OWLIndividual> negExsT, SortedSet<OWLIndividual> undExsT, SortedSet<OWLIndividual> posExsF, SortedSet<OWLIndividual> negExsF,
277                        SortedSet<OWLIndividual> undExsF) {
278
279                SortedSet<OWLIndividual> posExsU = new TreeSet<>();
280                SortedSet<OWLIndividual> negExsU = new TreeSet<>();
281                SortedSet<OWLIndividual> undExsU = new TreeSet<>();
282
283                splitGroup(concept,posExs,posExsT,posExsF,posExsU);
284                splitGroup(concept,negExs,negExsT,negExsF,negExsU);
285                splitGroup(concept,undExs,undExsT,undExsF,undExsU);     
286
287        }
288
289        private void splitGroup(OWLClassExpression concept, SortedSet<OWLIndividual> nodeExamples, SortedSet<OWLIndividual> posExsT,
290                        SortedSet<OWLIndividual> falseExs, SortedSet<OWLIndividual> posExsU) {
291                OWLClassExpression negConcept = dataFactory.getOWLObjectComplementOf(concept);
292
293                for ( OWLIndividual individual :nodeExamples ){//int e=0; e<nodeExamples.size(); e++) {
294                        
295//                      int exIndex = nodeExamples.get(e);
296                        if (reasoner.hasType(concept, individual))
297                                posExsT.add(individual);
298                        else if (reasoner.hasType(negConcept, individual))
299                                falseExs.add(individual);
300                        else
301                                posExsU.add(individual);                
302                }       
303
304        
305
306}
307        /**
308         * Returns the best pair with the lowest non specificity measure. To be used with the original refinement operator for DL
309         * @param concepts
310         * @param posExs
311         * @param negExs
312         * @param undExs
313         * @param prPos
314         * @param prNeg
315         * @return
316         */
317        
318        public  Couple<OWLClassExpression, MassFunction> selectBestConceptDST(OWLClassExpression[] concepts,
319                        SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs, 
320                        double prPos, double prNeg) {
321
322                int[] counts;
323
324                int bestConceptIndex = 0;
325
326                counts = getSplitCounts(concepts[0], posExs, negExs, undExs);
327                //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 
328                        //      "#"+0, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]);
329                logger.debug("#"+0+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t ");
330                //              double bestGain = gain(counts, prPos, prNeg);
331        
332                int posExs2 = counts[0] + counts[1];
333                int negExs2 = counts[3] + counts[4];
334                int undExs2 = counts[6] + counts[7] + counts[2] + counts[5];
335                //System.out.println("Split: "+posExs2 +"---"+negExs2+"--"+undExs2);
336                MassFunction<Integer> bestBba = DSTUtils.getBBA(posExs2,negExs2,undExs2);
337
338                double bestNonSpecificity = bestBba.getNonSpecificityMeasureValue();
339                bestBba.getConfusionMeasure();
340                logger.debug("%+10e\n",bestNonSpecificity);
341
342                System.out.println(concepts[0]);
343
344                for (int c=1; c<concepts.length; c++) {
345
346                        counts = getSplitCounts(concepts[c], posExs, negExs, undExs);
347//                      logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 
348//                                      "#"+c, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]);
349
350                        logger.debug("#"+c+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t ");
351                        MassFunction<Integer> thisbba = DSTUtils.getBBA(counts[0] + counts[1],counts[3] + counts[4],counts[6] + counts[7] + counts[2] + counts[5]);
352                        double thisNonSpecificity = thisbba.getNonSpecificityMeasureValue();
353                        thisbba.getGlobalUncertaintyMeasure();
354                        logger.debug("%+10e\n",thisNonSpecificity);
355                        logger.debug("%+10e\n",thisNonSpecificity);
356                        logger.debug(concepts[c].toString());
357                        //select the worst concept
358                        if(thisNonSpecificity <= bestNonSpecificity) {
359                                //                      if(thisGlobalUncMeasure < bestTotaluncertaintyMeasure) {
360                                bestConceptIndex = c;
361                                bestNonSpecificity = thisNonSpecificity;
362                                bestBba= thisbba;
363                        }
364                }
365
366                logger.debug("best gain: %f \t split #%d\n", bestNonSpecificity, bestConceptIndex);
367                Couple<OWLClassExpression,MassFunction> name = new Couple<>();
368                name.setFirstElement(concepts[bestConceptIndex]);
369                name.setSecondElement(bestBba);
370                return name;
371        }
372
373        
374        /**
375         * A method which select the worst pair in terms of non-specificity measure. To be used jointly with the original refinement operators of DL-LEarner 
376         * @param concepts
377         * @param posExs
378         * @param negExs
379         * @param undExs
380         * @param prPos
381         * @param prNeg
382         * @return
383         */
384        public  Couple<OWLClassExpression, MassFunction> selectWorstConceptDST(OWLClassExpression[] concepts,
385                        SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs, 
386                        double prPos, double prNeg) {
387
388                int[] counts;
389
390                int bestConceptIndex = 0;
391
392                counts = getSplitCounts(concepts[0], posExs, negExs, undExs);
393                //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 
394                        //      "#"+0, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]);
395                logger.debug("#"+0+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t ");
396                //              double bestGain = gain(counts, prPos, prNeg);
397                //  introduzione della mbisura di non specificitï¿½
398                int posExs2 = counts[0] + counts[1];
399                int negExs2 = counts[3] + counts[4];
400                int undExs2 = counts[6] + counts[7] + counts[2] + counts[5];
401                //System.out.println("Split: "+posExs2 +"---"+negExs2+"--"+undExs2);
402                MassFunction<Integer> bestBba = DSTUtils.getBBA(posExs2,negExs2,undExs2);
403
404                double bestNonSpecificity = bestBba.getNonSpecificityMeasureValue();
405                bestBba.getConfusionMeasure();
406                logger.debug("%+10e\n",bestNonSpecificity);
407
408                System.out.println(concepts[0]);
409
410                for (int c=1; c<concepts.length; c++) {
411
412                        counts = getSplitCounts(concepts[c], posExs, negExs, undExs);
413//                      logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 
414//                                      "#"+c, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]);
415
416                        logger.debug("#"+c+"\t p:"+counts[POSITIVE_INSTANCE_CHECK_TRUE]+"n:"+counts[POSITIVE_INSTANCE_CHECK_FALSE]+"u:"+counts[POSITIVE_INSTANCE_CHECK_UNC] +"\t p:"+counts[NEGATIVE_INSTANCE_CHECK_TRUE] +" n:"+counts[NEGATIVE_INSTANCE_CHECK_FALSE] +" u:"+ counts[NEGATIVE_INSTANCE_CHECK_UNC]+"\t p:"+counts[UNCERTAIN_INSTANCE_CHECK_TRUE] +" n:"+counts[UNCERTAIN_INSTANCE_CHECK_FALSE] +" u:"+counts[UNCERTAIN_INSTANCE_CHECK_UNC] +"\t ");
417                        MassFunction<Integer> thisbba = DSTUtils.getBBA(counts[0] + counts[1],counts[3] + counts[4],counts[6] + counts[7] + counts[2] + counts[5]);
418                        double thisNonSpecificity = thisbba.getNonSpecificityMeasureValue();
419                        thisbba.getGlobalUncertaintyMeasure();
420                        logger.debug("%+10e\n",thisNonSpecificity);
421                        logger.debug("%+10e\n",thisNonSpecificity);
422                        logger.debug(concepts[c].toString());
423                        //select the worst concept
424                        if(thisNonSpecificity >= bestNonSpecificity) {
425                                //                      if(thisGlobalUncMeasure < bestTotaluncertaintyMeasure) {
426                                bestConceptIndex = c;
427                                bestNonSpecificity = thisNonSpecificity;
428                                bestBba= thisbba;
429                        }
430                }
431
432                logger.debug("best gain: %f \t split #%d\n", bestNonSpecificity, bestConceptIndex);
433                Couple<OWLClassExpression,MassFunction> name = new Couple<>();
434                name.setFirstElement(concepts[bestConceptIndex]);
435                name.setSecondElement(bestBba);
436                return name;
437        }
438
439/**
440 * Selct the worst concept in terms of information gain. To be used jointly with 
441 * @param concepts
442 * @param posExs
443 * @param negExs
444 * @param undExs
445 * @param perPos
446 * @param perNeg
447 * @return
448 */
449        public OWLClassExpression selectWorstConcept(OWLClassExpression[] concepts, SortedSet<OWLIndividual> posExs,
450                        SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs, double perPos, double perNeg) {
451                // TODO Auto-generated method stub
452                int[] counts;
453
454                int bestConceptIndex = 0;
455
456                counts = getSplitCounts(concepts[0], posExs, negExs, undExs);
457                //logger.debug("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ", 
458                        //      "#"+0, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]);
459                logger.debug("#"+ 0+"  "+concepts[0]+"\t p:"+counts[0]+"n:"+counts[1]+"u:"+counts[2] +"\t p:"+counts[3] +" n:"+counts[4] +" u:"+ counts[5]+"\t p:"+counts[6] +" n:"+counts[7] +" u:"+counts[8] +"\t ");
460                double bestGain = gain(counts, perPos, perNeg);
461
462                System.out.printf("%+10e\n",bestGain);
463
464                System.out.println(concepts[0]);
465
466                for (int c=1; c<concepts.length; c++) {
467
468                        counts = getSplitCounts(concepts[c], posExs, negExs, undExs);
469                        logger.debug("#"+c+"   "+concepts[c]+"   p: "+counts[0]+"n:"+counts[1]+"u:"+counts[2] +"\t p:"+counts[3] +" n:"+counts[4] +" u:"+ counts[5]+"\t p:"+counts[6] +" n:"+counts[7] +" u:"+counts[8] +"\t ");
470
471                        double thisGain = gain(counts, perPos, perNeg);
472                        logger.debug(thisGain+"\n");
473                        logger.debug(concepts[c].toString());
474                        if(thisGain > bestGain) {
475                                bestConceptIndex = c;
476                                bestGain = thisGain;
477                        }
478                }
479
480                System.out.printf("best gain: "+ bestGain+" \t split "+ concepts[bestConceptIndex]);
481                return concepts[bestConceptIndex];
482
483        }
484
485        
486        
487}