001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.decisiontrees.tdt;
020
021import java.util.ArrayList;
022import java.util.Collections;
023import java.util.List;
024import java.util.SortedSet;
025import java.util.Stack;
026
027import org.dllearner.algorithms.decisiontrees.heuristics.TreeInductionHeuristics;
028import org.dllearner.algorithms.decisiontrees.refinementoperators.DLTreesRefinementOperator;
029import org.dllearner.algorithms.decisiontrees.tdt.model.DLTree;
030import org.dllearner.core.AbstractCELA;
031import org.dllearner.core.AbstractClassExpressionLearningProblem;
032import org.dllearner.core.AbstractReasonerComponent;
033import org.dllearner.core.ComponentAnn;
034import org.dllearner.core.ComponentInitException;
035import org.dllearner.core.config.ConfigOption;
036import org.dllearner.refinementoperators.RefinementOperator;
037import org.semanticweb.owlapi.model.OWLClassExpression;
038import org.semanticweb.owlapi.model.OWLIndividual;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042@ComponentAnn(name="ATDT", shortName="atdt", version=1.0, description="An abstract Terminological Decision Tree")
043public abstract class AbstractTDTClassifier extends AbstractCELA {
044        
045        private static Logger logger = LoggerFactory.getLogger(AbstractTDTClassifier.class);
046
047        protected boolean stop;
048
049        public double getPuritythreshold() {
050                return puritythreshold;
051        }
052
053        public void setPuritythreshold(double puritythreshold) {
054                this.puritythreshold = puritythreshold;
055        }
056
057//      public int getBeam() {
058//              return beam;
059//      }
060
061//      public void setBeam(int beam) {
062//              this.beam = beam;
063//      }
064
065        public boolean isBinaryClassification() {
066                return binaryClassification;
067        }
068
069        public void setBinaryClassification(boolean binaryClassification) {
070                this.binaryClassification = binaryClassification;
071        }
072
073        public OWLClassExpression getClassToDescribe() {
074                return classToDescribe;
075        }
076
077        public void setClassToDescribe(OWLClassExpression classToDescribe) {
078                this.classToDescribe = classToDescribe;
079        }
080
081        public TreeInductionHeuristics getHeuristic() {
082                return heuristic;
083        }
084
085        public void setHeuristic(TreeInductionHeuristics heuristic) {
086                this.heuristic = heuristic;
087        }
088
089        public RefinementOperator getOperator() {
090                return operator;
091        }
092
093        public void setOperator(RefinementOperator operator) {
094                this.operator = operator;
095        }
096
097        @ConfigOption(defaultValue = "0.05", description = "Purity threshold for setting a leaf")
098        protected  double puritythreshold;
099
100        //@ConfigOption(defaultValue = "4")
101        //protected int  beam;
102        
103        @ConfigOption(defaultValue = "false", description = "if it is a binary classification problem")
104        protected boolean binaryClassification;
105        
106        @ConfigOption(defaultValue = "false", description = "value for limiting the number of generated concepts")
107        protected boolean ccp;
108        
109        public boolean isCcp() {
110                return ccp;
111        }
112
113        public void setCcp(boolean ccp) {
114                this.ccp = ccp;
115        }
116
117        @ConfigOption(defaultValue = "false", description = "for overcoming the problem of missing values in tree algorithms.tree.models")
118        protected boolean missingValueTreatmentForTDT;
119        protected double prPos;
120        protected double prNeg;
121        @ConfigOption(description = "concept for splitting undefined examples into positive and negative for binary classification problems")
122        protected OWLClassExpression classToDescribe; //target concept
123        @ConfigOption(description = "the heuristic instance to use", defaultValue = "TreeInductionHeuristics")
124        protected TreeInductionHeuristics heuristic; // heuristic
125        //protected LengthLimitedRefinementOperator operator ;// refinement operator
126
127        @ConfigOption(description = "the refinement operator instance to use", defaultValue = "DLTreesRefinementOperator")
128        protected RefinementOperator operator;
129
130        public boolean isMissingValueTreatmentForTDT() {
131                return missingValueTreatmentForTDT;
132        }
133
134        public void setMissingValueTreatmentForTDT(boolean missingValueTreatmentForTDT) {
135                this.missingValueTreatmentForTDT = missingValueTreatmentForTDT;
136        }
137
138        
139        
140        public AbstractTDTClassifier(AbstractClassExpressionLearningProblem problem, AbstractReasonerComponent reasoner, RefinementOperator op) {
141                super(problem, reasoner);
142                //              configurator = new CELOEConfigurator(this);
143        
144        this.operator=op;
145        System.out.println(operator==null);
146        }
147        
148        
149        @Override
150        public void start() {
151
152                // TODO Auto-generated method stub
153
154                
155                                
156        }
157
158        
159        @Override
160        public void init() throws ComponentInitException {
161                // TODO Auto-generated method stub
162                baseURI = reasoner.getBaseURI();
163                prefixes = reasoner.getPrefixes();
164
165                // if no one injected a heuristic, we use a default one
166                if(heuristic == null) {
167                        heuristic = new TreeInductionHeuristics();
168                        heuristic.setProblem(learningProblem);
169                        heuristic.setReasoner(reasoner);
170                        heuristic.init();
171                }
172
173                
174                        
175                
176                if(operator == null) {
177        System.out.println("OPERATOR:"+operator==null);
178//                      // default operator
179        operator = new DLTreesRefinementOperator();
180        ((DLTreesRefinementOperator)operator).setReasoner(reasoner);
181        ((DLTreesRefinementOperator)operator).setBeam(10); // default value
182////
183////                    if(operator instanceof CustomStartRefinementOperator) {
184////                            ((CustomStartRefinementOperator)operator).setStartClass(startClass);
185////                    }
186////                    if(operator instanceof ReasoningBasedRefinementOperator) {
187////                            ((ReasoningBasedRefinementOperator)operator).setReasoner(reasoner);
188////                    }
189        operator.init();
190        
191                //System.out.println(operator==null);
192//
193//
194    }
195
196                //start to learn the new current concept description
197                
198                
199                initialized = true;
200        }
201
202        @Override
203        public void stop() {
204                // TODO Auto-generated method stub
205                stop = true;
206
207        }
208
209        @Override
210        public boolean isRunning() {
211                // TODO Auto-generated method stub
212                return (!stop);
213        }
214
215        public abstract DLTree induceDLTree(SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs,   SortedSet<OWLIndividual> undExs);
216
217        public  int  classify(OWLIndividual indTestEx, DLTree trees) {
218
219                //int length = testConcepts!=null?testConcepts.length:1;
220                //for (int c=0; c < length; c++) {
221                        if (missingValueTreatmentForTDT){
222                                ArrayList<Integer> list= new ArrayList<>();
223                                return  classifyExample(list,indTestEx, trees);
224
225                        }
226                        else
227                                return classifyExample(indTestEx, trees);
228
229                //} // for c
230
231        }
232
233        
234        public int classifyExample(OWLIndividual indTestEx, DLTree tree) {
235
236                Stack<DLTree> stack= new Stack<>();
237                //OWLDataFactory dataFactory = kb.getDataFactory();
238                stack.add(tree);
239                int result=0;
240                boolean stop=false;
241
242                if (!binaryClassification){
243                        while(!stack.isEmpty() && !stop){
244                                DLTree currentTree= stack.pop();
245
246                                OWLClassExpression rootClass = currentTree.getRoot();
247                                //                      System.out.println("Root class: "+ rootClass);
248                                if (rootClass.equals(dataFactory.getOWLThing())){
249                                        stop=true;
250                                        result=+1;
251
252                                }
253                                else if (rootClass.equals(dataFactory.getOWLNothing())){
254                                        stop=true;
255                                        result=-1;
256
257                                }else if (reasoner.hasType(rootClass, indTestEx))
258                                        stack.push(currentTree.getPosSubTree());
259                                else if (reasoner.hasType(dataFactory.getOWLObjectComplementOf(rootClass), indTestEx))
260                                        stack.push(currentTree.getNegSubTree());
261                                else {
262                                        stop=true;
263                                        result=0;
264
265                                }
266
267                        }
268                }else{
269                        while(!stack.isEmpty() && !stop){
270                                DLTree currentTree= stack.pop();
271
272                                OWLClassExpression rootClass = currentTree.getRoot();
273                                //                      System.out.println("Root class: "+ rootClass);
274                                if (rootClass.equals(dataFactory.getOWLThing())){
275                                        stop=true;
276                                        result=+1;
277
278                                }
279                                else if (rootClass.equals(dataFactory.getOWLNothing())){
280                                        stop=true;
281                                        result=-1;
282
283                                }else if (reasoner.hasType(rootClass, indTestEx))
284                                        stack.push(currentTree.getPosSubTree());
285                                else
286                                        stack.push(currentTree.getNegSubTree()); // for those kb having no full complement
287
288                        }
289                }
290        
291
292        return result;
293
294}
295
296/**
297 * Alternative exploration of a Tree classifier for DL ontology
298 * @param list
299 * @param indTestEx
300 * @param tree
301 * @return
302 */
303public int classifyExample(List<Integer> list, OWLIndividual indTestEx, DLTree tree) {
304        Stack<DLTree> stack= new Stack<>();
305        //OWLDataFactory dataFactory = kb.getDataFactory();
306        stack.add(tree);
307        int result=0;
308        boolean stop=false;
309        while(!stack.isEmpty() && !stop){
310                DLTree currentTree= stack.pop();
311
312                OWLClassExpression rootClass = currentTree.getRoot();
313                //                      System.out.println("Root class: "+ rootClass);
314                if (rootClass.equals(dataFactory.getOWLThing())){
315                        //                              stop=true;
316                        result=+1;
317                        list.add(result);
318
319                }
320                else if (rootClass.equals(dataFactory.getOWLNothing())){
321                        //                              stop=true;
322                        result=-1;
323                        list.add(result);
324
325                }else if (reasoner.hasType(rootClass, indTestEx))
326                        stack.push(currentTree.getPosSubTree());
327                else if (reasoner.hasType(dataFactory.getOWLObjectComplementOf(rootClass), indTestEx))
328                        stack.push(currentTree.getNegSubTree());
329                else {
330                        //                              stop=true;
331                        result=0;
332                        stack.push(currentTree.getPosSubTree());
333                        stack.push(currentTree.getNegSubTree());
334
335                }
336        }
337
338        int posFr= Collections.frequency(list, +1);
339        int negFr= Collections.frequency(list, -1);
340
341        if (posFr>negFr)
342                return +1;
343        else
344                return -1;
345
346}
347
348/*protected OWLClassExpression selectBestConcept(OWLClassExpression[] concepts, ArrayList<OWLIndividual> posExs, ArrayList<OWLIndividual> negExs,
349                ArrayList<OWLIndividual> undExs, double prPos, double prNeg) {
350
351        int[] counts;
352
353        int bestConceptIndex = 0;
354
355        counts = getSplitCounts(concepts[0], posExs, negExs, undExs);
356        System.out.printf("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ",
357                        "#"+0, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]);
358
359        double bestGain = gain(counts, prPos, prNeg);
360
361        System.out.printf("%+10e\n",bestGain);
362
363        System.out.println(concepts[0]);
364
365        for (int c=1; c<concepts.length; c++) {
366
367                counts = getSplitCounts(concepts[c], posExs, negExs, undExs);
368                System.out.printf("%4s\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t p:%d n:%d u:%d\t ",
369                                "#"+c, counts[0], counts[1], counts[2], counts[3], counts[4], counts[5], counts[6], counts[7], counts[8]);
370
371                double thisGain = gain(counts, prPos, prNeg);
372                System.out.printf("%+10e\n",thisGain);
373                System.out.println(concepts[c]);
374                if(thisGain < bestGain) {
375                        bestConceptIndex = c;
376                        bestGain = thisGain;
377                }
378        }
379
380        System.out.printf("best gain: %f \t split #%d\n", bestGain, bestConceptIndex);
381        return concepts[bestConceptIndex];
382}*/
383
384public AbstractTDTClassifier() {
385        super();
386}
387
388/*private int[] getSplitCounts(OWLClassExpression concept, ArrayList<OWLIndividual> posExs, ArrayList<OWLIndividual> negExs,
389                ArrayList<OWLIndividual> undExs) {
390        
391        int[] counts = new int[9];
392        ArrayList<OWLIndividual> posExsT = new ArrayList<OWLIndividual>();
393        ArrayList<OWLIndividual> negExsT = new ArrayList<OWLIndividual>();
394        ArrayList<OWLIndividual> undExsT = new ArrayList<OWLIndividual>();
395
396        ArrayList<OWLIndividual> posExsF = new ArrayList<OWLIndividual>();
397        ArrayList<OWLIndividual> negExsF = new ArrayList<OWLIndividual>();
398        ArrayList<OWLIndividual> undExsF = new ArrayList<OWLIndividual>();
399
400        ArrayList<OWLIndividual> posExsU = new ArrayList<OWLIndividual>();
401        ArrayList<OWLIndividual> negExsU = new ArrayList<OWLIndividual>();
402        ArrayList<OWLIndividual> undExsU = new ArrayList<OWLIndividual>();
403
404        splitGroup(concept,posExs,posExsT,posExsF,posExsU);
405        splitGroup(concept,negExs,negExsT,negExsF,negExsU);
406        splitGroup(concept,undExs,undExsT,undExsF,undExsU);
407
408        counts[POSITIVE_INSTANCE_CHECK_TRUE] = posExsT.size();
409        counts[NEGATIVE_INSTANCE_CHECK_TRUE] = negExsT.size();
410        counts[UNCERTAIN_INSTANCE_CHECK_TRUE] = undExsT.size();
411        counts[POSITIVE_INSTANCE_CHECK_FALSE] = posExsF.size();
412        counts[NEGATIVE_INSTANCE_CHECK_FALSE] = negExsF.size();
413        counts[UNCERTAIN_INSTANCE_CHECK_FALSE] = undExsF.size();
414        counts[POSITIVE_INSTANCE_CHECK_UNC] = posExsU.size();
415        counts[NEGATIVE_INSTANCE_CHECK_UNC] = negExsU.size();
416        counts[UNCERTAIN_INSTANCE_CHECK_UNC] = undExsU.size();
417        //              for(int i=0; i<counts.length;i++)
418        //                      System.out.println(counts[i]);
419
420        return counts;
421
422}*/
423
424//public abstract void prune(Integer[] pruningSet, AbstractTree tree, AbstractTree subtree);
425}