001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.decisiontrees.tdt;
020
021import java.util.ArrayList;
022import java.util.Set;
023import java.util.SortedSet;
024import java.util.Stack;
025import java.util.TreeSet;
026
027//import org.dllearner.algorithms.celoe.CELOE;
028import org.dllearner.core.AbstractClassExpressionLearningProblem;
029import org.dllearner.core.AbstractReasonerComponent;
030import org.dllearner.core.ComponentAnn;
031import org.dllearner.core.ComponentInitException;
032import org.dllearner.core.EvaluatedDescription;
033import org.dllearner.refinementoperators.RefinementOperator;
034import org.semanticweb.owlapi.model.OWLClassExpression;
035import org.semanticweb.owlapi.model.OWLIndividual;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038import org.dllearner.algorithms.decisiontrees.refinementoperators.DLTreesRefinementOperator;
039import org.dllearner.algorithms.decisiontrees.tdt.model.DLTree;
040import org.dllearner.algorithms.decisiontrees.utils.Couple;
041import org.dllearner.algorithms.decisiontrees.utils.Npla;
042import org.dllearner.algorithms.decisiontrees.utils.Split;
043import org.dllearner.learningproblems.PosNegUndLP;
044//import evaluation.Parameters;
045//import knowledgeBasesHandler.KnowledgeBase;
046@ComponentAnn(name="TDT", shortName="tdt", version=1.0, description="A Terminological Decision Tree")
047
048public class TDTClassifier extends AbstractTDTClassifier {
049        private static Logger logger = LoggerFactory.getLogger(TDTClassifier.class);
050        private DLTree currentmodel;
051        
052
053 //private RefinementOperator op;
054
055        /**
056         * Empty constructor for Spring
057         */
058        public TDTClassifier(){
059                super();
060
061        }
062
063        
064        //      public TDTClassifier(KnowledgeBase k){
065        //
066        //              super(k);
067        //
068        //      }
069        //
070
071        public TDTClassifier(AbstractClassExpressionLearningProblem problem, AbstractReasonerComponent reasoner, RefinementOperator op){
072                super(problem, reasoner,op);
073        }
074        
075        @Override
076        public void init() throws ComponentInitException{
077                
078                super.init();
079//              if(operator==null){
080//                      operator=new TDTRefinementOperatorWrapper(super.learningProblem,reasoner,10);
081//                      
082//              }
083//              else{
084//                      ((TDTRefinementOperatorWrapper)operator).setBeam(10);
085//                      ((TDTRefinementOperatorWrapper)operator).setLp(learningProblem);
086//                      ((TDTRefinementOperatorWrapper)operator).setRs(reasoner);
087//              }
088                initialized = true;
089        }
090        
091        
092        @Override
093        public DLTree induceDLTree(SortedSet<OWLIndividual> posExs, SortedSet<OWLIndividual> negExs, SortedSet<OWLIndividual> undExs) {
094                logger.info("Learning problem\t p:"+posExs.size()+"\t n:"+negExs.size()+"\t u:"+undExs.size()+"\t prPos:"+prPos+"\t prNeg:"+prNeg+"\n");
095                //ArrayList<OWLIndividual> truePos= posExs;
096                //ArrayList<OWLIndividual> trueNeg= negExs;
097                
098                DLTreesRefinementOperator dlTreesRefinementOperator = (DLTreesRefinementOperator)operator;
099                
100                Npla<SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, Integer, Double, Double> examples = new Npla<>(posExs, negExs, undExs, 10, prPos, prNeg);
101                DLTree tree = new DLTree(); // new (sub)tree
102                Stack<Couple<DLTree,Npla<SortedSet<OWLIndividual>,SortedSet<OWLIndividual>,SortedSet<OWLIndividual>, Integer, Double, Double>>> stack= new Stack<>();
103                Couple<DLTree,Npla<SortedSet<OWLIndividual>,SortedSet<OWLIndividual>,SortedSet<OWLIndividual>, Integer, Double, Double>> toInduce= new Couple<>();
104                toInduce.setFirstElement(tree);
105                toInduce.setSecondElement(examples);
106                stack.push(toInduce);
107                
108                Stack<DLTree> lastTrees= new Stack<>(); // for refine hierarchically a concept
109
110                while(!stack.isEmpty()){
111                        //System.out.printf("Stack: %d \n",stack.size());
112                        Couple<DLTree, Npla<SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, Integer, Double, Double>> current= stack.pop(); // extract the next element
113                        DLTree currentTree= current.getFirstElement();
114                        Npla<SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, Integer, Double, Double> currentExamples= current.getSecondElement();
115                        // set of negative, positive and undefined example
116                        posExs=currentExamples.getFirst();
117                        negExs=currentExamples.getSecond();
118                        undExs=currentExamples.getThird();
119                        if (posExs.size() == 0 && negExs.size() == 0) // no exs
120                                if (prPos >= prNeg) { // prior majority 
121                                        currentTree.setRoot(OWL_THING); // set positive leaf
122                                }
123                                else { // prior majority of negatives
124                                        currentTree.setRoot(OWL_NOTHING); // set negative leaf
125                                }
126
127                        //              double numPos = posExs.size() + undExs.size()*prPos;
128                        //              double numNeg = negExs.size() + undExs.size()*prNeg;
129                        else{
130                                double numPos = posExs.size();
131                                double numNeg = negExs.size();
132                                double perPos = numPos/(numPos+numNeg);
133                                double perNeg = numNeg/(numPos+numNeg);
134                                //                              prPos=perPos;
135                                //                              prNeg=perNeg;
136
137                                if (perNeg==0 && perPos > puritythreshold) { // no negative
138                                        currentTree.setRoot(dataFactory.getOWLThing()); // set positive leaf
139
140                                }
141                                else{
142                                        if (perPos==0 && perNeg > puritythreshold) { // no positive                     
143                                                currentTree.setRoot(dataFactory.getOWLNothing()); // set negative leaf
144
145                                        }               
146                                        // else (a non-leaf node) ...
147                                        else{
148                                                OWLClassExpression[] cConcepts= new OWLClassExpression[0];
149                                                                                                
150                                                Set<OWLClassExpression> refine = null; //dlTreesRefinementOperator.refine(dataFactory.getOWLThing(), posExs, negExs);
151                                                if (lastTrees.isEmpty()){
152                                                        refine = dlTreesRefinementOperator.refine(dataFactory.getOWLThing(), posExs, negExs);
153                                                }
154                                                else
155                                                        refine = dlTreesRefinementOperator.refine(lastTrees.pop().getRoot(), posExs, negExs);
156                                        
157                                                
158                                                
159                                                ArrayList<OWLClassExpression> cConceptsL = new ArrayList<>(refine);
160                                                //                                              cConceptsL= getRandomSelection(cConceptsL); // random selection of feature set
161
162                                                
163                                                cConcepts = cConceptsL.toArray(cConcepts);
164
165                                                // select node concept
166                                                //OWLClassExpression newRootConcept = //Parameters.CCP?(h
167                                                                OWLClassExpression newRootConcept =      null;
168                                                                if  (dlTreesRefinementOperator.getRo()==DLTreesRefinementOperator.ORIGINAL)
169                                                                                newRootConcept= ccp?heuristic.selectBestConceptCCP(cConcepts, posExs, negExs, undExs, prPos, prNeg):(heuristic.selectBestConcept(cConcepts, posExs, negExs, undExs, prPos, prNeg));
170                                                                                else
171                                                                                        newRootConcept= heuristic.selectWorstConcept(cConcepts, posExs, negExs, undExs, perPos, perNeg);
172                                                SortedSet<OWLIndividual> posExsT = new TreeSet<>();
173                                                SortedSet<OWLIndividual> negExsT = new TreeSet<>();
174                                                SortedSet<OWLIndividual> undExsT = new TreeSet<>();
175                                                SortedSet<OWLIndividual> posExsF = new TreeSet<>();
176                                                SortedSet<OWLIndividual> negExsF = new TreeSet<>();
177                                                SortedSet<OWLIndividual> undExsF = new TreeSet<>();
178
179                                                Split.split(newRootConcept, dataFactory, reasoner, posExs, negExs, undExs, posExsT, negExsT, undExsT, posExsF, negExsF, undExsF);
180                                                // select node concept
181                                                currentTree.setRoot(newRootConcept);            
182                                                // build subtrees
183
184                                                
185                                                DLTree posTree= new DLTree();
186                                                DLTree negTree= new DLTree(); // recursive calls simulation
187                                                currentTree.setPosTree(posTree);
188                                                currentTree.setNegTree(negTree);
189                                                Npla<SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, SortedSet<OWLIndividual>, Integer, Double, Double> npla1 = new Npla<>(posExsT, negExsT, undExsT, 10, perPos, perNeg);
190                                                Npla<SortedSet<OWLIndividual>,SortedSet<OWLIndividual>,SortedSet<OWLIndividual>, Integer, Double, Double> npla2 = new Npla<>(posExsF, negExsF, undExsF, 10, perPos, perNeg);
191                                                Couple<DLTree,Npla<SortedSet<OWLIndividual>,SortedSet<OWLIndividual>,SortedSet<OWLIndividual>, Integer, Double, Double>> pos= new Couple<>();
192                                                pos.setFirstElement(posTree);
193                                                pos.setSecondElement(npla1);
194
195                                                // negative branch
196                                                Couple<DLTree,Npla<SortedSet<OWLIndividual>,SortedSet<OWLIndividual>,SortedSet<OWLIndividual>, Integer, Double, Double>> neg= new Couple<>();
197                                                neg.setFirstElement(negTree);
198                                                neg.setSecondElement(npla2);
199                                                stack.push(neg);
200                                                stack.push(pos);
201                                                lastTrees.push(currentTree);
202                                        }
203                                }
204                        }
205                }
206                stop= true;
207                
208                return tree;
209
210        }
211
212/**
213 * Procedure for deriving a concept description from a TDT classifier
214 * @param model
215 * @return
216 */
217public OWLClassExpression deriveDefinition(DLTree model){
218        OWLClassExpression definition = DLTree.deriveDefinition(model, true);
219        if (definition.isOWLThing())
220        return dataFactory.getOWLObjectComplementOf(DLTree.deriveDefinition(model, false)); // derive concept definition from the positive instances
221        else return definition;
222        //it is possible to derive the complement concept description by setting the flag to false
223        
224}
225        
226
227@Override
228public void start() {
229
230        // TODO Auto-generated method stub
231        
232        PosNegUndLP posNegUndLP = (PosNegUndLP)learningProblem;
233        SortedSet<OWLIndividual> posExs = (SortedSet<OWLIndividual>)(posNegUndLP.getPositiveExamples());
234        SortedSet<OWLIndividual> negExs = (SortedSet<OWLIndividual>)posNegUndLP.getNegativeExamples();
235        SortedSet<OWLIndividual> undExs = (SortedSet<OWLIndividual>)posNegUndLP.getUncertainExamples();
236
237        System.out.println(posExs.size());
238        System.out.println(negExs.size());
239        System.out.println(undExs.size());
240        if (binaryClassification){
241        SortedSet<OWLIndividual> allExamples= new TreeSet<>();
242        allExamples.addAll(posExs);
243        allExamples.addAll(negExs);
244        allExamples.addAll(undExs);
245        //System.out.printf("--- Query Concept #%d \n",c);
246        
247
248        
249        // the individuals of the ABox are the training individuals
250        
251        OWLIndividual[] trainingExs= allExamples.toArray(new OWLIndividual[allExamples.size()]);
252        Split.splitting(dataFactory, reasoner, trainingExs, posExs, negExs, undExs, classToDescribe, binaryClassification);
253        }
254
255        prPos = (double)posExs.size()/(posExs.size()+ negExs.size()+ undExs.size());
256        prNeg = (double)negExs.size()/(posExs.size()+ negExs.size()+ undExs.size());
257
258        logger.debug("Training set composition: "+ posExs.size()+" - "+ negExs.size()+"-"+undExs.size());
259
260        double normSum = prPos+prNeg;
261        if (normSum==0) { prPos=.5;     prNeg=.5; }
262        else { prPos=prPos/normSum;     prNeg=prNeg/normSum; }
263
264        logger.info("New learning problem prepared.\n");
265        logger.info("Learning a tree ");
266        
267        
268        DLTree tree= this.induceDLTree(posExs, negExs, undExs); //tree induction
269        currentmodel=tree;
270        
271        stop();
272        
273                        
274}
275        
276        @Override
277        public OWLClassExpression getCurrentlyBestDescription() {
278                // TODO Auto-generated method stub
279                return DLTree.deriveDefinition(currentmodel, false);
280        }
281
282        @Override
283        public EvaluatedDescription getCurrentlyBestEvaluatedDescription() {
284                // TODO Auto-generated method stub
285                return null;
286        }
287
288        public DLTree getCurrentmodel() {
289                // TODO Auto-generated method stub
290                return currentmodel;
291        }
292
293}