001package org.dllearner.algorithms.miles;
002
003import java.io.File;
004import java.io.IOException;
005import java.util.ArrayList;
006import java.util.List;
007import java.util.Random;
008import java.util.Set;
009import java.util.SortedSet;
010
011import org.dllearner.algorithms.celoe.CELOE;
012import org.dllearner.core.AbstractReasonerComponent;
013import org.dllearner.core.EvaluatedDescription;
014import org.dllearner.core.KnowledgeSource;
015import org.dllearner.core.Score;
016import org.dllearner.kb.OWLFile;
017import org.dllearner.learningproblems.ClassLearningProblem;
018import org.dllearner.learningproblems.EvaluatedDescriptionClass;
019import org.dllearner.learningproblems.PosNegLP;
020import org.dllearner.reasoning.ClosedWorldReasoner;
021import org.semanticweb.owlapi.model.IRI;
022import org.semanticweb.owlapi.model.OWLClassExpression;
023import org.semanticweb.owlapi.model.OWLIndividual;
024
025import uk.ac.manchester.cs.owl.owlapi.OWLClassImpl;
026import weka.classifiers.AbstractClassifier;
027import weka.classifiers.Evaluation;
028import weka.classifiers.functions.LinearRegression;
029import weka.classifiers.trees.J48;
030import weka.core.Attribute;
031import weka.core.DenseInstance;
032import weka.core.Instance;
033import weka.core.Instances;
034import weka.core.converters.ArffSaver;
035
036import com.google.common.collect.Lists;
037import com.google.common.collect.Sets;
038/**
039 * @author Lorenz Buehmann
040 *
041 */
042public class DescriptionLinearClassifier {
043        
044        private AbstractReasonerComponent rc;
045        private Set<OWLIndividual> posExamples;
046        private Set<OWLIndividual> negExamples;
047        
048        private boolean writeArffFile = true;
049
050        public DescriptionLinearClassifier(PosNegLP lp, AbstractReasonerComponent rc) {
051                this(lp.getPositiveExamples(), lp.getNegativeExamples(), rc);
052        }
053        
054        public DescriptionLinearClassifier(ClassLearningProblem lp, AbstractReasonerComponent rc) {
055                this(rc.getIndividuals(lp.getClassToDescribe()), Sets.difference(rc.getIndividuals(),rc.getIndividuals(lp.getClassToDescribe())), rc);
056        }
057        
058        public DescriptionLinearClassifier(Set<OWLIndividual> posExamples, Set<OWLIndividual> negExamples, AbstractReasonerComponent rc) {
059                this.posExamples = posExamples;
060                this.negExamples = negExamples;
061                this.rc = rc;
062        }
063        
064        public void getLinearCombination(List<OWLClassExpression> descriptions){
065                //get common data
066                Instances data = buildData(descriptions);
067                
068                //compute linear regression model
069                data.setClassIndex(data.numAttributes() - 1);
070                AbstractClassifier model = new LinearRegression();
071                model = new J48();
072                try {
073                        model.buildClassifier(data);
074//                      System.out.println(model);
075                        
076//                      AddExpression filter = new AddExpression();
077//                      filter.setExpression("a1^2");
078//                      FilteredClassifier filteredClassifier = new FilteredClassifier();
079//                      filteredClassifier.setClassifier(model);
080//                      filteredClassifier.setFilter(filter);
081//                      filteredClassifier.buildClassifier(data);
082//                      logger.debug(filteredClassifier.getClassifier());
083                        
084                        Evaluation eval = new Evaluation(data);
085                        eval.crossValidateModel(model, data, 10, new Random(1));
086                        System.out.println(eval.toSummaryString(true));
087                        
088                } catch (Exception e) {
089                        e.printStackTrace();
090                }
091        }
092        
093        private Instances buildData(List<OWLClassExpression> descriptions){
094                //#attributes = #descriptions + 1 for the target class
095                int numAttributes = descriptions.size() + 1;
096                ArrayList<Attribute> attInfo = new ArrayList<>(numAttributes);
097                
098                for (int i = 0; i < descriptions.size(); i++) {
099                        attInfo.add(new Attribute("C_" + String.valueOf(i)));
100                }
101                attInfo.add(new Attribute("t", Lists.newArrayList("0","1")));
102                        
103                Instances data = new Instances("rel", attInfo, posExamples.size()+negExamples.size());
104                
105                //basically, we have two strategies to build the matrix:
106                //1. for each example check for each concept whether it's instance of
107                //2. for each concept get all instances
108                
109                //apply 2. strategy
110                List<SortedSet<OWLIndividual>> individualsList = new ArrayList<>(descriptions.size());
111                for (OWLClassExpression description : descriptions) {
112                        SortedSet<OWLIndividual> individuals = rc.getIndividuals(description);
113                        individualsList.add(individuals);
114                }
115                //handle pos examples
116                for (OWLIndividual posEx : posExamples) {
117                        double[] attValues = new double[numAttributes];
118                        
119                        for (int i = 0; i < descriptions.size(); i++) {
120                                attValues[i] = individualsList.get(i).contains(posEx) ? 1.0 : 0.0;
121                        }
122                        
123                        //last attribute value is 1
124                        attValues[numAttributes-1] = 1;
125                        
126                        Instance instance = new DenseInstance(1.0, attValues);
127                        data.add(instance);
128                        instance.setDataset(data);
129                }
130                
131                // handle neg examples
132                for (OWLIndividual negEx : negExamples) {
133                        double[] attValues = new double[numAttributes];
134
135                        for (int i = 0; i < descriptions.size(); i++) {
136                                attValues[i] = individualsList.get(i).contains(negEx) ? 1 : 0;
137                        }
138
139                        //last attribute value is 0
140                        attValues[numAttributes-1] = 0;
141
142                        Instance instance = new DenseInstance(1.0, attValues);
143                        data.add(instance);
144                }
145                System.out.println(data.toString());
146                if(writeArffFile ){
147                        try {
148                                writeArffFile(data, new File("./data/test.arff"));
149                        } catch (IOException e) {
150                                e.printStackTrace();
151                        }
152                }
153                return data;
154        }
155        
156        private void writeArffFile(Instances dataSet, File file) throws IOException {
157                ArffSaver saver = new ArffSaver();
158                saver.setInstances(dataSet);
159                saver.setFile(file);
160                saver.writeBatch();
161        }
162        
163        public static void main(String[] args) throws Exception {
164                KnowledgeSource ks = new OWLFile("../examples/swore/swore.rdf");
165                AbstractReasonerComponent rc = new ClosedWorldReasoner(ks);
166                rc.init();
167                ClassLearningProblem lp = new ClassLearningProblem(rc);
168                lp.setClassToDescribe(new OWLClassImpl(IRI.create("http://ns.softwiki.de/req/CustomerRequirement")));
169                lp.init();
170                CELOE celoe = new CELOE(lp, rc);
171                celoe.setNoisePercentage(1.0);
172                celoe.setMaxExecutionTimeInSeconds(3);
173                celoe.init();
174                celoe.start();
175                
176                List<OWLClassExpression> descriptions = new ArrayList<>();
177                for (EvaluatedDescription<? extends Score> ed : celoe.getCurrentlyBestEvaluatedDescriptions(100)) {
178                        if(((EvaluatedDescriptionClass)ed).getAdditionalInstances().size() > 0){
179                                System.out.println(ed);
180                                System.out.println(((EvaluatedDescriptionClass)ed).getAdditionalInstances());
181                                descriptions.add(ed.getDescription());
182                        }
183                        if(descriptions.size() == 3) break;
184                }
185//              descriptions.addAll(celoe.getCurrentlyBestDescriptions(2));
186//              descriptions.add(new NamedClass("http://ns.softwiki.de/req/Requirement"));
187//              descriptions.add(new NamedClass("http://ns.softwiki.de/req/Customer"));
188                
189                DescriptionLinearClassifier dlc = new DescriptionLinearClassifier(lp, rc);
190                dlc.getLinearCombination(descriptions);
191        }
192
193}