/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.learningproblems;

import java.text.DecimalFormat;
import java.util.Set;

import org.semanticweb.owlapi.model.OWLEntity;

import com.google.common.collect.Sets;

/**
 * Computes the score (a negative value) by comparing the classification results
 * with the ideal results.
 *
 * TODO: The implementation is not very efficient, because some values are
 * computed only to be able to present the score results. It would be better
 * to perform only the computations required for the score itself and to defer
 * the remaining ones until the statistical values are actually requested.
 *
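 * <p>
 * Illustrative usage sketch (not taken from the code base): it shows how the
 * classification result sets and the example sets are passed to the constructor
 * and how the overall score is obtained. The individuals, IRIs, penalty values
 * and concept length below are made-up assumptions.
 * </p>
 * <pre>{@code
 * OWLDataFactory df = OWLManager.createOWLOntologyManager().getOWLDataFactory();
 * OWLNamedIndividual a = df.getOWLNamedIndividual(IRI.create("http://example.org#a"));
 * OWLNamedIndividual b = df.getOWLNamedIndividual(IRI.create("http://example.org#b"));
 * Set<OWLNamedIndividual> none = Collections.<OWLNamedIndividual>emptySet();
 * // suppose the learned concept classifies a as positive and b as negative,
 * // which matches the example labels exactly
 * ScoreThreeValued<OWLNamedIndividual> score = new ScoreThreeValued<OWLNamedIndividual>(
 *         3,     // concept length
 *         0.5,   // accuracy penalty (assumed value)
 *         1.0,   // error penalty (assumed value)
 *         false, // do not penalise neutral examples
 *         0.05,  // percent per length unit (assumed value)
 *         Sets.newHashSet(a), none, Sets.newHashSet(b),   // classification results
 *         Sets.newHashSet(a), none, Sets.newHashSet(b));  // example sets
 * double value = score.getScoreValue(); // 0.0 - 0.05 * 3 = -0.15
 * }</pre>
 *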
 * @author Jens Lehmann
 *
 */
public class ScoreThreeValued<T extends OWLEntity> extends ScorePosNeg<T> {

        private static final long serialVersionUID = -1780084688122949685L;

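        // Scoring variants: POSITIVE penalises an indeterminate (neutral) classification
        // only for positive examples; FULL additionally penalises negative examples
        // that are classified as neutral (see computeStatistics()).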
        public enum ScoreMethod {POSITIVE, FULL}

        // configuration options
        private double accuracyPenalty;
        private double errorPenalty;
        private boolean penaliseNeutralExamples;
        private double percentPerLengthUnit;

        // potential configuration options (not implemented as such, but one
        // could do so)
        private boolean showCorrectClassifications = false;
        private static ScoreMethod scoreMethod = ScoreMethod.POSITIVE;

        private Set<T> posClassified;
        private Set<T> neutClassified;
        private Set<T> negClassified;
        private Set<T> posExamples;
        private Set<T> neutExamples;
        private Set<T> negExamples;

    private Set<T> posAsNeg;
    private Set<T> negAsPos;
    private Set<T> posAsNeut;
    private Set<T> neutAsPos;
    private Set<T> neutAsNeg;
    private Set<T> negAsNeut;
    private Set<T> posAsPos;
    private Set<T> negAsNeg;
    private Set<T> neutAsNeut;

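    // derived statistics; score is normalised to roughly [-1, 0] and then further
    // reduced by the length penalty (see computeStatistics())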
    private double score;
    private double accuracy;
    private double accuracyOnExamples;
    private double accuracyOnPositiveExamples;
    private double errorRate;

    private int nrOfExamples;
    private int conceptLength;

    public ScoreThreeValued(int conceptLength,
                double accuracyPenalty,
                double errorPenalty,
                boolean penaliseNeutralExamples,
                double percentPerLengthUnit,
                Set<T> posClassified,
                Set<T> neutClassified,
                Set<T> negClassified,
                Set<T> posExamples,
                Set<T> neutExamples,
                Set<T> negExamples) {
        this.conceptLength = conceptLength;
        this.accuracyPenalty = accuracyPenalty;
        this.errorPenalty = errorPenalty;
        this.penaliseNeutralExamples = penaliseNeutralExamples;
        this.percentPerLengthUnit = percentPerLengthUnit;
        this.posClassified = posClassified;
        this.neutClassified = neutClassified;
        this.negClassified = negClassified;
        this.posExamples = posExamples;
        this.neutExamples = neutExamples;
        this.negExamples = negExamples;
        nrOfExamples = posExamples.size() + negExamples.size();
        computeClassificationMatrix();
        computeStatistics();
    }

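    /**
     * Computes the nine cells of the three-valued confusion matrix by intersecting
     * each example set with each classification result set. Note that Guava's
     * {@link Sets#intersection(Set, Set)} returns unmodifiable views backed by the
     * underlying sets rather than copies.
     */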
    private void computeClassificationMatrix() {
        posAsNeg = Sets.intersection(posExamples, negClassified);
        negAsPos = Sets.intersection(negExamples, posClassified);
        posAsNeut = Sets.intersection(posExamples, neutClassified);
        neutAsPos = Sets.intersection(neutExamples, posClassified);
        neutAsNeg = Sets.intersection(neutExamples, negClassified);
        negAsNeut = Sets.intersection(negExamples, neutClassified);
        // the following three computations are not essential for the score, so they
        // could be moved elsewhere if that improves performance
        posAsPos = Sets.intersection(posExamples, posClassified);
        negAsNeg = Sets.intersection(negExamples, negClassified);
        neutAsNeut = Sets.intersection(neutExamples, neutClassified);
    }

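    /**
     * Computes the score and the accuracy statistics. The raw penalty sum
     * <pre>
     *   -(|posAsNeg| + |negAsPos|) * errorPenalty - |posAsNeut| * accuracyPenalty
     *   (minus further accuracy penalties depending on scoreMethod and
     *    penaliseNeutralExamples)
     * </pre>
     * is divided by the worst possible value (nrOfExamples * errorPenalty), which is
     * intended to normalise the score to the range [-1, 0]; the length penalty
     * percentPerLengthUnit * conceptLength is subtracted afterwards.
     */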
    private void computeStatistics() {
        score = - posAsNeg.size() * errorPenalty
                - negAsPos.size() * errorPenalty
                - posAsNeut.size() * accuracyPenalty;

        if(scoreMethod == ScoreMethod.FULL)
                score -= negAsNeut.size() * accuracyPenalty;

        if(penaliseNeutralExamples)
                score -= (neutAsPos.size() * accuracyPenalty
            + neutAsNeg.size() * accuracyPenalty);

        // TODO: the accuracy penalty could be used here instead of the error penalty
        double worstValue = nrOfExamples * errorPenalty;
        // yields a number between -1 and 0
        score = score / worstValue;
        score -= percentPerLengthUnit * conceptLength;

        // the following computations could also be moved elsewhere for
        // performance reasons
        // int domainSize = abox.domain.size();
        int numberOfExamples = posExamples.size() + negExamples.size();
        int domainSize = numberOfExamples + neutExamples.size();
        int correctlyClassified = posAsPos.size() + negAsNeg.size() + neutAsNeut.size();
        int correctOnExamples = posAsPos.size() + negAsNeg.size();
        int errors = posAsNeg.size() + negAsPos.size();

        // accuracy = correctly classified individuals divided by the number of domain elements
        accuracy = (double) correctlyClassified / domainSize;

        // accuracy on examples = correctly classified individuals divided by the number
        // of positive and negative examples
        accuracyOnExamples = (double) correctOnExamples / numberOfExamples;

        accuracyOnPositiveExamples = (double) posAsPos.size() / posExamples.size();

        // error rate = completely misclassified individuals divided by the number
        // of positive and negative examples
        errorRate = (double) errors / numberOfExamples;
    }

    @Override
    public double getScoreValue() {
        return score;
    }

        /**
         * @return number of individuals that received exactly the same
         *         classification under both definitions
         */
    public int getMatchRate() {
        return posAsPos.size() + negAsNeg.size();
    }

        /**
         * @return number of individuals for which class membership w.r.t. the
         *         given query could not be determined using the induced
         *         definition, although they actually belong (or do not belong)
         *         to the query concept
         */
    public int getOmmissionErrorRate() {
        return posAsNeut.size() + negAsNeut.size();
    }

        /**
         * @return number of individuals found not to belong to the query concept
         *         according to the induced definition, although they actually
         *         belong to it, and vice versa
         */
    public int getCommissionErrorRate() {
        return posAsNeg.size() + negAsPos.size();
    }

        /**
         * @return number of individuals found to belong or not to belong to the
         *         query concept according to the induced definition, while
         *         neither case is logically derivable from the knowledge base
         *         with the original definition
         */
    public int getInductionRate() {
        return neutAsPos.size() + neutAsNeg.size();
    }

    @Override
    public String toString() {
        DecimalFormat df = new DecimalFormat("0.00");
        String str = "";
        str += "score method ";
        if(scoreMethod == ScoreMethod.FULL)
                str += "full";
        else
                str += "positive";
        if(!penaliseNeutralExamples)
                str += " (neutral examples not penalized)";
        str += "\n";
        if(showCorrectClassifications) {
            str += "Correctly classified:\n";
            str += "  positive --> positive: " + posAsPos + "\n";
            str += "  neutral --> neutral: " + neutAsNeut + "\n";
            str += "  negative --> negative: " + negAsNeg + "\n";
        }
        str += "Inaccurately classified (penalty of " + df.format(accuracyPenalty) + " per instance):\n";
        str += "  positive --> neutral: " + posAsNeut + "\n";
        if(penaliseNeutralExamples) {
                str += "  neutral --> positive: " + neutAsPos + "\n";
                str += "  neutral --> negative: " + neutAsNeg + "\n";
        }
        if(scoreMethod == ScoreMethod.FULL)
                str += "  negative --> neutral: " + negAsNeut + "\n";
        str += "Classification errors (penalty of " + df.format(errorPenalty) + " per instance):\n";
        str += "  positive --> negative: " + posAsNeg + "\n";
        str += "  negative --> positive: " + negAsPos + "\n";
        str += "Statistics:\n";
        str += "  Score: " + df.format(score) + "\n";
        str += "  Accuracy: " + df.format(accuracy*100) + "%\n";
        str += "  Accuracy on examples: " + df.format(accuracyOnExamples*100) + "%\n";
        str += "  Accuracy on positive examples: " + df.format(accuracyOnPositiveExamples*100) + "%\n";
        str += "  Error rate: " + df.format(errorRate*100) + "%\n";
        return str;
    }

        public Set<T> getNegClassified() {
                return negClassified;
        }

        public Set<T> getPosClassified() {
                return posClassified;
        }

        @Override
        public Set<T> getCoveredNegatives() {
                return negAsPos;
        }

        @Override
        public Set<T> getCoveredPositives() {
                return posAsPos;
        }

        @Override
        public Set<T> getNotCoveredPositives() {
                return posAsNeg;
        }

        /* (non-Javadoc)
         * @see org.dllearner.core.Score#getNotCoveredNegatives()
         */
        @Override
        public Set<T> getNotCoveredNegatives() {
                return negAsNeg;
        }

        @Override
        public ScorePosNeg<T> getModifiedLengthScore(int newLength) {
                return new ScoreThreeValued<>(newLength, accuracyPenalty, errorPenalty, penaliseNeutralExamples, percentPerLengthUnit, posClassified, neutClassified, negClassified, posExamples, neutExamples, negExamples);
        }

        /* (non-Javadoc)
         * @see org.dllearner.core.Score#getAccuracy()
         */
        @Override
        public double getAccuracy() {
                return accuracy;
        }
}