/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.learningproblems;

import java.text.DecimalFormat;
import java.util.Set;

import org.semanticweb.owlapi.model.OWLEntity;

import com.google.common.collect.Sets;

/**
 * Three-valued score: every example is classified as positive, neutral or
 * negative, and the classification is compared against the expected
 * (positive / neutral / negative) example sets.
 * <p>
 * The score is built from per-instance penalties, normalised by the worst
 * possible error penalty, and reduced further by a length-dependent term.
 * <p>
 * TODO: The implementation is not very efficient, because several values are
 * only computed in order to present the score results. It would be better to
 * compute only what the score itself needs and to derive the remaining
 * statistical values lazily, when they are actually requested.
 *
 * @param <T> the kind of OWL entity being classified
 * @author Jens Lehmann
 */
public class ScoreThreeValued<T extends OWLEntity> extends ScorePosNeg<T> {

    private static final long serialVersionUID = -1780084688122949685L;

    /** Selects which misclassifications contribute to the score. */
    public enum ScoreMethod {POSITIVE, FULL}

    // configuration options
    private double accuracyPenalty;
    private double errorPenalty;
    private boolean penaliseNeutralExamples;
    private double percentPerLengthUnit;

    // potential configuration options (not exposed as such, but they could be)
    private boolean showCorrectClassifications = false;
    private static ScoreMethod scoreMethod = ScoreMethod.POSITIVE;

    // classification produced for the concept under evaluation
    private Set<T> posClassified;
    private Set<T> neutClassified;
    private Set<T> negClassified;

    // expected classification
    private Set<T> posExamples;
    private Set<T> neutExamples;
    private Set<T> negExamples;

    // confusion matrix, named <expected>As<classified>
    private Set<T> posAsNeg;
    private Set<T> negAsPos;
    private Set<T> posAsNeut;
    private Set<T> neutAsPos;
    private Set<T> neutAsNeg;
    private Set<T> negAsNeut;
    private Set<T> posAsPos;
    private Set<T> negAsNeg;
    private Set<T> neutAsNeut;

    private double score;
    private double accuracy;
    private double accuracyOnExamples;
    private double accuracyOnPositiveExamples;
    private double errorRate;

    private int nrOfExamples;
    private int conceptLength;

    /**
     * Creates the score and immediately computes the classification matrix
     * and all derived statistics.
     *
     * @param conceptLength           length of the evaluated concept
     * @param accuracyPenalty         penalty per inaccurately classified instance
     * @param errorPenalty            penalty per wrongly classified instance
     * @param penaliseNeutralExamples whether neutral examples classified as
     *                                positive or negative are penalised
     * @param percentPerLengthUnit    amount subtracted from the score per unit
     *                                of concept length
     * @param posClassified           instances classified as positive
     * @param neutClassified          instances classified as neutral
     * @param negClassified           instances classified as negative
     * @param posExamples             positive examples
     * @param neutExamples            neutral examples
     * @param negExamples             negative examples
     */
    public ScoreThreeValued(int conceptLength,
                            double accuracyPenalty,
                            double errorPenalty,
                            boolean penaliseNeutralExamples,
                            double percentPerLengthUnit,
                            Set<T> posClassified,
                            Set<T> neutClassified,
                            Set<T> negClassified,
                            Set<T> posExamples,
                            Set<T> neutExamples,
                            Set<T> negExamples) {
        this.conceptLength = conceptLength;
        this.accuracyPenalty = accuracyPenalty;
        this.errorPenalty = errorPenalty;
        this.penaliseNeutralExamples = penaliseNeutralExamples;
        this.percentPerLengthUnit = percentPerLengthUnit;
        this.posClassified = posClassified;
        this.neutClassified = neutClassified;
        this.negClassified = negClassified;
        this.posExamples = posExamples;
        this.neutExamples = neutExamples;
        this.negExamples = negExamples;
        this.nrOfExamples = posExamples.size() + negExamples.size();
        computeClassificationMatrix();
        computeStatistics();
    }

    /**
     * Fills the nine cells of the confusion matrix by intersecting each
     * example set with each classification set.
     */
    private void computeClassificationMatrix() {
        // misclassifications and inaccuracies: these drive the score
        posAsNeg = Sets.intersection(posExamples, negClassified);
        negAsPos = Sets.intersection(negExamples, posClassified);
        posAsNeut = Sets.intersection(posExamples, neutClassified);
        neutAsPos = Sets.intersection(neutExamples, posClassified);
        neutAsNeg = Sets.intersection(neutExamples, negClassified);
        negAsNeut = Sets.intersection(negExamples, neutClassified);

        // The three diagonal cells are not needed for the score itself; if it
        // pays off performance-wise they could be moved out of this method.
        posAsPos = Sets.intersection(posExamples, posClassified);
        negAsNeg = Sets.intersection(negExamples, negClassified);
        neutAsNeut = Sets.intersection(neutExamples, neutClassified);
    }

    /**
     * Derives the score and the accuracy / error statistics from the
     * confusion matrix.
     * <p>
     * All ratios are plain floating-point divisions: if a denominator is zero
     * (for instance when there are no positive and negative examples, or when
     * the error penalty is 0) the affected value becomes NaN or infinite.
     */
    private void computeStatistics() {
        double penaltySum = -posAsNeg.size() * errorPenalty
                - negAsPos.size() * errorPenalty
                - posAsNeut.size() * accuracyPenalty;

        if (scoreMethod == ScoreMethod.FULL) {
            penaltySum -= negAsNeut.size() * accuracyPenalty;
        }

        if (penaliseNeutralExamples) {
            penaltySum -= (neutAsPos.size() * accuracyPenalty
                    + neutAsNeg.size() * accuracyPenalty);
        }

        // TODO: the accuracy penalty could be used here instead of the
        // error penalty
        double worstValue = nrOfExamples * errorPenalty;
        // original author's note: the quotient is a number between -1 and 0;
        // afterwards the length-dependent term is subtracted
        score = penaltySum / worstValue - percentPerLengthUnit * conceptLength;

        // The remaining values are for reporting only and could be moved out
        // of this method for performance reasons.
        int labelledExamples = posExamples.size() + negExamples.size();
        int domainSize = labelledExamples + neutExamples.size();
        int truePositives = posAsPos.size();
        int correctOnExamples = truePositives + negAsNeg.size();
        int correctOverall = correctOnExamples + neutAsNeut.size();
        int errors = posAsNeg.size() + negAsPos.size();

        // accuracy = correctly classified instances / number of domain elements
        accuracy = (double) correctOverall / domainSize;

        // accuracy on examples = correctly classified positive and negative
        // examples / number of positive and negative examples
        accuracyOnExamples = (double) correctOnExamples / labelledExamples;

        accuracyOnPositiveExamples = (double) truePositives / posExamples.size();

        // error rate = completely wrongly classified instances / number of
        // positive and negative examples
        errorRate = (double) errors / labelledExamples;
    }

    @Override
    public double getScoreValue() {
        return score;
    }

    /**
     * @return number of individuals that got exactly the same
     *         classification with both definitions (positive as positive
     *         plus negative as negative)
     */
    public int getMatchRate() {
        int matchingPositives = posAsPos.size();
        int matchingNegatives = negAsNeg.size();
        return matchingPositives + matchingNegatives;
    }

    /**
     * @return number of individuals for which class membership w.r.t. the
     *         given query could not be determined using the induced
     *         definition, while they actually belong (or do not belong) to
     *         the query concept
     */
    public int getOmmissionErrorRate() {
        int undecidedPositives = posAsNeut.size();
        int undecidedNegatives = negAsNeut.size();
        return undecidedPositives + undecidedNegatives;
    }

    /**
     * @return number of individuals found not to belong to the query concept
     *         according to the induced definition, while they actually
     *         belong to it, and vice versa
     */
    public int getCommissionErrorRate() {
        int rejectedPositives = posAsNeg.size();
        int acceptedNegatives = negAsPos.size();
        return rejectedPositives + acceptedNegatives;
    }

    /**
     * @return number of individuals found to belong or not to belong to the
     *         query concept according to the induced definition, while
     *         neither case is logically derivable from the knowledge base
     *         with the original definition
     */
    public int getInductionRate() {
        int neutralAsPositive = neutAsPos.size();
        int neutralAsNegative = neutAsNeg.size();
        return neutralAsPositive + neutralAsNegative;
    }

    /**
     * Renders the scoring configuration, the (mis)classified instances and
     * the statistics as a multi-line, human-readable report.
     */
    @Override
    public String toString() {
        final DecimalFormat df = new DecimalFormat("0.00");
        final boolean fullMethod = scoreMethod == ScoreMethod.FULL;
        final StringBuilder out = new StringBuilder();

        out.append("score method ").append(fullMethod ? "full" : "positive");
        if (!penaliseNeutralExamples) {
            out.append(" (neutral examples not penalized)");
        }
        out.append("\n");

        if (showCorrectClassifications) {
            out.append("Correctly classified:\n");
            out.append(" positive --> positive: ").append(posAsPos).append("\n");
            out.append(" neutral --> neutral: ").append(neutAsNeut).append("\n");
            out.append(" negative --> negative: ").append(negAsNeg).append("\n");
        }

        out.append("Inaccurately classified (penalty of ")
                .append(df.format(accuracyPenalty))
                .append(" per instance):\n");
        out.append(" positive --> neutral: ").append(posAsNeut).append("\n");
        if (penaliseNeutralExamples) {
            out.append(" neutral --> positive: ").append(neutAsPos).append("\n");
            out.append(" neutral --> negative: ").append(neutAsNeg).append("\n");
        }
        if (fullMethod) {
            out.append(" negative --> neutral: ").append(negAsNeut).append("\n");
        }

        out.append("Classification errors (penalty of ")
                .append(df.format(errorPenalty))
                .append(" per instance):\n");
        out.append(" positive --> negative: ").append(posAsNeg).append("\n");
        out.append(" negative --> positive: ").append(negAsPos).append("\n");

        out.append("Statistics:\n");
        out.append(" Score: ").append(df.format(score)).append("\n");
        out.append(" Accuracy: ").append(df.format(accuracy * 100)).append("%\n");
        out.append(" Accuracy on examples: ")
                .append(df.format(accuracyOnExamples * 100)).append("%\n");
        out.append(" Accuracy on positive examples: ")
                .append(df.format(accuracyOnPositiveExamples * 100)).append("%\n");
        out.append(" Error rate: ").append(df.format(errorRate * 100)).append("%\n");

        return out.toString();
    }

    /**
     * @return the instances classified as negative, as passed to the constructor
     */
    public Set<T> getNegClassified() {
        return negClassified;
    }

    /**
     * @return the instances classified as positive, as passed to the constructor
     */
    public Set<T> getPosClassified() {
        return posClassified;
    }

    /**
     * @return negative examples that were classified as positive
     */
    @Override
    public Set<T> getCoveredNegatives() {
        return negAsPos;
    }

    /**
     * @return positive examples that were classified as positive
     */
    @Override
    public Set<T> getCoveredPositives() {
        return posAsPos;
    }

    /**
     * @return positive examples that were classified as negative
     */
    @Override
    public Set<T> getNotCoveredPositives() {
        return posAsNeg;
    }

    /**
     * @return negative examples that were classified as negative
     * @see org.dllearner.core.Score#getNotCoveredNegatives()
     */
    @Override
    public Set<T> getNotCoveredNegatives() {
        return negAsNeg;
    }

    /**
     * Builds a new score from the same penalties, classifications and
     * examples, but for a different concept length.
     *
     * @param newLength concept length to use for the new score
     * @return a freshly computed score for {@code newLength}
     */
    @Override
    public ScorePosNeg<T> getModifiedLengthScore(int newLength) {
        return new ScoreThreeValued<>(
                newLength,
                accuracyPenalty,
                errorPenalty,
                penaliseNeutralExamples,
                percentPerLengthUnit,
                posClassified,
                neutClassified,
                negClassified,
                posExamples,
                neutExamples,
                negExamples);
    }

    /**
     * @return share of correctly classified instances among all positive,
     *         neutral and negative examples
     * @see org.dllearner.core.Score#getAccuracy()
     */
    @Override
    public double getAccuracy() {
        return accuracy;
    }
}