001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.utilities.statistics;
020
021import org.apache.log4j.Level;
022import org.apache.log4j.Logger;
023
024import java.util.Arrays;
025
026/**
027 * Computes the Fleiss' Kappa value as described in (Fleiss, 1971). 
028 * Fleiss' Kappa is a statistical measure for assessing the reliability of agreement between 
029 * a fixed number of raters when assigning categorical ratings to a number of items or classifying items.
030 */
031public class FleissKappa
032{
033    private static Logger logger = Logger.getLogger(FleissKappa.class);
034 
035    /**
036     * Example from Wikipedia article
037     */
038    public static void main(String[] args)
039    {
040        Logger.getRootLogger().setLevel(Level.DEBUG);
041        short[][] mat = new short[][]
042        {
043            {0,0,0,0,14},
044            {0,2,6,4,2},
045            {0,0,3,5,6},
046            {0,3,9,2,0},
047            {2,2,8,1,1},
048            {7,7,0,0,0},
049            {3,2,6,3,0},
050            {2,5,3,2,2},
051            {6,5,2,1,0},
052            {0,2,2,3,7}
053        } ;
054 
055        float kappa = computeKappa(mat) ;
056        System.out.println(getInterpretation(kappa));
057    }
058 
059    /**
060     * Computes the Kappa value
061     * @param mat Matrix[subjects][categories]
062     * @return The Kappa value
063     */
064        public static float computeKappa(short[][] mat) {
065                final int n = checkEachLineCount(mat); 
066                final int N = mat.length;
067                final int k = mat[0].length;
068
069                if(n < 2){
070                        System.err.println("Only " + n + " raters per subject detected. There have to be at least 2 raters per subject");
071                        return -999;
072                }
073                logger.debug(n + " raters.");
074                logger.debug(N + " subjects.");
075                logger.debug(k + " categories.");
076
077                // Computing columns p[]
078                float[] p = new float[k];
079                for (int j = 0; j < k; j++) {
080                        p[j] = 0;
081                        for (short[] aMat : mat) {
082                                p[j] += aMat[j];
083                        }
084                        p[j] /= N * n;
085                }
086                logger.debug("p = " + Arrays.toString(p));
087
088                // Computing rows P[]
089                float[] P = new float[N];
090                for (int i = 0; i < N; i++) {
091                        P[i] = 0;
092                        for (int j = 0; j < k; j++) {
093                                P[i] += mat[i][j] * mat[i][j];
094                        }
095                        P[i] = (P[i] - n) / (n * (n - 1));
096                }
097                logger.debug("P = " + Arrays.toString(P));
098
099                // Computing Pbar
100                float Pbar = 0;
101                for (float Pi : P) {
102                        Pbar += Pi;
103                }
104                Pbar /= N;
105                logger.debug("Pbar = " + Pbar);
106
107                // Computing PbarE
108                float PbarE = 0;
109                for (float pj : p) {
110                        PbarE += pj * pj;
111                }
112                logger.debug("PbarE = " + PbarE);
113
114                final float kappa = (Pbar - PbarE) / (1 - PbarE);
115                logger.debug("kappa = " + kappa);
116
117                return kappa;
118        }
119 
120    /**
121     * Assert that each line has a constant number of ratings
122     * @param mat The matrix checked
123     * @return The number of ratings
124     * @throws IllegalArgumentException If lines contain different number of ratings
125     */
126        private static int checkEachLineCount(short[][] mat) {
127                int n = 0;
128                boolean firstLine = true;
129
130                for (short[] line : mat) {
131                        int count = 0;
132                        for (short cell : line) {
133                                count += cell;
134                        }
135                        if (firstLine) {
136                                n = count;
137                                firstLine = false;
138                        }
139                        if (n != count) {
140                                throw new IllegalArgumentException("Line count != " + n + " (n value).");
141                        }
142                }
143                return n;
144        }
145        
146        /**
147         * Landis and Koch (1977) gave the following table for interpreting κ values. This table is 
148         * however by no means universally accepted; They supplied no evidence to support it, basing it 
149         * instead on personal opinion. It has been noted that these guidelines may be more harmful than 
150         * helpful, as the number of categories and subjects will affect the magnitude of the value. 
151         * The kappa will be higher when there are fewer categories. (Wikipedia)
152         * @param kappa The Kappa value
153         */
154        public static String getInterpretation(float kappa){
155                String interpretation = "";
156                if(kappa < 0){
157                        interpretation = "Poor agreement";
158                } else if(0 <= kappa && kappa <= 0.2 ){
159                        interpretation = "Slight agreement";
160                } else if(0.2 < kappa && kappa <= 0.4 ){
161                        interpretation = "Fair agreement";
162                } else if(0.4 <= kappa && kappa <= 0.6 ){
163                        interpretation = "Moderate agreement";
164                } else if(0.6 <= kappa && kappa <= 0.8 ){
165                        interpretation = "Substantial agreement";
166                } else if(0.8 <= kappa && kappa <= 1 ){
167                        interpretation = "Almost perfect agreement";
168                }
169                
170                return interpretation;
171        }
172}
173