001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.qtl.heuristics;
020
021import com.google.common.collect.ComparisonChain;
022import org.dllearner.algorithms.qtl.datastructures.impl.EvaluatedRDFResourceTree;
023import org.dllearner.core.AbstractComponent;
024import org.dllearner.core.Heuristic;
025import org.dllearner.learningproblems.Heuristics;
026import org.dllearner.learningproblems.Heuristics.HeuristicType;
027import org.dllearner.learningproblems.QueryTreeScore;
028import org.semanticweb.owlapi.model.OWLIndividual;
029
030import java.util.Set;
031
032/**
033 * @author Lorenz Buehmann
034 *
035 */
036public abstract class QueryTreeHeuristic extends AbstractComponent implements Heuristic<EvaluatedRDFResourceTree> {
037
038        protected double posExamplesWeight = 1.0;
039        protected HeuristicType heuristicType = HeuristicType.PRED_ACC;
040        
041
042        public QueryTreeHeuristic() {
043                super();
044        }
045        
046        public abstract double getScore(EvaluatedRDFResourceTree tree);
047
048        @Override
049        public double getNodeScore(EvaluatedRDFResourceTree node) {
050                return getScore(node);
051        }
052
053        /**
054         * @param posExamplesWeight the posExamplesWeight to set
055         */
056        public void setPosExamplesWeight(double posExamplesWeight) {
057                this.posExamplesWeight = posExamplesWeight;
058        }
059        
060        protected double getAccuracy(EvaluatedRDFResourceTree tree) {
061                QueryTreeScore treeScore = tree.getTreeScore();
062
063                Set<OWLIndividual> truePositives = treeScore.getCoveredPositives();
064                Set<OWLIndividual> trueNegatives = treeScore.getNotCoveredNegatives();
065                Set<OWLIndividual> falsePositives = treeScore.getNotCoveredPositives();
066                Set<OWLIndividual> falseNegatives = treeScore.getCoveredNegatives();
067
068                double tp = truePositives.size();
069                double tn = trueNegatives.size();
070                double fp = falsePositives.size();
071                double fn = falseNegatives.size();
072
073                double accuracy = 0;
074                switch (heuristicType) {
075                case FMEASURE:
076                        accuracy = Heuristics.getFScore(tp / (tp + fn), tp / (tp + fp), posExamplesWeight);
077                        break;
078                case PRED_ACC:
079                        accuracy = (1 / posExamplesWeight * tp + tn) / (1 / posExamplesWeight * (tp + fn) + (tn + fp));
080                        break;
081                case ENTROPY: {
082                        double total = tp + fn;
083                        double pp = tp / total;
084                        double pn = fn / total;
085                        accuracy = pp * Math.log(pp) + pn * Math.log(pn);
086                        break;
087                }
088                case MATTHEWS_CORRELATION:
089                        accuracy = (tp * tn - fp * fn) / Math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
090                        break;
091                case YOUDEN_INDEX:
092                        accuracy = tp / (tp + fn) + tn / (fp + tn) - 1;
093                        break;
094                default:
095                        break;
096
097                }
098                return accuracy;
099        }
100
101        /**
102         * Returns the maximum achievable score according to the used score
103         * function.
104         * 
105         * @return
106         */
107        public double getMaximumAchievableScore(EvaluatedRDFResourceTree tree) {
108                QueryTreeScore treeScore = tree.getTreeScore();
109
110                Set<OWLIndividual> truePositives = treeScore.getCoveredPositives();
111                Set<OWLIndividual> trueNegatives = treeScore.getNotCoveredNegatives();
112                Set<OWLIndividual> falsePositives = treeScore.getCoveredNegatives();
113                Set<OWLIndividual> falseNegatives = treeScore.getNotCoveredPositives();
114
115                double tp = truePositives.size();
116                double tn = trueNegatives.size();
117                double fp = falsePositives.size();
118                double fn = falseNegatives.size();
119
120                return getMaximumAchievableScore(tp, tn, fp, fn);
121        }
122
123        /**
124         * Returns the maximum achievable accuracy score according to the used score
125         * function.
126         * The idea is as follows:
127         * For algorithms which make the current solution more general, we know that
128         * 1. all already covered positive examples remain covered
129         * 2. all already covered negative examples remain covered
130         * 3. uncovered positive examples might be covered by more general solutions
131         * 4. uncovered negative examples might be covered by more general solutions
132         * That means, in the optimal case we get a solution which covers all
133         * uncovered positive examples, but non of the uncovered negative examples.
134         * 
135         * @param tp
136         * @param tn
137         * @param fp
138         * @param fn
139         * @return
140         */
141        private double getMaximumAchievableScore(double tp, double tn, double fp, double fn) {
142                double mas = 0d;
143                switch (heuristicType) {
144                case FMEASURE:
145                        mas = Double.POSITIVE_INFINITY;
146                        break;
147                case PRED_ACC: // (tn + tp) / (tn + fp + fn + tp) -> (tn + (tp + fn)) / (tn + fp + fn + tp)
148                        //                      mas = (posExamplesWeight * tp + tn - fp) / (posExamplesWeight * (tp + fn) + tn + fp);
149                        mas = (posExamplesWeight * (tp + fn) + tn) / (posExamplesWeight * (tp + fn) + tn + fp);
150                        break;
151                case ENTROPY:
152                        mas = Double.POSITIVE_INFINITY;
153                        break;
154                case MATTHEWS_CORRELATION:
155                        mas = Double.POSITIVE_INFINITY;
156                        break;
157                case YOUDEN_INDEX:
158                        mas = Double.POSITIVE_INFINITY;
159                        break;
160                default:
161                        break;
162
163                }
164                return mas;
165        }
166
167        @Override
168        public int compare(EvaluatedRDFResourceTree tree1, EvaluatedRDFResourceTree tree2) {
169                return ComparisonChain.start()
170                                .compare(getScore(tree1), getScore(tree2))
171                                .compare(tree1.asEvaluatedDescription().getDescription(), tree2.asEvaluatedDescription().getDescription())
172                                .result();
173        }
174
175        /**
176         * @param heuristicType the type of accuracy measure to set
177         */
178        public void setHeuristicType(HeuristicType heuristicType) {
179                this.heuristicType = heuristicType;
180        }
181
182        /**
183         * @return the heuristicType
184         */
185        public HeuristicType getHeuristicType() {
186                return heuristicType;
187        }
188
189}