Source code

001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.ocel;
020
021import org.dllearner.core.Component;
022import org.dllearner.core.ComponentAnn;
023import org.dllearner.core.ComponentInitException;
024import org.dllearner.core.annotations.NoConfigOption;
025import org.dllearner.core.config.ConfigOption;
026import org.semanticweb.owlapi.model.OWLClassExpression;
027import org.semanticweb.owlapi.model.OWLDataSomeValuesFrom;
028import org.semanticweb.owlapi.model.OWLObjectComplementOf;
029
030import java.util.Set;
031
032/**
033 * This heuristic combines the following criteria to assign a
034 * double score value to a node:
035 * <ul>
036 * <li>quality/accuracy of a concept (based on the full training set, not
037 *   the negative example coverage as the flexible heuristic)</li>
038 * <li>horizontal expansion</li>
039 * <li>accuracy gain: The heuristic takes into account the accuracy
040 *   difference between a node and its parent. If there is no gain (even
041 *   though we know that the refinement is proper) it is unlikely (although
042 *   not excluded) that the refinement is a necessary path to take towards a
043 *   solution.</li>
044 * </ul> 
045 *
046 * The heuristic has two parameters:
047 * <ul>
048 * <li>expansion penalty factor: describes how much accuracy gain is worth
049 *   an increase of horizontal expansion by one (typical value: 0.01)</li>
050 * <li>gain bonus factor: describes how accuracy gain should be weighted
051 *   versus accuracy itself (typical value: 1.00)</li>
052 * </ul>
053 *   
054 * The value of a node is calculated as follows:
055 * 
056 * <p><code>value = accuracy + gain bonus factor * accuracy gain - expansion penalty
057 * factor * horizontal expansion - node children penalty factor * number of children of node</code></p>
058 * 
059 * <p><code>accuracy = (TP + TN)/(P + N)</code></p>
060 * 
061 * <p><code>
062 * TP = number of true positives (= covered positives)<br />
063 * TN = number of true negatives (= nr of negatives examples - covered negatives)<br />
064 * P = number of positive examples<br />
065 * N = number of negative examples<br />
066 * </code></p>
067 * 
068 * @author Jens Lehmann
069 *
070 */
071@ComponentAnn(name = "multiple criteria heuristic", shortName = "multiheuristic", version = 0.7)
072public class MultiHeuristic implements ExampleBasedHeuristic, Component {
073        
074//      private OCELConfigurator configurator;
075        
076        // heuristic parameters
077        
078        @ConfigOption(description = "how much accuracy gain is worth an increase of horizontal expansion by one (typical value: 0.01)", defaultValue="0.02")
079        private double expansionPenaltyFactor = 0.02;
080        
081        @ConfigOption(description = "how accuracy gain should be weighted versus accuracy itself (typical value: 1.00)", defaultValue="0.5")
082        private double gainBonusFactor = 0.5;
083        
084        @ConfigOption(description = "penalty factor for the search tree node child count (use higher values for simple learning problems)", defaultValue="0.0001")
085        private double nodeChildPenalty = 0.0001;
086        
087        @ConfigOption(description = "the score value for the start node", defaultValue="0.1")
088        private double startNodeBonus = 0.1; //was 2.0
089        
090        // penalise errors on positive examples harder than on negative examples
091        // (positive weight = 1)
092        @ConfigOption(description = "weighting factor on the number of true negatives (true positives are weigthed with 1)", defaultValue="1.0")
093        private double negativeWeight = 1.0; // was 0.8;
094        
095        @ConfigOption(description = "penalty value to deduce for using a negated class expression (complementOf)", defaultValue="0")
096        private int negationPenalty = 0;
097
098        // examples
099        @NoConfigOption
100        private int nrOfNegativeExamples;
101        @NoConfigOption
102        private int nrOfExamples;
103        
104        @Deprecated
105        public MultiHeuristic(int nrOfPositiveExamples, int nrOfNegativeExamples) {
106                this.nrOfNegativeExamples = nrOfNegativeExamples;
107                nrOfExamples = nrOfPositiveExamples + nrOfNegativeExamples;
108//              this(nrOfPositiveExamples, nrOfNegativeExamples, 0.02, 0.5);
109        }
110        
111        public MultiHeuristic(int nrOfPositiveExamples, int nrOfNegativeExamples, double negativeWeight, double startNodeBonus, double expansionPenaltyFactor, int negationPenalty) {
112                this.nrOfNegativeExamples = nrOfNegativeExamples;
113                nrOfExamples = nrOfPositiveExamples + nrOfNegativeExamples;
114//              this.configurator = configurator;
115                this.negativeWeight = negativeWeight;
116                this.startNodeBonus = startNodeBonus;
117                this.expansionPenaltyFactor = expansionPenaltyFactor;
118        }
119
120    public MultiHeuristic(){
121
122    }
123
124//      public MultiHeuristic(int nrOfPositiveExamples, int nrOfNegativeExamples, double expansionPenaltyFactor, double gainBonusFactor) {
125//              this.nrOfNegativeExamples = nrOfNegativeExamples;
126//              nrOfExamples = nrOfPositiveExamples + nrOfNegativeExamples;
127//              this.expansionPenaltyFactor = expansionPenaltyFactor;
128//              this.gainBonusFactor = gainBonusFactor;
129//      }
130
131        @Override
132        public void init() throws ComponentInitException {
133                // nothing to do here
134        }       
135        
136        /* (non-Javadoc)
137         * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
138         */
139        @Override
140        public int compare(ExampleBasedNode node1, ExampleBasedNode node2) {
141                double score1 = getNodeScore(node1);
142                double score2 = getNodeScore(node2);
143                double diff = score1 - score2;
144                if(diff>0)
145                        return 1;
146                else if(diff<0)
147                        return -1;
148                else
149                        // we cannot return 0 here otherwise different nodes/concepts with the
150                        // same score may be ignored (not added to a set because an equal element exists)
151                        return node1.getConcept().compareTo(node2.getConcept());
152        }
153
154        public double getNodeScore(ExampleBasedNode node) {
155                double accuracy = getWeightedAccuracy(node.getCoveredPositives().size(),node.getCoveredNegatives().size());
156                ExampleBasedNode parent = node.getParent();
157                double gain = 0;
158                if(parent != null) {
159                        double parentAccuracy =  getWeightedAccuracy(parent.getCoveredPositives().size(),parent.getCoveredNegatives().size());
160                        gain = accuracy - parentAccuracy;
161                } else {
162                        accuracy += startNodeBonus;
163                }
164                int he = node.getHorizontalExpansion() - getHeuristicLengthBonus(node.getConcept());
165                return accuracy + gainBonusFactor * gain - expansionPenaltyFactor * he - nodeChildPenalty * node.getChildren().size();
166        }
167        
168        private double getWeightedAccuracy(int coveredPositives, int coveredNegatives) {
169                return (coveredPositives + negativeWeight * (nrOfNegativeExamples - coveredNegatives))/(double)nrOfExamples;
170        }
171        
172        public static double getNodeScore(ExampleBasedNode node, int nrOfPositiveExamples, int nrOfNegativeExamples, double negativeWeight, double startNodeBonus, double expansionPenaltyFactor, int negationPenalty) {
173                MultiHeuristic multi = new MultiHeuristic(nrOfPositiveExamples, nrOfNegativeExamples, negativeWeight, startNodeBonus, expansionPenaltyFactor, negationPenalty);
174                return multi.getNodeScore(node);
175        }
176        
177        // this function can be used to give some constructs a length bonus
178        // compared to their syntactic length
179        private int getHeuristicLengthBonus(OWLClassExpression description) {
180                
181                
182                int bonus = 0;
183                
184                Set<OWLClassExpression> nestedClassExpressions = description.getNestedClassExpressions();
185                for (OWLClassExpression expression : nestedClassExpressions) {
186                        // do not count TOP symbols (in particular in ALL r.TOP and EXISTS r.TOP)
187                        // as they provide no extra information
188                        if(expression.isOWLThing())
189                                bonus = 1; //2;
190                        
191                        // we put a penalty on negations, because they often overfit
192                        // (TODO: make configurable)
193                        else if(expression instanceof OWLObjectComplementOf) {
194                                bonus = -negationPenalty;
195                        }
196                        
197//                      if(OWLClassExpression instanceof BooleanValueRestriction)
198//                              bonus = -1;
199                        
200                        // some bonus for doubles because they are already penalised by length 3
201                        else if(expression instanceof OWLDataSomeValuesFrom) {
202//                              System.out.println(description);
203                                bonus = 3; //2;
204                        }
205                }
206                
207                return bonus;
208        }
209
210    public double getExpansionPenaltyFactor() {
211        return expansionPenaltyFactor;
212    }
213
214    public void setExpansionPenaltyFactor(double expansionPenaltyFactor) {
215        this.expansionPenaltyFactor = expansionPenaltyFactor;
216    }
217
218        public int getNrOfNegativeExamples() {
219                return nrOfNegativeExamples;
220        }
221
222        public void setNrOfNegativeExamples(int nrOfNegativeExamples) {
223                this.nrOfNegativeExamples = nrOfNegativeExamples;
224        }
225
226        public int getNrOfExamples() {
227                return nrOfExamples;
228        }
229
230        public void setNrOfExamples(int nrOfExamples) {
231                this.nrOfExamples = nrOfExamples;
232        }
233
234        public double getGainBonusFactor() {
235                return gainBonusFactor;
236        }
237
238        public void setGainBonusFactor(double gainBonusFactor) {
239                this.gainBonusFactor = gainBonusFactor;
240        }
241
242        public double getNodeChildPenalty() {
243                return nodeChildPenalty;
244        }
245
246        public void setNodeChildPenalty(double nodeChildPenalty) {
247                this.nodeChildPenalty = nodeChildPenalty;
248        }
249
250        public double getStartNodeBonus() {
251                return startNodeBonus;
252        }
253
254        public void setStartNodeBonus(double startNodeBonus) {
255                this.startNodeBonus = startNodeBonus;
256        }
257
258        public double getNegativeWeight() {
259                return negativeWeight;
260        }
261
262        public void setNegativeWeight(double negativeWeight) {
263                this.negativeWeight = negativeWeight;
264        }
265
266        public int getNegationPenalty() {
267                return negationPenalty;
268        }
269
270        public void setNegationPenalty(int negationPenalty) {
271                this.negationPenalty = negationPenalty;
272        }
273}