001/**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019
020package org.dllearner.experiments;
021
022import java.util.Random;
023import java.util.SortedSet;
024import java.util.TreeSet;
025
026import org.apache.log4j.Logger;
027
028/**
029 * used to randomize examples and split them into training and test sets
030 * gets a fixed number of examples
031 * @author Sebastian Hellmann <hellmann@informatik.uni-leipzig.de>
032 *
033 */
034public class ExMakerFixedSize {
035        @SuppressWarnings("unused")
036        private static Logger logger = Logger.getLogger(ExMakerFixedSize.class);
037
038        private final Examples examples;
039        private final boolean randomize;
040        
041        public ExMakerFixedSize(Examples examples ){
042                this(examples, true) ;
043        }
044        
045        public ExMakerFixedSize(Examples examples, boolean randomize ){
046                this.examples = examples;
047                this.randomize = randomize;
048        }
049        
050        public static void main(String[] args) {
051                Examples ex = new Examples();
052                
053                for (int i = 0; i < 20; i++) {
054                        ex.addPosTrain("p"+i);
055                        ex.addNegTrain("n"+i);
056                }
057                
058                ExMakerFixedSize r = new ExMakerFixedSize(ex);
059                ex = r.select(5, 5);
060                System.out.println(ex.toString());
061                
062        }
063        
064        /**
065         * same as select(int,int)
066         * uses both times the same number
067         * @param both
068         * @return
069         */
070        public Examples select(int both){
071                return select( both,  both);
072        }
073        
074        /**
075         * returns a new example object based on ALL (train and test) examples in the old set
076         * picks a fixed number of examples, puts them into training sets, rest to test set
077         * @param nrOfPos
078         * @param nrOfNeg
079         * @return
080         */
081        public Examples select(int nrOfPos, int nrOfNeg){
082
083                SortedSet<String> posTrain = new TreeSet<>();
084                SortedSet<String> negTrain = new TreeSet<>();
085                
086                SortedSet<String> posTest = new TreeSet<>();
087                SortedSet<String> negTest = new TreeSet<>();
088                
089                SortedSet<String> posOld = new TreeSet<>();
090                SortedSet<String> negOld = new TreeSet<>();
091                posOld.addAll(examples.getPositiveExamples());
092                negOld.addAll(examples.getNegativeExamples());
093                
094                while (!posOld.isEmpty() && posTrain.size()< nrOfPos) {
095                        String one;
096                        if(randomize){
097                                one = pickOneRandomly(posOld.toArray(new String[posOld.size()]));
098                        }else{
099                                one = posOld.first();
100                        }
101                        posOld.remove(one);
102                        posTrain.add(one);
103                }
104                posTest.addAll(posOld);
105                
106                while (!negOld.isEmpty() && negTrain.size()< nrOfNeg) {
107                        String one;
108                        if(randomize){
109                                one = pickOneRandomly(negOld.toArray(new String[negOld.size()]));
110                        }else{
111                                one = negOld.first();
112                        }
113                        negOld.remove(one);
114                        negTrain.add(one);
115                }
116                negTest.addAll(negOld);
117                
118                return new Examples(posTrain, negTrain, posTest, negTest);
119        }
120        
121        
122        public static String pickOneRandomly(String[] from){
123                Random r = new Random();
124                int index = Math.round((float)(from.length*r.nextFloat()));
125                try{
126                        return from[index];
127                }catch (Exception e) {
128                        return pickOneRandomly(from);
129                }
130        }
131        
132}