/**
 * Copyright (C) 2007-2011, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package org.dllearner.experiments;

import java.util.Random;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.log4j.Logger;

/**
 * Splits a set of examples into training and test sets by moving a fixed
 * number of examples into the training sets and leaving the rest for testing.
 * Examples are picked either randomly or in sorted order, depending on the
 * {@code randomize} flag given at construction time.
 *
 * @author Sebastian Hellmann &lt;hellmann@informatik.uni-leipzig.de&gt;
 */
public class ExMakerFixedSize {
    @SuppressWarnings("unused")
    private static final Logger logger = Logger.getLogger(ExMakerFixedSize.class);

    /** Shared generator so that a pick does not allocate and re-seed a new Random. */
    private static final Random RANDOM = new Random();

    private final Examples examples;
    private final boolean randomize;

    /**
     * Creates a maker that picks training examples randomly.
     *
     * @param examples the examples to draw from
     */
    public ExMakerFixedSize(Examples examples) {
        this(examples, true);
    }

    /**
     * Creates a maker for the given examples.
     *
     * @param examples  the examples to draw from
     * @param randomize {@code true} to pick training examples randomly,
     *                  {@code false} to always take the smallest remaining
     *                  example in the natural (sorted) string order
     */
    public ExMakerFixedSize(Examples examples, boolean randomize) {
        this.examples = examples;
        this.randomize = randomize;
    }

    /**
     * Small manual demo: builds 20 positive and 20 negative examples, selects
     * 5 of each for training and prints the resulting split.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Examples ex = new Examples();

        for (int i = 0; i < 20; i++) {
            ex.addPosTrain("p" + i);
            ex.addNegTrain("n" + i);
        }

        ExMakerFixedSize r = new ExMakerFixedSize(ex);
        ex = r.select(5, 5);
        System.out.println(ex.toString());
    }

    /**
     * Same as {@link #select(int, int)}, using the same number for positive
     * and negative examples.
     *
     * @param both number of positive and number of negative training examples
     * @return the new split
     */
    public Examples select(int both) {
        return select(both, both);
    }

    /**
     * Returns a new example object built from the examples reported by
     * {@code getPositiveExamples()} and {@code getNegativeExamples()} of the
     * old set (per the original author's note these cover ALL examples, train
     * and test). Up to the requested number of examples go into the training
     * sets; everything that is left goes into the test sets. If fewer examples
     * are available than requested, all of them end up in the training set.
     *
     * @param nrOfPos number of positive examples to put into the training set
     * @param nrOfNeg number of negative examples to put into the training set
     * @return a new {@code Examples} object holding the split
     */
    public Examples select(int nrOfPos, int nrOfNeg) {
        // Work on copies so the source example sets are never modified.
        SortedSet<String> posRemaining = new TreeSet<>(examples.getPositiveExamples());
        SortedSet<String> negRemaining = new TreeSet<>(examples.getNegativeExamples());

        SortedSet<String> posTrain = new TreeSet<>();
        SortedSet<String> negTrain = new TreeSet<>();
        moveExamples(posRemaining, posTrain, nrOfPos);
        moveExamples(negRemaining, negTrain, nrOfNeg);

        // Whatever was not moved into training becomes the test set.
        SortedSet<String> posTest = new TreeSet<>(posRemaining);
        SortedSet<String> negTest = new TreeSet<>(negRemaining);

        return new Examples(posTrain, negTrain, posTest, negTest);
    }

    /**
     * Moves examples from {@code pool} to {@code target} until {@code target}
     * holds {@code count} elements or {@code pool} is exhausted.
     */
    private void moveExamples(SortedSet<String> pool, SortedSet<String> target, int count) {
        while (!pool.isEmpty() && target.size() < count) {
            String picked = randomize
                    ? pickOneRandomly(pool.toArray(new String[0]))
                    : pool.first();
            pool.remove(picked);
            target.add(picked);
        }
    }

    /**
     * Picks one element of the array uniformly at random.
     *
     * @param from the candidates; must not be {@code null} or empty
     * @return one element of {@code from}
     * @throws IllegalArgumentException if {@code from} is empty
     */
    public static String pickOneRandomly(String[] from) {
        if (from.length == 0) {
            throw new IllegalArgumentException("cannot pick from an empty array");
        }
        // nextInt(bound) is uniform over [0, bound), so the index is always
        // valid and every element is equally likely. Rounding a scaled float
        // instead would under-weight index 0 and could produce from.length.
        return from[RANDOM.nextInt(from.length)];
    }

}