001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.utilities;
020
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
036
/**
 * A map from namespace prefix to namespace URI, based on the http://prefix.cc/ service.
 * The prefixes are cached locally in src/main/resources/prefixes.csv and are read from the
 * classpath resource {@code prefixes.csv} when the singleton is first created. An update of
 * that file has to be forced manually by running the {@link #main(String[])} method of this
 * class.
 *
 * @author Lorenz Buehmann
 */
public class PrefixCCMap extends HashMap<String, String>{

    /** Name of the classpath resource holding the cached {@code prefix,uri} lines. */
    private static final String LOCAL_FILE = "prefixes.csv";

    /** Lazily created singleton; see {@link #getInstance()}. */
    private static PrefixCCMap instance;

    private PrefixCCMap(){
        fillMap();
    }

    /**
     * Loads all {@code prefix,uri} lines of the cached resource into this map.
     * Lines that do not split into exactly two comma-separated fields are skipped.
     * If the resource is missing or cannot be read, a message is printed to
     * {@code System.err} and the map keeps whatever was loaded so far (possibly nothing).
     */
    private void fillMap() {
        InputStream is = PrefixCCMap.class.getClassLoader().getResourceAsStream(LOCAL_FILE);
        if (is == null) {
            // getResourceAsStream signals "not found" with null; wrapping null in a reader would throw an NPE
            System.err.println("PrefixCCMap: resource '" + LOCAL_FILE
                    + "' was not found on the classpath, the prefix map stays empty.");
            return;
        }
        // closing the reader also closes the underlying resource stream
        try (BufferedReader bufRdr = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            String line;
            while ((line = bufRdr.readLine()) != null) {
                String[] entry = line.split(",");
                if (entry.length == 2) {
                    put(entry[0].trim(), entry[1].trim());
                }
            }
        } catch (IOException e) {
            System.err.println("PrefixCCMap: failed to read resource '" + LOCAL_FILE + "'.");
            e.printStackTrace();
        }
    }

    /**
     * Returns the shared instance, creating and filling it on first use.
     * The lazy initialization is not synchronized.
     *
     * @return the singleton prefix map
     */
    public static PrefixCCMap getInstance(){
        if(instance == null){
            instance = new PrefixCCMap();
        }
        return instance;
    }

    /**
     * This main method updates the local prefix file by loading the latest prefix list from
     * prefix.cc, dropping entries with an empty namespace and entries whose namespace was
     * already seen, and then replacing the local file with the filtered version.
     *
     * @param args the arguments (unused)
     * @throws IOException if downloading, filtering or replacing the local file fails
     */
    public static void main(String[] args) throws IOException {
        String target = "src/main/resources/prefixes.csv";
        Path targetFile = Paths.get(target);
        Path tmpFile = Paths.get(target + ".tmp");

        // load latest file from prefix.cc
        download(new URL("http://prefix.cc/popular/all.file.csv"), targetFile);

        // reload file and filter entries where the second argument is empty or a duplicate
        filterEntries(targetFile, tmpFile);

        // replace the downloaded file by the filtered one; fails loudly instead of silently
        Files.move(tmpFile, targetFile, StandardCopyOption.REPLACE_EXISTING);
    }

    /** Copies the complete content behind {@code source} into {@code destination}, replacing it if present. */
    private static void download(URL source, Path destination) throws IOException {
        try (InputStream in = source.openStream()) {
            Files.copy(in, destination, StandardCopyOption.REPLACE_EXISTING);
        }
    }

    /**
     * Writes every two-field line of {@code source} whose unquoted value is non-blank and
     * not seen before to {@code destination} as {@code key,value}.
     */
    private static void filterEntries(Path source, Path destination) throws IOException {
        Set<String> values = new HashSet<>();
        try (BufferedReader br = new BufferedReader(
                     new InputStreamReader(Files.newInputStream(source), StandardCharsets.UTF_8));
             BufferedWriter bw = Files.newBufferedWriter(destination, StandardCharsets.UTF_8)) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] entry = line.split(",");
                if (entry.length != 2) {
                    continue;
                }
                String value = stripQuotes(entry[1]);
                // Set.add returns false for a value that was already written
                if (!value.trim().isEmpty() && values.add(value)) {
                    bw.write(entry[0] + "," + value);
                    bw.newLine();
                }
            }
        }
    }

    /**
     * Removes one pair of surrounding double quotes, if present; any other value is returned
     * trimmed but otherwise untouched, so short or unquoted values no longer fail or lose characters.
     */
    private static String stripQuotes(String raw) {
        String value = raw.trim();
        int last = value.length() - 1;
        if (last >= 1 && value.charAt(0) == '"' && value.charAt(last) == '"') {
            return value.substring(1, last);
        }
        return value;
    }

}