001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.utilities; 020 021import java.io.BufferedReader; 022import java.io.File; 023import java.io.FileOutputStream; 024import java.io.FileReader; 025import java.io.FileWriter; 026import java.io.IOException; 027import java.io.InputStream; 028import java.io.InputStreamReader; 029import java.io.PrintWriter; 030import java.net.URL; 031import java.nio.channels.Channels; 032import java.nio.channels.ReadableByteChannel; 033import java.util.HashMap; 034import java.util.HashSet; 035import java.util.Set; 036 037/** 038 * A map of prefixes based on http://prefix.cc/ service. The prefixes are cached locally in 039 * src/main/resources/prefixes.csv. An update of this file has to be forced manually by running the main method 040 * of this class. 041 * 042 * @author Lorenz Buehmann 043 */ 044public class PrefixCCMap extends HashMap<String, String>{ 045 046 private static final String LOCAL_FILE = "prefixes.csv"; 047 048 private static PrefixCCMap instance; 049 050 private PrefixCCMap(){ 051 fillMap(); 052 } 053 054 private void fillMap() { 055 try(InputStream is = this.getClass().getClassLoader().getResourceAsStream(LOCAL_FILE)) { 056 try(BufferedReader bufRdr = new BufferedReader(new InputStreamReader(is))) { 057 String line = null; 058 String key = null; 059 String value = null; 060 while ((line = bufRdr.readLine()) != null) { 061 String[] entry = line.split(","); 062 if(entry.length == 2){ 063 key = entry[0].trim(); 064 value = entry[1].trim(); 065 066 put(key, value); 067 } 068 } 069 } 070 } catch (IOException e) { 071 e.printStackTrace(); 072 } 073 } 074 075 public static PrefixCCMap getInstance(){ 076 if(instance == null){ 077 instance = new PrefixCCMap(); 078 } 079 return instance; 080 } 081 082 /** 083 * This main methods updates the local prefix file by loading latest prefix list from prefix.cc. 084 * @param args the arguments 085 * @throws IOException 086 */ 087 public static void main(String[] args) throws IOException { 088 //load latest file from prefix.cc 089 String target = "src/main/resources/prefixes.csv"; 090 URL google = new URL("http://prefix.cc/popular/all.file.csv"); 091 ReadableByteChannel rbc = Channels.newChannel(google.openStream()); 092 File file = new File(target); 093 if(!file.exists()){ 094 file.createNewFile(); 095 } 096 FileOutputStream fos = new FileOutputStream(file); 097 fos.getChannel().transferFrom(rbc, 0, 1 << 24); 098 fos.close(); 099 100 //Reload file and filter entries where second argument is empty 101 File tmpFile = new File(target + ".tmp"); 102 File inFile = new File(target); 103 PrintWriter pw = new PrintWriter(new FileWriter(tmpFile)); 104 BufferedReader br = new BufferedReader(new FileReader(inFile)); 105 String line = null; 106 Set<String> values = new HashSet<>(); 107 while ((line = br.readLine()) != null) { 108 String[] entry = line.split(","); 109 if(entry.length == 2){ 110 String key = entry[0]; 111 String value = entry[1]; 112 value = value.substring(1); 113 value = value.substring(0, value.length()-1); 114 if(!value.trim().isEmpty() && !values.contains(value)){ 115 values.add(value); 116 pw.println(entry[0] + "," + value); 117 pw.flush(); 118 } 119 } 120 } 121 pw.close(); 122 br.close(); 123 124 inFile.delete(); 125 tmpFile.renameTo(inFile); 126 } 127 128}