001package org.dllearner.algorithms.isle.index; 002 003import org.apache.commons.codec.digest.DigestUtils; 004import org.slf4j.Logger; 005import org.springframework.util.FileSystemUtils; 006 007import java.io.*; 008import java.net.URL; 009import java.net.URLConnection; 010import java.util.zip.ZipEntry; 011import java.util.zip.ZipInputStream; 012 013/** 014 * Provides methods to download zipped zipped files from remote locations and extracts and stores them locally. 015 * @author Daniel Fleischhacker 016 */ 017public class RemoteDataProvider { 018 private final static Logger log = org.slf4j.LoggerFactory.getLogger(RemoteDataProvider.class); 019 020 public static String DATA_DIRECTORY = "tmp/"; 021 private URL url; 022 private File localDirectory; 023 024 private File lastModifiedCache; 025 026 /** 027 * Initializes this downloader to fetch data from the given URL. The download process is started 028 * immediately. 029 * @param url URL to download data from 030 * @throws IOException on errors downloading or extracting the file 031 */ 032 public RemoteDataProvider(URL url) throws IOException { 033 this.url = url; 034 035 log.debug("Initializing for URL '{}'", url); 036 037 log.debug("Data directory is '{}'", DATA_DIRECTORY); 038 File dataDir = new File(DATA_DIRECTORY); 039 if (!dataDir.exists()) { 040 log.debug("Data directory not yet existing, trying to create"); 041 if (!dataDir.mkdirs()) { 042 throw new RuntimeException( 043 "Unable to create temporary file directory: " + dataDir.getAbsoluteFile()); 044 } 045 } 046 047 this.localDirectory = new File(DATA_DIRECTORY + DigestUtils.md5Hex(url.toString())); 048 log.debug("'{}' --> '{}'", url, localDirectory.getAbsolutePath()); 049 this.lastModifiedCache = new File(DATA_DIRECTORY + DigestUtils.md5Hex(url.toString()) + ".last"); 050 051 downloadData(); 052 } 053 054 /** 055 * Downloads the file from the URL assigned to this RemoteDataProvider and extracts it into 056 * the tmp subdirectory of the current working directory. The actual path to access the data 057 * can be retrieved using {@link #getLocalDirectory()}. 058 * 059 * @throws IOException on errors downloading or extracting the file 060 */ 061 private void downloadData() throws IOException { 062 String localModified = getLocalLastModified(); 063 064 log.debug("Local last modified: {}", localModified); 065 boolean triggerDownload = false; 066 067 if (localModified == null) { 068 log.debug("No local last modified date found, triggering download"); 069 triggerDownload = true; 070 } 071 else { 072 URLConnection conn = url.openConnection(); 073 long lastModified = conn.getLastModified(); 074 log.debug("Remote last modified: {}", lastModified); 075 if (!Long.valueOf(localModified).equals(lastModified)) { 076 log.debug("Last modified dates do not match, triggering download"); 077 triggerDownload = true; 078 } 079 } 080 081 if (triggerDownload) { 082 deleteData(); 083 if (!this.localDirectory.mkdir()) { 084 throw new RuntimeException( 085 "Unable to create temporary file directory: " + localDirectory.getAbsoluteFile()); 086 } 087 ZipInputStream zin = new ZipInputStream(this.url.openStream()); 088 089 ZipEntry ze; 090 byte[] buffer = new byte[2048]; 091 while ((ze = zin.getNextEntry()) != null) { 092 final String base = localDirectory.getCanonicalPath(); 093 File outpath = new File(base, ze.getName()); 094 if (!outpath.getCanonicalPath().startsWith(base)) { 095 log.error("Not extracting {} because it is outside of {}", ze.getName(), base); 096 continue; 097 } 098 if (!outpath.getParentFile().exists()) { 099 outpath.getParentFile().mkdirs(); 100 } 101 if (ze.isDirectory()) { 102 outpath.mkdirs(); 103 } 104 else { 105 FileOutputStream output = null; 106 try { 107 output = new FileOutputStream(outpath); 108 int len = 0; 109 while ((len = zin.read(buffer)) > 0) { 110 output.write(buffer, 0, len); 111 } 112 } 113 finally { 114 if (output != null) { 115 output.close(); 116 } 117 } 118 } 119 } 120 zin.close(); 121 122 BufferedWriter writer = new BufferedWriter(new FileWriter(lastModifiedCache)); 123 long lastModified = url.openConnection().getLastModified(); 124 log.debug("Writing local last modified date: '{}'", lastModified); 125 writer.write(String.valueOf(lastModified)); 126 writer.close(); 127 } 128 else { 129 log.debug("Local data is up to date, skipping download"); 130 } 131 } 132 133 /** 134 * Forces a redownload of the data. The data directory is first deleted and then recreated. 135 */ 136 public void redownload() throws IOException { 137 deleteData(); 138 downloadData(); 139 } 140 141 /** 142 * Deletes the data downloaded. 143 */ 144 public void deleteData() { 145 FileSystemUtils.deleteRecursively(localDirectory); 146 lastModifiedCache.delete(); 147 } 148 149 /** 150 * Returns the folder to access the downloaded data. The returned File object points to the directory 151 * created for the downloaded data. 152 * @return file pointing to the downloaded data's directory 153 */ 154 public File getLocalDirectory() { 155 return localDirectory; 156 } 157 158 /** 159 * Returns the URL assigned to this RemoteDataProvider 160 * @return the URL assigned to this downloader 161 */ 162 public URL getUrl() { 163 return url; 164 } 165 166 /** 167 * Returns the content of the local last modified cache for this URL. If no such file exists, null is returned 168 * @return content of local last modified cache, if not existing null 169 */ 170 private String getLocalLastModified() { 171 if (!lastModifiedCache.exists()) { 172 return null; 173 } 174 String res; 175 BufferedReader reader = null; 176 try { 177 reader = new BufferedReader(new FileReader(lastModifiedCache)); 178 res = reader.readLine(); 179 reader.close(); 180 return res; 181 } 182 catch (FileNotFoundException e) { 183 return null; 184 } 185 catch (IOException e) { 186 return null; 187 } 188 finally { 189 if (reader != null) { 190 try { 191 reader.close(); 192 } 193 catch (IOException e) { 194 log.error("Unable to close last modified cache property", e); 195 } 196 } 197 } 198 } 199 200 public static void main(String[] args) throws IOException { 201 RemoteDataProvider rid = new RemoteDataProvider( 202 new URL("http://gold.linkeddata.org/data/bible/verse_index.zip")); 203 System.out.println(rid.getLocalDirectory().getAbsolutePath()); 204 RemoteDataProvider rid2 = new RemoteDataProvider( 205 new URL("http://gold.linkeddata.org/data/bible/chapter_index.zip")); 206 System.out.println(rid2.getLocalDirectory().getAbsolutePath()); 207 } 208}