001package org.dllearner.algorithms.isle.index;
002
003import org.apache.commons.codec.digest.DigestUtils;
004import org.slf4j.Logger;
005import org.springframework.util.FileSystemUtils;
006
007import java.io.*;
008import java.net.URL;
009import java.net.URLConnection;
010import java.util.zip.ZipEntry;
011import java.util.zip.ZipInputStream;
012
/**
 * Provides methods to download zipped files from remote locations and to extract and store them locally.
 * @author Daniel Fleischhacker
 */
017public class RemoteDataProvider {
018    private final static Logger log = org.slf4j.LoggerFactory.getLogger(RemoteDataProvider.class);
019
020    public static String DATA_DIRECTORY = "tmp/";
021    private URL url;
022    private File localDirectory;
023
024    private File lastModifiedCache;
025
026    /**
027     * Initializes this downloader to fetch data from the given URL. The download process is started
028     * immediately.
029     * @param url URL to download data from
030     * @throws IOException on errors downloading or extracting the file
031     */
032    public RemoteDataProvider(URL url) throws IOException {
033        this.url = url;
034
035        log.debug("Initializing for URL '{}'", url);
036
037        log.debug("Data directory is '{}'", DATA_DIRECTORY);
038        File dataDir = new File(DATA_DIRECTORY);
039        if (!dataDir.exists()) {
040            log.debug("Data directory not yet existing, trying to create");
041            if (!dataDir.mkdirs()) {
042                throw new RuntimeException(
043                        "Unable to create temporary file directory: " + dataDir.getAbsoluteFile());
044            }
045        }
046
047        this.localDirectory = new File(DATA_DIRECTORY + DigestUtils.md5Hex(url.toString()));
048        log.debug("'{}' --> '{}'", url, localDirectory.getAbsolutePath());
049        this.lastModifiedCache = new File(DATA_DIRECTORY + DigestUtils.md5Hex(url.toString()) + ".last");
050
051        downloadData();
052    }
053
054    /**
055     * Downloads the file from the URL assigned to this RemoteDataProvider and extracts it into
056     * the tmp subdirectory of the current working directory. The actual path to access the data
057     * can be retrieved using {@link #getLocalDirectory()}.
058     *
059     * @throws IOException on errors downloading or extracting the file
060     */
061    private void downloadData() throws IOException {
062        String localModified = getLocalLastModified();
063
064        log.debug("Local last modified: {}", localModified);
065        boolean triggerDownload = false;
066
067        if (localModified == null) {
068            log.debug("No local last modified date found, triggering download");
069            triggerDownload = true;
070        }
071        else {
072            URLConnection conn = url.openConnection();
073            long lastModified = conn.getLastModified();
074            log.debug("Remote last modified: {}", lastModified);
075            if (!Long.valueOf(localModified).equals(lastModified)) {
076                log.debug("Last modified dates do not match, triggering download");
077                triggerDownload = true;
078            }
079        }
080
081        if (triggerDownload) {
082            deleteData();
083            if (!this.localDirectory.mkdir()) {
084                throw new RuntimeException(
085                        "Unable to create temporary file directory: " + localDirectory.getAbsoluteFile());
086            }
087            ZipInputStream zin = new ZipInputStream(this.url.openStream());
088
089            ZipEntry ze;
090            byte[] buffer = new byte[2048];
091            while ((ze = zin.getNextEntry()) != null) {
092                final String base = localDirectory.getCanonicalPath();
093                File outpath = new File(base, ze.getName());
094                if (!outpath.getCanonicalPath().startsWith(base)) {
095                    log.error("Not extracting {} because it is outside of {}", ze.getName(), base);
096                    continue;
097                }
098                if (!outpath.getParentFile().exists()) {
099                    outpath.getParentFile().mkdirs();
100                }
101                if (ze.isDirectory()) {
102                    outpath.mkdirs();
103                }
104                else {
105                    FileOutputStream output = null;
106                    try {
107                        output = new FileOutputStream(outpath);
108                        int len = 0;
109                        while ((len = zin.read(buffer)) > 0) {
110                            output.write(buffer, 0, len);
111                        }
112                    }
113                    finally {
114                        if (output != null) {
115                            output.close();
116                        }
117                    }
118                }
119            }
120            zin.close();
121
122            BufferedWriter writer = new BufferedWriter(new FileWriter(lastModifiedCache));
123            long lastModified = url.openConnection().getLastModified();
124            log.debug("Writing local last modified date: '{}'", lastModified);
125            writer.write(String.valueOf(lastModified));
126            writer.close();
127        }
128        else {
129            log.debug("Local data is up to date, skipping download");
130        }
131    }
132
133    /**
134     * Forces a redownload of the data. The data directory is first deleted and then recreated.
135     */
136    public void redownload() throws IOException {
137        deleteData();
138        downloadData();
139    }
140
141    /**
142     * Deletes the data downloaded.
143     */
144    public void deleteData() {
145        FileSystemUtils.deleteRecursively(localDirectory);
146        lastModifiedCache.delete();
147    }
148
149    /**
150     * Returns the folder to access the downloaded data. The returned File object points to the directory
151     * created for the downloaded data.
152     * @return file pointing to the downloaded data's directory
153     */
154    public File getLocalDirectory() {
155        return localDirectory;
156    }
157
158    /**
159     * Returns the URL assigned to this RemoteDataProvider
160     * @return the URL assigned to this downloader
161     */
162    public URL getUrl() {
163        return url;
164    }
165
166    /**
167     * Returns the content of the local last modified cache for this URL. If no such file exists, null is returned
168     * @return content of local last modified cache, if not existing null
169     */
170    private String getLocalLastModified() {
171        if (!lastModifiedCache.exists()) {
172            return null;
173        }
174        String res;
175        BufferedReader reader = null;
176        try {
177            reader = new BufferedReader(new FileReader(lastModifiedCache));
178            res = reader.readLine();
179            reader.close();
180            return res;
181        }
182        catch (FileNotFoundException e) {
183            return null;
184        }
185        catch (IOException e) {
186            return null;
187        }
188        finally {
189            if (reader != null) {
190                try {
191                    reader.close();
192                }
193                catch (IOException e) {
194                    log.error("Unable to close last modified cache property", e);
195                }
196            }
197        }
198    }
199
200    public static void main(String[] args) throws IOException {
201        RemoteDataProvider rid = new RemoteDataProvider(
202                new URL("http://gold.linkeddata.org/data/bible/verse_index.zip"));
203        System.out.println(rid.getLocalDirectory().getAbsolutePath());
204        RemoteDataProvider rid2 = new RemoteDataProvider(
205                new URL("http://gold.linkeddata.org/data/bible/chapter_index.zip"));
206        System.out.println(rid2.getLocalDirectory().getAbsolutePath());
207    }
208}