001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.kb.sparql;
020
021import com.jamonapi.Monitor;
022import org.apache.log4j.Logger;
023import org.dllearner.utilities.Files;
024import org.dllearner.utilities.JamonMonitorLogger;
025
026import java.io.*;
027import java.security.MessageDigest;
028import java.security.NoSuchAlgorithmException;
029import java.util.LinkedList;
030
031/**
032 * SPARQL query cache to avoid possibly expensive multiple queries. The queries
033 * and their results are written to files. A cache has an associated cache
034 * directory where all files are written.
035 * 
036 * Each SPARQL query and its result is written to one file. The name of this
037 * file is a hash of the query. The result of the query is written as JSON
038 * serialisation of the SPARQL XML result, see
039 * http://www.w3.org/TR/rdf-sparql-json-res/.
040 * 
041 * Apart from the query and its result, a timestamp of the query is stored.
042 * After a configurable amount of time, query results are considered outdated.
043 * If a cached result of a SPARQL query exists, but is too old, the cache
044 * behaves as if the cached result would not exist.
045 * 
046 * TODO: We are doing md5 hashing at the moment, so in rare cases different
047 * SPARQL queries can be mapped to the same file. Support for such scenarios
048 * needs to be included.
049 * 
050 * @author Sebastian Hellmann
051 * @author Sebastian Knappe
052 * @author Jens Lehmann
053 */
054public class Cache implements Serializable {
055
056        private static Logger logger = Logger.getLogger(Cache.class);
057        
058        // true = H2 embedded database is used; false = stored in files
059        private boolean useDatabase = false;
060        private ExtractionDBCache h2;
061
062        private static final long serialVersionUID = 843308736471742205L;
063
064        // maps hash of a SPARQL queries to JSON representation
065        // of its results; this
066        // private HashMap<String, String> hm;
067
068        private transient String cacheDir = "";
069        private transient String fileEnding = ".cache";
070        // private long timestamp;
071
072        // specifies after how many seconds a cached result becomes invalid
073        private long freshnessSeconds = 15 * 24 * 60 * 60;
074
075        /**
076         *  same ad Cache(String) default is "cache"
077         */
078        /*public Cache() {
079                this("cache");
080        } */
081        
082        /**
083         * A Persistant cache is stored in the folder cachePersistant.
084         * It has longer freshness 365 days and is mainly usefull for developing
085         * @return a Cache onject
086         */
087        public static Cache getPersistentCache(){
088                Cache c = new Cache(getPersistantCacheDir()); 
089                c.setFreshnessInDays(365);
090                return c;
091        }
092        
093        /**
094         * @return the default cache object
095         */
096        public static Cache getDefaultCache(){
097                return new Cache( getDefaultCacheDir());
098        }
099        
100        /**
101         * the default cachedir normally is "cache".
102         * @return Default Cache Dir
103         */
104        public static String getDefaultCacheDir(){
105                return "cache";
106        }
107        
108        /**
109         * a more persistant cache used for example generation."cachePersistant"
110         * @return persistant Cache Dir
111         */
112        public static String getPersistantCacheDir(){
113                return "cachePersistant";
114        }
115        
116        /**
117         * Constructor for the cache itself.
118         * 
119         * @param cacheDir
120         *            Where the base path to the cache is .
121         */
122        public Cache(String cacheDir) {
123                this(cacheDir, false);
124        }
125
126        public Cache(String cacheDir, boolean useDatabase) {
127                this.cacheDir = cacheDir + File.separator;
128                this.useDatabase = useDatabase;
129                if (!new File(cacheDir).exists()) {
130                        Files.mkdir(cacheDir);
131                        logger.info("Created directory: " + cacheDir + ".");
132                }
133                
134                if(this.useDatabase) {
135                        h2 = new ExtractionDBCache(cacheDir);
136                }               
137        }
138        
139        // compute md5-hash
140        private String getHash(String string) {
141                Monitor hashTime = JamonMonitorLogger.getTimeMonitor(Cache.class, "HashTime").start();
142                // calculate md5 hash of the string (code is somewhat
143                // difficult to read, but there doesn't seem to be a
144                // single function call in Java for md5 hashing)
145                MessageDigest md5 = null;
146                try {
147                        md5 = MessageDigest.getInstance("MD5");
148                } catch (NoSuchAlgorithmException e) {
149                        e.printStackTrace();
150                }
151                md5.reset();
152                md5.update(string.getBytes());
153                byte[] result = md5.digest();
154
155                StringBuffer hexString = new StringBuffer();
156                for (byte aResult : result) {
157                        hexString.append(Integer.toHexString(0xFF & aResult));
158                }
159                String str = hexString.toString();
160                hashTime.stop();
161                return str;
162        }
163
164        // return filename where the query result should be saved
165        private String getFilename(String sparqlQuery) {
166                return cacheDir + getHash(sparqlQuery) + fileEnding;
167        }
168
169        /**
170         * Gets a result for a query if it is in the cache.
171         * 
172         * @param sparqlQuery
173         *            SPARQL query to check.
174         * @return Query result as JSON or null if no result has been found or it is
175         *         outdated.
176         */
177        @SuppressWarnings({"unchecked"})
178        private String getCacheEntry(String sparqlQuery) {
179                
180                String filename = getFilename(sparqlQuery);
181                File file = new File(filename);
182                
183                // return null (indicating no result) if file does not exist
184                if(!file.exists()) {
185                        return null;
186                }
187                        
188                
189                LinkedList<Object> entry = null;
190                try {
191                        FileInputStream fos = new FileInputStream(filename);
192                        ObjectInputStream o = new ObjectInputStream(fos);
193                        entry = (LinkedList<Object>) o.readObject();
194                        o.close();
195                } catch (IOException | ClassNotFoundException e) {
196                        e.printStackTrace();
197                        if(Files.debug){System.exit(0);}
198                }
199
200                // TODO: we need to check whether the query is correct
201                // (may not always be the case due to md5 hashing)
202                
203                // determine whether query is outdated
204                long timestamp = (Long) entry.get(0);
205                boolean fresh = checkFreshness(timestamp);
206                
207                if(!fresh) {
208                        // delete file
209                        file.delete();
210                        // return null indicating no result
211                        return null;
212                }
213                
214                return (String) entry.get(2);
215        }
216        
217        
218
219        /**
220         * Adds an entry to the cache.
221         * 
222         * @param sparqlQuery
223         *            The SPARQL query.
224         * @param result
225         *            Result of the SPARQL query.
226         */
227        private void addToCache(String sparqlQuery, String result) {
228                String filename = getFilename(sparqlQuery);
229                long timestamp = System.currentTimeMillis();
230
231                // create the object which will be serialised
232                LinkedList<Object> list = new LinkedList<>();
233                list.add(timestamp);
234                list.add(sparqlQuery);
235                list.add(result);
236
237                // create the file we want to use
238                //File file = new File(filename);
239                FileOutputStream fos = null;
240                ObjectOutputStream o = null;
241                try {
242                        //file.createNewFile();
243                        fos = new FileOutputStream(filename, false);
244                        o = new ObjectOutputStream(fos);
245                        o.writeObject(list);
246                        fos.flush();
247                        
248                } catch (IOException e) {
249                        e.printStackTrace();
250                }finally{
251                        try{
252                                fos.close();
253                                o.close();
254                        }catch (Exception e2) {
255                                 e2.printStackTrace();
256                        }
257                }
258        }
259
260        // check whether the given timestamp is fresh
261        private boolean checkFreshness(long timestamp) {
262                return ((System.currentTimeMillis() - timestamp) <= (freshnessSeconds * 1000));
263        }
264
265        /**
266         * Takes a SPARQL query (which has not been evaluated yet) as argument and
267         * returns a JSON result set. The result set is taken from this cache if the
268         * query is stored here. Otherwise the query is send and its result added to
269         * the cache and returned. Convenience method.
270         * 
271         * @param query
272         *            The SPARQL query.
273         * @return Jena result set in JSON format
274         */
275        public String executeSparqlQuery(SparqlQuery query) {
276                if(useDatabase) {
277                        return h2.executeSelectQuery(query.getSparqlEndpoint(), query.getSparqlQueryString());
278                }
279                
280                Monitor totaltime =JamonMonitorLogger.getTimeMonitor(Cache.class, "TotalTimeExecuteSparqlQuery").start();
281                JamonMonitorLogger.increaseCount(Cache.class, "TotalQueries");
282        
283                Monitor readTime = JamonMonitorLogger.getTimeMonitor(Cache.class, "ReadTime").start();
284                String result = getCacheEntry(query.getSparqlQueryString());
285                readTime.stop();
286                
287                if (result != null) {
288//                      query.setJson(result);
289//                      
290//                  query.setRunning(false);
291//                      SparqlQuery.writeToSparqlLog("***********\nJSON retrieved from cache");
292//                      SparqlQuery.writeToSparqlLog("wget -S -O - '\n"+query.getSparqlEndpoint().getHTTPRequest());
293//                      SparqlQuery.writeToSparqlLog(query.getSparqlQueryString());
294                        
295                        //SparqlQuery.writeToSparqlLog("JSON: "+result);
296                        JamonMonitorLogger.increaseCount(Cache.class, "SuccessfulHits");
297                        
298                } else {
299                        
300                        //ResultSet rs= query.send();
301                        query.send();
302                        String json = query.getJson();
303                        if (json!=null){
304                                addToCache(query.getSparqlQueryString(), json);
305//                              SparqlQuery.writeToSparqlLog("result added to cache: "+json);
306                                logger.debug("result added to SPARQL cache: "+json);
307                                result=json;
308                                //query.setJson(result);
309                        } else {
310                                json="";
311                                result="";
312                                logger.warn(Cache.class.getSimpleName()+"empty result: "+query.getSparqlQueryString());
313                                
314                        }
315                        
316                        //return json;
317                }
318                totaltime.stop();
319                return result;
320        }
321                
322        public boolean executeSparqlAskQuery(SparqlQuery query) {
323                String str = getCacheEntry(query.getSparqlQueryString());
324                JamonMonitorLogger.increaseCount(Cache.class, "TotalQueries");
325                if(str != null) {
326                        JamonMonitorLogger.increaseCount(Cache.class, "SuccessfulHits");
327                        return Boolean.parseBoolean(str);
328                } else {
329                        Boolean result = query.sendAsk();
330                        addToCache(query.getSparqlQueryString(), result.toString());
331                        return result;
332                }
333        }
334        
335        /**
336         * deletes all Files in the cacheDir, does not delete the cacheDir itself, 
337         * and can thus still be used without creating a new Cache Object
338         */
339        public void clearCache() {
340                
341                        File f = new File(cacheDir);
342                    String[] files = f.list();
343                for (String file : files) {
344                        Files.deleteFile(new File(cacheDir + "/" + file));
345                }
346        }
347        
348        /**
349         * Changes how long cached results will stay fresh (default 15 days).
350         * @param days number of days
351         */
352        public void setFreshnessInDays(int days){
353                freshnessSeconds = days * 24 * 60 * 60;
354        }
355
356}