001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.kb.sparql; 020 021import com.jamonapi.Monitor; 022import org.apache.log4j.Logger; 023import org.dllearner.utilities.Files; 024import org.dllearner.utilities.JamonMonitorLogger; 025 026import java.io.*; 027import java.security.MessageDigest; 028import java.security.NoSuchAlgorithmException; 029import java.util.LinkedList; 030 031/** 032 * SPARQL query cache to avoid possibly expensive multiple queries. The queries 033 * and their results are written to files. A cache has an associated cache 034 * directory where all files are written. 035 * 036 * Each SPARQL query and its result is written to one file. The name of this 037 * file is a hash of the query. The result of the query is written as JSON 038 * serialisation of the SPARQL XML result, see 039 * http://www.w3.org/TR/rdf-sparql-json-res/. 040 * 041 * Apart from the query and its result, a timestamp of the query is stored. 042 * After a configurable amount of time, query results are considered outdated. 043 * If a cached result of a SPARQL query exists, but is too old, the cache 044 * behaves as if the cached result would not exist. 045 * 046 * TODO: We are doing md5 hashing at the moment, so in rare cases different 047 * SPARQL queries can be mapped to the same file. Support for such scenarios 048 * needs to be included. 049 * 050 * @author Sebastian Hellmann 051 * @author Sebastian Knappe 052 * @author Jens Lehmann 053 */ 054public class Cache implements Serializable { 055 056 private static Logger logger = Logger.getLogger(Cache.class); 057 058 // true = H2 embedded database is used; false = stored in files 059 private boolean useDatabase = false; 060 private ExtractionDBCache h2; 061 062 private static final long serialVersionUID = 843308736471742205L; 063 064 // maps hash of a SPARQL queries to JSON representation 065 // of its results; this 066 // private HashMap<String, String> hm; 067 068 private transient String cacheDir = ""; 069 private transient String fileEnding = ".cache"; 070 // private long timestamp; 071 072 // specifies after how many seconds a cached result becomes invalid 073 private long freshnessSeconds = 15 * 24 * 60 * 60; 074 075 /** 076 * same ad Cache(String) default is "cache" 077 */ 078 /*public Cache() { 079 this("cache"); 080 } */ 081 082 /** 083 * A Persistant cache is stored in the folder cachePersistant. 084 * It has longer freshness 365 days and is mainly usefull for developing 085 * @return a Cache onject 086 */ 087 public static Cache getPersistentCache(){ 088 Cache c = new Cache(getPersistantCacheDir()); 089 c.setFreshnessInDays(365); 090 return c; 091 } 092 093 /** 094 * @return the default cache object 095 */ 096 public static Cache getDefaultCache(){ 097 return new Cache( getDefaultCacheDir()); 098 } 099 100 /** 101 * the default cachedir normally is "cache". 102 * @return Default Cache Dir 103 */ 104 public static String getDefaultCacheDir(){ 105 return "cache"; 106 } 107 108 /** 109 * a more persistant cache used for example generation."cachePersistant" 110 * @return persistant Cache Dir 111 */ 112 public static String getPersistantCacheDir(){ 113 return "cachePersistant"; 114 } 115 116 /** 117 * Constructor for the cache itself. 118 * 119 * @param cacheDir 120 * Where the base path to the cache is . 121 */ 122 public Cache(String cacheDir) { 123 this(cacheDir, false); 124 } 125 126 public Cache(String cacheDir, boolean useDatabase) { 127 this.cacheDir = cacheDir + File.separator; 128 this.useDatabase = useDatabase; 129 if (!new File(cacheDir).exists()) { 130 Files.mkdir(cacheDir); 131 logger.info("Created directory: " + cacheDir + "."); 132 } 133 134 if(this.useDatabase) { 135 h2 = new ExtractionDBCache(cacheDir); 136 } 137 } 138 139 // compute md5-hash 140 private String getHash(String string) { 141 Monitor hashTime = JamonMonitorLogger.getTimeMonitor(Cache.class, "HashTime").start(); 142 // calculate md5 hash of the string (code is somewhat 143 // difficult to read, but there doesn't seem to be a 144 // single function call in Java for md5 hashing) 145 MessageDigest md5 = null; 146 try { 147 md5 = MessageDigest.getInstance("MD5"); 148 } catch (NoSuchAlgorithmException e) { 149 e.printStackTrace(); 150 } 151 md5.reset(); 152 md5.update(string.getBytes()); 153 byte[] result = md5.digest(); 154 155 StringBuffer hexString = new StringBuffer(); 156 for (byte aResult : result) { 157 hexString.append(Integer.toHexString(0xFF & aResult)); 158 } 159 String str = hexString.toString(); 160 hashTime.stop(); 161 return str; 162 } 163 164 // return filename where the query result should be saved 165 private String getFilename(String sparqlQuery) { 166 return cacheDir + getHash(sparqlQuery) + fileEnding; 167 } 168 169 /** 170 * Gets a result for a query if it is in the cache. 171 * 172 * @param sparqlQuery 173 * SPARQL query to check. 174 * @return Query result as JSON or null if no result has been found or it is 175 * outdated. 176 */ 177 @SuppressWarnings({"unchecked"}) 178 private String getCacheEntry(String sparqlQuery) { 179 180 String filename = getFilename(sparqlQuery); 181 File file = new File(filename); 182 183 // return null (indicating no result) if file does not exist 184 if(!file.exists()) { 185 return null; 186 } 187 188 189 LinkedList<Object> entry = null; 190 try { 191 FileInputStream fos = new FileInputStream(filename); 192 ObjectInputStream o = new ObjectInputStream(fos); 193 entry = (LinkedList<Object>) o.readObject(); 194 o.close(); 195 } catch (IOException | ClassNotFoundException e) { 196 e.printStackTrace(); 197 if(Files.debug){System.exit(0);} 198 } 199 200 // TODO: we need to check whether the query is correct 201 // (may not always be the case due to md5 hashing) 202 203 // determine whether query is outdated 204 long timestamp = (Long) entry.get(0); 205 boolean fresh = checkFreshness(timestamp); 206 207 if(!fresh) { 208 // delete file 209 file.delete(); 210 // return null indicating no result 211 return null; 212 } 213 214 return (String) entry.get(2); 215 } 216 217 218 219 /** 220 * Adds an entry to the cache. 221 * 222 * @param sparqlQuery 223 * The SPARQL query. 224 * @param result 225 * Result of the SPARQL query. 226 */ 227 private void addToCache(String sparqlQuery, String result) { 228 String filename = getFilename(sparqlQuery); 229 long timestamp = System.currentTimeMillis(); 230 231 // create the object which will be serialised 232 LinkedList<Object> list = new LinkedList<>(); 233 list.add(timestamp); 234 list.add(sparqlQuery); 235 list.add(result); 236 237 // create the file we want to use 238 //File file = new File(filename); 239 FileOutputStream fos = null; 240 ObjectOutputStream o = null; 241 try { 242 //file.createNewFile(); 243 fos = new FileOutputStream(filename, false); 244 o = new ObjectOutputStream(fos); 245 o.writeObject(list); 246 fos.flush(); 247 248 } catch (IOException e) { 249 e.printStackTrace(); 250 }finally{ 251 try{ 252 fos.close(); 253 o.close(); 254 }catch (Exception e2) { 255 e2.printStackTrace(); 256 } 257 } 258 } 259 260 // check whether the given timestamp is fresh 261 private boolean checkFreshness(long timestamp) { 262 return ((System.currentTimeMillis() - timestamp) <= (freshnessSeconds * 1000)); 263 } 264 265 /** 266 * Takes a SPARQL query (which has not been evaluated yet) as argument and 267 * returns a JSON result set. The result set is taken from this cache if the 268 * query is stored here. Otherwise the query is send and its result added to 269 * the cache and returned. Convenience method. 270 * 271 * @param query 272 * The SPARQL query. 273 * @return Jena result set in JSON format 274 */ 275 public String executeSparqlQuery(SparqlQuery query) { 276 if(useDatabase) { 277 return h2.executeSelectQuery(query.getSparqlEndpoint(), query.getSparqlQueryString()); 278 } 279 280 Monitor totaltime =JamonMonitorLogger.getTimeMonitor(Cache.class, "TotalTimeExecuteSparqlQuery").start(); 281 JamonMonitorLogger.increaseCount(Cache.class, "TotalQueries"); 282 283 Monitor readTime = JamonMonitorLogger.getTimeMonitor(Cache.class, "ReadTime").start(); 284 String result = getCacheEntry(query.getSparqlQueryString()); 285 readTime.stop(); 286 287 if (result != null) { 288// query.setJson(result); 289// 290// query.setRunning(false); 291// SparqlQuery.writeToSparqlLog("***********\nJSON retrieved from cache"); 292// SparqlQuery.writeToSparqlLog("wget -S -O - '\n"+query.getSparqlEndpoint().getHTTPRequest()); 293// SparqlQuery.writeToSparqlLog(query.getSparqlQueryString()); 294 295 //SparqlQuery.writeToSparqlLog("JSON: "+result); 296 JamonMonitorLogger.increaseCount(Cache.class, "SuccessfulHits"); 297 298 } else { 299 300 //ResultSet rs= query.send(); 301 query.send(); 302 String json = query.getJson(); 303 if (json!=null){ 304 addToCache(query.getSparqlQueryString(), json); 305// SparqlQuery.writeToSparqlLog("result added to cache: "+json); 306 logger.debug("result added to SPARQL cache: "+json); 307 result=json; 308 //query.setJson(result); 309 } else { 310 json=""; 311 result=""; 312 logger.warn(Cache.class.getSimpleName()+"empty result: "+query.getSparqlQueryString()); 313 314 } 315 316 //return json; 317 } 318 totaltime.stop(); 319 return result; 320 } 321 322 public boolean executeSparqlAskQuery(SparqlQuery query) { 323 String str = getCacheEntry(query.getSparqlQueryString()); 324 JamonMonitorLogger.increaseCount(Cache.class, "TotalQueries"); 325 if(str != null) { 326 JamonMonitorLogger.increaseCount(Cache.class, "SuccessfulHits"); 327 return Boolean.parseBoolean(str); 328 } else { 329 Boolean result = query.sendAsk(); 330 addToCache(query.getSparqlQueryString(), result.toString()); 331 return result; 332 } 333 } 334 335 /** 336 * deletes all Files in the cacheDir, does not delete the cacheDir itself, 337 * and can thus still be used without creating a new Cache Object 338 */ 339 public void clearCache() { 340 341 File f = new File(cacheDir); 342 String[] files = f.list(); 343 for (String file : files) { 344 Files.deleteFile(new File(cacheDir + "/" + file)); 345 } 346 } 347 348 /** 349 * Changes how long cached results will stay fresh (default 15 days). 350 * @param days number of days 351 */ 352 public void setFreshnessInDays(int days){ 353 freshnessSeconds = days * 24 * 60 * 60; 354 } 355 356}