001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.kb; 020 021import org.aksw.jena_sparql_api.cache.extra.CacheFrontend; 022import org.aksw.jena_sparql_api.cache.h2.CacheUtilsH2; 023import org.aksw.jena_sparql_api.core.FluentQueryExecutionFactory; 024import org.aksw.jena_sparql_api.core.QueryExecutionFactory; 025import org.aksw.jena_sparql_api.delay.core.QueryExecutionFactoryDelay; 026import org.aksw.jena_sparql_api.http.QueryExecutionHttpWrapper; 027import org.aksw.jena_sparql_api.retry.core.QueryExecutionFactoryRetry; 028import org.apache.jena.riot.WebContent; 029import org.apache.jena.sparql.engine.http.QueryEngineHTTP; 030import org.dllearner.core.AbstractKnowledgeSource; 031import org.dllearner.core.ComponentAnn; 032import org.dllearner.core.ComponentInitException; 033import org.dllearner.core.KnowledgeSource; 034import org.dllearner.core.annotations.NoConfigOption; 035import org.dllearner.core.config.ConfigOption; 036import org.dllearner.kb.sparql.SPARQLTasks; 037import org.dllearner.kb.sparql.SparqlEndpoint; 038import org.slf4j.Logger; 039import org.slf4j.LoggerFactory; 040 041import java.net.URL; 042import java.util.LinkedList; 043import java.util.List; 044import java.util.concurrent.TimeUnit; 045 046/** 047 * SPARQL endpoint knowledge source (without fragment extraction), 048 * in particular for those algorithms which work directly on an endpoint 049 * without requiring an OWL reasoner. 050 * 051 * @author Jens Lehmann 052 * 053 */ 054@ComponentAnn(name = "SPARQL endpoint", shortName = "sparql", version = 0.2) 055public class SparqlEndpointKS extends AbstractKnowledgeSource { 056 057 private static final Logger logger = LoggerFactory.getLogger(SparqlEndpointKS.class); 058 059 private SparqlEndpoint endpoint; 060 @NoConfigOption 061 private CacheFrontend cache; 062 @NoConfigOption // auto-detected 063 private boolean supportsSPARQL_1_1 = false; 064 private boolean isRemote = true; 065 066 @ConfigOption(description="URL of the SPARQL endpoint", required=true) 067 private URL url; 068 069 @ConfigOption(description="a list of default graph URIs", defaultValue="{}", required=false) 070 private List<String> defaultGraphURIs = new LinkedList<>(); 071 072 @ConfigOption(description="a list of named graph URIs", defaultValue="{}", required=false) 073 private List<String> namedGraphURIs = new LinkedList<>(); 074 075 // some parameters for the query execution 076 @ConfigOption(defaultValue = "50", description = "Use this setting to avoid overloading the endpoint with a sudden burst of queries. A value below 0 means no delay.", required = false) 077 private long queryDelay = 50; 078 079 // caching options 080 @ConfigOption(defaultValue = "true", description = "Use this setting to enable caching of SPARQL queries in a local database.", required = false) 081 private boolean useCache = true; 082 083 @ConfigOption(defaultValue = "tmp folder of the system", description = "The base directory of the SPARQL query cache.", required = false) 084 protected String cacheDir = System.getProperty("java.io.tmpdir") + "/sparql-cache;COMPRESS=TRUE"; 085 086 @ConfigOption(defaultValue = "86400", description = "The time to live in milliseconds for cached SPARQL queries, if enabled. The default value is 86400s(=1 day).", required = false) 087 protected long cacheTTL = TimeUnit.DAYS.toMillis(1); 088 089 @ConfigOption(defaultValue = "3", description = "The maximum number of retries for the execution of a particular SPARQL query.", required = false) 090 protected int retryCount = 3; 091 092 protected QueryExecutionFactory qef; 093 094 @ConfigOption(defaultValue = "10 000", description = "page size", exampleValue = "10000") 095 private long pageSize = 10000; 096 097 private KnowledgeSource schema; 098 099 public SparqlEndpointKS() {} 100 101 public SparqlEndpointKS(SparqlEndpoint endpoint) { 102 this.endpoint = endpoint; 103 } 104 105 public SparqlEndpointKS(SparqlEndpoint endpoint, KnowledgeSource schema) { 106 this.endpoint = endpoint; 107 this.schema = schema; 108 } 109 110 public SparqlEndpointKS(QueryExecutionFactory qef) { 111 this.qef = qef; 112 } 113 114 public SparqlEndpointKS(SparqlEndpoint endpoint, String cacheDirectory) { 115 this.endpoint = endpoint; 116 this.cacheDir = cacheDirectory; 117 } 118 119 public CacheFrontend getCache() { 120 return cache; 121 } 122 123 public QueryExecutionFactory getQueryExecutionFactory() { 124 return qef; 125 } 126 127 /** 128 * @param cache the cache to set 129 */ 130 public void setCache(CacheFrontend cache) { 131 this.cache = cache; 132 } 133 134 public void setQueryExecutionFactory(QueryExecutionFactory qef) { 135 this.qef = qef; 136 } 137 138 @Override 139 public void init() throws ComponentInitException { 140 if(!initialized){ 141 if(isRemote()) { 142 if(endpoint == null) { 143 endpoint = new SparqlEndpoint(url, defaultGraphURIs, namedGraphURIs); 144 } 145 supportsSPARQL_1_1 = new SPARQLTasks(endpoint).supportsSPARQL_1_1(); 146 } 147 148 if(qef == null) { 149 qef = buildQueryExecutionFactory(); 150 } 151 152 initialized = true; 153 } 154 155 initialized = true; 156 logger.info("SPARQL KB setup:\n" + toString()); 157 } 158 159 protected QueryExecutionFactory buildQueryExecutionFactory() { 160 /*QueryExecutionFactory qef = new org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp( 161 endpoint.getURL().toString(), 162 endpoint.getDefaultGraphURIs());*/ 163 QueryExecutionFactory qef = FluentQueryExecutionFactory 164 .http(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs()) 165 .config().withPostProcessor(qe -> ((QueryEngineHTTP) ((QueryExecutionHttpWrapper) qe).getDecoratee()) 166 .setModelContentType(WebContent.contentTypeRDFXML)) 167 .end() 168 .create(); 169 170 if(useCache) { 171 qef = CacheUtilsH2.createQueryExecutionFactory(qef, cacheDir, false, cacheTTL ); 172 } else { 173 // use in-memory cache 174 qef = CacheUtilsH2.createQueryExecutionFactory(qef, cacheDir, true, cacheTTL); 175 } 176 177 // add some delay 178 qef = new QueryExecutionFactoryDelay(qef, queryDelay); 179 180 if(retryCount > 0) { 181 qef = new QueryExecutionFactoryRetry(qef, retryCount, 1, TimeUnit.SECONDS); 182 } 183 184 // add pagination to avoid incomplete result sets due to limitations of the endpoint 185// qef = new QueryExecutionFactoryPaginated(qef, pageSize); 186 187 return qef; 188 } 189 190 public void setPageSize(long pageSize) { 191 this.pageSize = pageSize; 192 } 193 194 public SparqlEndpoint getEndpoint() { 195 return endpoint; 196 } 197 198 public URL getUrl() { 199 return url; 200 } 201 202 public void setUrl(URL url) { 203 this.url = url; 204 } 205 206 public boolean isRemote() { 207 return isRemote; 208 } 209 210 public List<String> getDefaultGraphURIs() { 211 return defaultGraphURIs; 212 } 213 214 public void setDefaultGraphURIs(List<String> defaultGraphURIs) { 215 this.defaultGraphURIs = defaultGraphURIs; 216 } 217 218 public List<String> getNamedGraphURIs() { 219 return namedGraphURIs; 220 } 221 222 public void setNamedGraphURIs(List<String> namedGraphURIs) { 223 this.namedGraphURIs = namedGraphURIs; 224 } 225 226 public boolean supportsSPARQL_1_1() { 227 return supportsSPARQL_1_1; 228 } 229 230 public void setSupportsSPARQL_1_1(boolean supportsSPARQL_1_1) { 231 this.supportsSPARQL_1_1 = supportsSPARQL_1_1; 232 } 233 234 /** 235 * Set a delay between each sent SPARQL query to avoid overloading of the 236 * endpoint. Note that this does only make sense for remote endpoints and 237 * will be ignored for local files. 238 * @param queryDelay the delay in milliseconds 239 */ 240 public void setQueryDelay(int queryDelay) { 241 this.queryDelay = queryDelay; 242 } 243 244 /** 245 * @param useCache the useCache to set 246 */ 247 public void setUseCache(boolean useCache) { 248 this.useCache = useCache; 249 } 250 251 /** 252 * Set the file-based cache directory. Default is the temporary 253 * folder of the operating system retrieved by using java.io.tmpdir, 254 * i.e. in most cases 255 * <table> 256 * <tr><th>OS</th><th>Directory</th></tr> 257 * <tr><td>Linux</td><td>/tmp/</td></tr> 258 * <tr><td>Windows</td><td>C:\temp</td></tr> 259 * </table> 260 * 261 * @param cacheDir the absolute cache directory path 262 */ 263 public void setCacheDir(String cacheDir) { 264 this.cacheDir = cacheDir; 265 } 266 267 /** 268 * Set the time-to-live for the file-based SPARQL cache. 269 * @param cacheTTL the time-to-live value in milliseconds 270 */ 271 public void setCacheTTL(long cacheTTL) { 272 this.cacheTTL = cacheTTL; 273 } 274 275 /** 276 * @return if exists, a knowledge source which contains the schema 277 */ 278 public KnowledgeSource getSchema() { 279 return schema; 280 } 281 282 public int getRetryCount() { 283 return retryCount; 284 } 285 286 public void setRetryCount(int retryCount) { 287 this.retryCount = retryCount; 288 } 289 290 @Override 291 public String toString() { 292 String out = String.format("%-15s %-25s%n", "Endpoint:", "Remote"); 293 if (qef != null) { 294 out += String.format("%-15s %-25s%n", "URL:", qef.getId()); 295 } else { 296 out += String.format("%-15s %-25s%n", "URL:", "null"); 297 } 298 out += String.format("%-15s %-25s%n", "Cache:", cacheDir); 299 out += String.format("%-15s %dms%n", "Delay:", queryDelay); 300 return out; 301 } 302 303}