001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.kb;
020
021import org.aksw.jena_sparql_api.cache.extra.CacheFrontend;
022import org.aksw.jena_sparql_api.cache.h2.CacheUtilsH2;
023import org.aksw.jena_sparql_api.core.FluentQueryExecutionFactory;
024import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
025import org.aksw.jena_sparql_api.delay.core.QueryExecutionFactoryDelay;
026import org.aksw.jena_sparql_api.http.QueryExecutionHttpWrapper;
027import org.aksw.jena_sparql_api.retry.core.QueryExecutionFactoryRetry;
028import org.apache.jena.riot.WebContent;
029import org.apache.jena.sparql.engine.http.QueryEngineHTTP;
030import org.dllearner.core.AbstractKnowledgeSource;
031import org.dllearner.core.ComponentAnn;
032import org.dllearner.core.ComponentInitException;
033import org.dllearner.core.KnowledgeSource;
034import org.dllearner.core.annotations.NoConfigOption;
035import org.dllearner.core.config.ConfigOption;
036import org.dllearner.kb.sparql.SPARQLTasks;
037import org.dllearner.kb.sparql.SparqlEndpoint;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import java.net.URL;
042import java.util.LinkedList;
043import java.util.List;
044import java.util.concurrent.TimeUnit;
045
046/**
047 * SPARQL endpoint knowledge source (without fragment extraction),
048 * in particular for those algorithms which work directly on an endpoint
049 * without requiring an OWL reasoner.
050 *
051 * @author Jens Lehmann
052 *
053 */
054@ComponentAnn(name = "SPARQL endpoint", shortName = "sparql", version = 0.2)
055public class SparqlEndpointKS extends AbstractKnowledgeSource {
056
057        private static final Logger logger = LoggerFactory.getLogger(SparqlEndpointKS.class);
058
059        private SparqlEndpoint endpoint;
060        @NoConfigOption
061        private CacheFrontend cache;
062        @NoConfigOption // auto-detected
063        private boolean supportsSPARQL_1_1 = false;
064        private boolean isRemote = true;
065
066        @ConfigOption(description="URL of the SPARQL endpoint", required=true)
067        private URL url;
068
069        @ConfigOption(description="a list of default graph URIs", defaultValue="{}", required=false)
070        private List<String> defaultGraphURIs = new LinkedList<>();
071
072        @ConfigOption(description="a list of named graph URIs", defaultValue="{}", required=false)
073        private List<String> namedGraphURIs = new LinkedList<>();
074
075        // some parameters for the query execution
076        @ConfigOption(defaultValue = "50", description = "Use this setting to avoid overloading the endpoint with a sudden burst of queries. A value below 0 means no delay.", required = false)
077        private long queryDelay = 50;
078
079        // caching options
080        @ConfigOption(defaultValue = "true", description = "Use this setting to enable caching of SPARQL queries in a local database.", required = false)
081        private boolean useCache = true;
082
083        @ConfigOption(defaultValue = "tmp folder of the system", description = "The base directory of the SPARQL query cache.", required = false)
084        protected String cacheDir = System.getProperty("java.io.tmpdir") + "/sparql-cache;COMPRESS=TRUE";
085
086        @ConfigOption(defaultValue = "86400", description = "The time to live in milliseconds for cached SPARQL queries, if enabled. The default value is 86400s(=1 day).", required = false)
087        protected long cacheTTL = TimeUnit.DAYS.toMillis(1);
088
089        @ConfigOption(defaultValue = "3", description = "The maximum number of retries for the execution of a particular SPARQL query.", required = false)
090        protected int retryCount = 3;
091
092        protected QueryExecutionFactory qef;
093
094        @ConfigOption(defaultValue = "10 000", description = "page size", exampleValue = "10000")
095        private long pageSize = 10000;
096        
097        private KnowledgeSource schema;
098
099        public SparqlEndpointKS() {}
100
101        public SparqlEndpointKS(SparqlEndpoint endpoint) {
102                this.endpoint = endpoint;
103        }
104        
105        public SparqlEndpointKS(SparqlEndpoint endpoint, KnowledgeSource schema) {
106                this.endpoint = endpoint;
107                this.schema = schema;
108        }
109
110        public SparqlEndpointKS(QueryExecutionFactory qef) {
111                this.qef = qef;
112        }
113
114        public SparqlEndpointKS(SparqlEndpoint endpoint, String cacheDirectory) {
115                this.endpoint = endpoint;
116                this.cacheDir = cacheDirectory;
117        }
118
119        public CacheFrontend getCache() {
120                return cache;
121        }
122
123        public QueryExecutionFactory getQueryExecutionFactory() {
124                return qef;
125        }
126
127        /**
128         * @param cache the cache to set
129         */
130        public void setCache(CacheFrontend cache) {
131                this.cache = cache;
132        }
133
134        public void setQueryExecutionFactory(QueryExecutionFactory qef) {
135                this.qef = qef;
136        }
137
138        @Override
139        public void init() throws ComponentInitException {
140                if(!initialized){
141                        if(isRemote()) {
142                                if(endpoint == null) {
143                                        endpoint = new SparqlEndpoint(url, defaultGraphURIs, namedGraphURIs);
144                                }
145                                supportsSPARQL_1_1 = new SPARQLTasks(endpoint).supportsSPARQL_1_1();
146                        }
147
148                        if(qef == null) {
149                                qef = buildQueryExecutionFactory();
150                        }
151
152                        initialized = true;
153                }
154                
155                initialized = true;
156                logger.info("SPARQL KB setup:\n" + toString());
157        }
158
159        protected QueryExecutionFactory buildQueryExecutionFactory() {
160                /*QueryExecutionFactory qef = new org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp(
161                                endpoint.getURL().toString(),
162                                endpoint.getDefaultGraphURIs());*/
163                QueryExecutionFactory qef = FluentQueryExecutionFactory
164                                .http(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs())
165                                .config().withPostProcessor(qe -> ((QueryEngineHTTP) ((QueryExecutionHttpWrapper) qe).getDecoratee())
166                                                .setModelContentType(WebContent.contentTypeRDFXML))
167                                .end()
168                                .create();
169
170                if(useCache) {
171                        qef = CacheUtilsH2.createQueryExecutionFactory(qef, cacheDir, false, cacheTTL );
172                } else {
173                        // use in-memory cache
174                        qef = CacheUtilsH2.createQueryExecutionFactory(qef, cacheDir, true, cacheTTL);
175                }
176
177                // add some delay
178                qef = new QueryExecutionFactoryDelay(qef, queryDelay);
179
180                if(retryCount > 0) {
181                        qef = new QueryExecutionFactoryRetry(qef, retryCount, 1, TimeUnit.SECONDS);
182                }
183
184                // add pagination to avoid incomplete result sets due to limitations of the endpoint
185//              qef = new QueryExecutionFactoryPaginated(qef, pageSize);
186
187                return qef;
188        }
189
190        public void setPageSize(long pageSize) {
191                this.pageSize = pageSize;
192        }
193
194        public SparqlEndpoint getEndpoint() {
195                return endpoint;
196        }
197
198        public URL getUrl() {
199                return url;
200        }
201
202        public void setUrl(URL url) {
203                this.url = url;
204        }
205
206        public boolean isRemote() {
207                return isRemote;
208        }
209
210        public List<String> getDefaultGraphURIs() {
211                return defaultGraphURIs;
212        }
213
214        public void setDefaultGraphURIs(List<String> defaultGraphURIs) {
215                this.defaultGraphURIs = defaultGraphURIs;
216        }
217
218        public List<String> getNamedGraphURIs() {
219                return namedGraphURIs;
220        }
221
222        public void setNamedGraphURIs(List<String> namedGraphURIs) {
223                this.namedGraphURIs = namedGraphURIs;
224        }
225
226        public boolean supportsSPARQL_1_1() {
227                return supportsSPARQL_1_1;
228        }
229
230        public void setSupportsSPARQL_1_1(boolean supportsSPARQL_1_1) {
231                this.supportsSPARQL_1_1 = supportsSPARQL_1_1;
232        }
233
234        /**
235         * Set a delay between each sent SPARQL query to avoid overloading of the
236         * endpoint. Note that this does only make sense for remote endpoints and
237         * will be ignored for local files.
238         * @param queryDelay the delay in milliseconds
239         */
240        public void setQueryDelay(int queryDelay) {
241                this.queryDelay = queryDelay;
242        }
243
244        /**
245         * @param useCache the useCache to set
246         */
247        public void setUseCache(boolean useCache) {
248                this.useCache = useCache;
249        }
250
251        /**
252         * Set the file-based cache directory. Default is the temporary
253         * folder of the operating system retrieved by using java.io.tmpdir,
254         * i.e. in most cases
255         * <table>
256         * <tr><th>OS</th><th>Directory</th></tr>
257         * <tr><td>Linux</td><td>/tmp/</td></tr>
258         * <tr><td>Windows</td><td>C:\temp</td></tr>
259         * </table>
260         *
261         * @param cacheDir the absolute cache directory path
262         */
263        public void setCacheDir(String cacheDir) {
264                this.cacheDir = cacheDir;
265        }
266
267        /**
268         * Set the time-to-live for the file-based SPARQL cache.
269         * @param cacheTTL the time-to-live value in milliseconds
270         */
271        public void setCacheTTL(long cacheTTL) {
272                this.cacheTTL = cacheTTL;
273        }
274        
275        /**
276         * @return if exists, a knowledge source which contains the schema
277         */
278        public KnowledgeSource getSchema() {
279                return schema;
280        }
281
282        public int getRetryCount() {
283                return retryCount;
284        }
285
286        public void setRetryCount(int retryCount) {
287                this.retryCount = retryCount;
288        }
289
290        @Override
291        public String toString() {
292                String out = String.format("%-15s %-25s%n", "Endpoint:", "Remote");
293                if (qef != null) {
294                        out += String.format("%-15s %-25s%n", "URL:", qef.getId());
295                } else {
296                        out += String.format("%-15s %-25s%n", "URL:", "null");
297                }
298                out += String.format("%-15s %-25s%n", "Cache:", cacheDir);
299                out += String.format("%-15s %dms%n", "Delay:", queryDelay);
300                return out;
301        }
302
303}