001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.qtl.heuristics;
020
021import org.apache.jena.graph.Node;
022import org.apache.jena.query.QueryExecution;
023import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
024import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree;
025
026import java.util.List;
027import java.util.SortedSet;
028
029/**
030 * @author Lorenz Buehmann 
031 * @since May 4, 2015
032 */
033public class QueryTreeInformativeness {
034        
035        private QueryExecutionFactory qef;
036
037        public QueryTreeInformativeness(QueryExecutionFactory qef) {
038                this.qef = qef;
039        }
040        
041        public double getQueryTreeInformativeness(RDFResourceTree tree) {
042                double informativeness = 0;
043                SortedSet<Node> edges = tree.getEdges();
044                
045                for (Node edge : edges) {
046                        // ITF of edge in KB
047                        double itf = getInverseTripleFrequency(edge.getURI());
048                        
049                        double childrenInformativeness = 0;
050                        
051                        List<RDFResourceTree> children = tree.getChildren(edge);
052                        for (RDFResourceTree child : children) {
053                                childrenInformativeness += getQueryTreeInformativeness(child);
054                        }
055                        
056                        // divide by number of children for current edge
057                        childrenInformativeness /= children.size();
058                        
059                        // add itf(e) * informativeness(c) for all children c
060                        informativeness += itf * childrenInformativeness;
061                }
062                
063                // divide by number of distinct edges
064                informativeness /= edges.size();
065                
066                return informativeness;
067        }
068        
069        public double getInverseTripleFrequency(String property) {
070                // total number of triples
071                String query = "SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o .}";
072                QueryExecution qe = qef.createQueryExecution(query);
073                int total = qe.execSelect().next().getLiteral("cnt").getInt();
074                qe.close();
075                
076                // number of triples with predicate
077                int frequency = getPropertyFrequency(property);
078                
079                double itf = Math.log(total / (double) frequency);
080                
081                return itf;
082        }
083        
084        public int getPropertyFrequency(String property) {
085                // number of triples with predicate
086                String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o .}", property);
087                QueryExecution qe = qef.createQueryExecution(query);
088                int frequency = qe.execSelect().next().getLiteral("cnt").getInt();
089                qe.close();
090
091                return frequency;
092        }
093
094}