001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.qtl.heuristics; 020 021import org.apache.jena.graph.Node; 022import org.apache.jena.query.QueryExecution; 023import org.aksw.jena_sparql_api.core.QueryExecutionFactory; 024import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree; 025 026import java.util.List; 027import java.util.SortedSet; 028 029/** 030 * @author Lorenz Buehmann 031 * @since May 4, 2015 032 */ 033public class QueryTreeInformativeness { 034 035 private QueryExecutionFactory qef; 036 037 public QueryTreeInformativeness(QueryExecutionFactory qef) { 038 this.qef = qef; 039 } 040 041 public double getQueryTreeInformativeness(RDFResourceTree tree) { 042 double informativeness = 0; 043 SortedSet<Node> edges = tree.getEdges(); 044 045 for (Node edge : edges) { 046 // ITF of edge in KB 047 double itf = getInverseTripleFrequency(edge.getURI()); 048 049 double childrenInformativeness = 0; 050 051 List<RDFResourceTree> children = tree.getChildren(edge); 052 for (RDFResourceTree child : children) { 053 childrenInformativeness += getQueryTreeInformativeness(child); 054 } 055 056 // divide by number of children for current edge 057 childrenInformativeness /= children.size(); 058 059 // add itf(e) * informativeness(c) for all children c 060 informativeness += itf * childrenInformativeness; 061 } 062 063 // divide by number of distinct edges 064 informativeness /= edges.size(); 065 066 return informativeness; 067 } 068 069 public double getInverseTripleFrequency(String property) { 070 // total number of triples 071 String query = "SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o .}"; 072 QueryExecution qe = qef.createQueryExecution(query); 073 int total = qe.execSelect().next().getLiteral("cnt").getInt(); 074 qe.close(); 075 076 // number of triples with predicate 077 int frequency = getPropertyFrequency(property); 078 079 double itf = Math.log(total / (double) frequency); 080 081 return itf; 082 } 083 084 public int getPropertyFrequency(String property) { 085 // number of triples with predicate 086 String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o .}", property); 087 QueryExecution qe = qef.createQueryExecution(query); 088 int frequency = qe.execSelect().next().getLiteral("cnt").getInt(); 089 qe.close(); 090 091 return frequency; 092 } 093 094}