001/** 002 * Copyright (C) 2007 - 2016, Jens Lehmann 003 * 004 * This file is part of DL-Learner. 005 * 006 * DL-Learner is free software; you can redistribute it and/or modify 007 * it under the terms of the GNU General Public License as published by 008 * the Free Software Foundation; either version 3 of the License, or 009 * (at your option) any later version. 010 * 011 * DL-Learner is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 018 */ 019package org.dllearner.algorithms.qtl.util; 020 021import java.net.URL; 022 023import org.aksw.jena_sparql_api.core.QueryExecutionFactory; 024import org.dllearner.kb.SparqlEndpointKS; 025import org.dllearner.kb.sparql.SparqlEndpoint; 026import org.semanticweb.owlapi.model.IRI; 027import org.semanticweb.owlapi.model.OWLIndividual; 028import org.semanticweb.owlapi.model.OWLProperty; 029 030import uk.ac.manchester.cs.owl.owlapi.OWLNamedIndividualImpl; 031import uk.ac.manchester.cs.owl.owlapi.OWLObjectPropertyImpl; 032 033import org.apache.jena.query.QueryExecution; 034 035/** 036 * Contains a set of measures to compute the informativeness of a triple based on the work proposed in 037 * REWOrD: Semantic Relatedness, AAAI 2012. 038 * 039 * 040 * @author Lorenz Buehmann 041 * 042 */ 043public class InformativenessMeasures { 044 045 046 private QueryExecutionFactory qef; 047 048 public InformativenessMeasures(QueryExecutionFactory qef) { 049 this.qef = qef; 050 } 051 052 /** 053 * <p> 054 * The inverse triple frequency ITF(p), considers how many times a predicate 055 * is used in some RDF triple w.r.t. the total number of triples, and is 056 * defined as: 057 * </p> 058 * 059 * <p><code>log(|T|/|T(p)|)</code></p> 060 * 061 * <p> 062 * where |T| is the total number of triples in the knowledge base and |T(p)| 063 * the total number of triples having p as a predicate. 064 * </p> 065 * 066 * @param property the predicate 067 * @return the inverse triple frequency 068 */ 069 public double getInverseTripleFrequency(OWLProperty property) { 070 // total number of triples 071 String query = "SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o .}"; 072 QueryExecution qe = qef.createQueryExecution(query); 073 int total = qe.execSelect().next().getLiteral("cnt").getInt(); 074 qe.close(); 075 076 // number of triples with predicate 077 query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o .}", property.toStringID()); 078 qe = qef.createQueryExecution(query); 079 int frequency = qe.execSelect().next().getLiteral("cnt").getInt(); 080 qe.close(); 081 082 083 double itf = Math.log(total / (double) frequency); 084 085 return itf; 086 } 087 088 /** 089 * Predicate Frequency(PF) quantifies the informativeness of a predicate p 090 * in the context of a URI u. With context we mean the RDF triples where p 091 * and u appear together. 092 * 093 * @param individual 094 * @param property the predicate 095 * @param outgoing 096 * @return 097 */ 098 public double getPredicateFrequency(OWLIndividual individual, OWLProperty property, boolean outgoing) { 099 String query = outgoing ? "SELECT (COUNT(*) AS ?cnt) WHERE {<%s> <%s> ?o .}" : "SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> <%s> .}"; 100 query = String.format(query, individual.toStringID(), property.toStringID()); 101 QueryExecution qe = qef.createQueryExecution(query); 102 int pf = qe.execSelect().next().getLiteral("cnt").getInt(); 103 qe.close(); 104 105 return pf; 106 } 107 108 public double getPF_ITF(OWLIndividual individual, OWLProperty property, boolean outgoing) { 109 double itf = getInverseTripleFrequency(property); 110 double pf = getPredicateFrequency(individual, property, outgoing); 111 return pf * itf; 112 } 113 114 public static void main(String[] args) throws Exception { 115 SparqlEndpointKS ks = new SparqlEndpointKS(new SparqlEndpoint( 116 new URL("http://dbpedia.org/sparql"), 117 "http://dbpedia.org")); 118 ks.init(); 119 120 OWLProperty p1 = new OWLObjectPropertyImpl(IRI.create("http://dbpedia.org/ontology/birthPlace")); 121 OWLProperty p2 = new OWLObjectPropertyImpl(IRI.create("http://dbpedia.org/ontology/genre")); 122 123 OWLIndividual ind1 = new OWLNamedIndividualImpl(IRI.create("http://dbpedia.org/resource/Kid_Canaveral")); 124 125 InformativenessMeasures informativenessMeasures = new InformativenessMeasures(ks.getQueryExecutionFactory()); 126 127 double itf1 = informativenessMeasures.getInverseTripleFrequency(p1); 128 System.out.println("itf(" + p1 + ") = " + itf1); 129 130 double itf2 = informativenessMeasures.getInverseTripleFrequency(p2); 131 System.out.println("itf(" + p2 + ") = " + itf2); 132 133 double pf1_out = informativenessMeasures.getPredicateFrequency(ind1, p1, true); 134 double pf1_in = informativenessMeasures.getPredicateFrequency(ind1, p1, false); 135 System.out.println("pf_out(" + ind1 + "," + p1 + ") = " + pf1_out); 136 System.out.println("pf_in(" + ind1 + "," + p1 + ") = " + pf1_in); 137 138 double pf2_out = informativenessMeasures.getPredicateFrequency(ind1, p2, true); 139 double pf2_in = informativenessMeasures.getPredicateFrequency(ind1, p2, false); 140 System.out.println("pf_out(" + ind1 + "," + p2 + ") = " + pf2_out); 141 System.out.println("pf_in(" + ind1 + "," + p2 + ") = " + pf2_in); 142 } 143 144}