001/**
002 * Copyright (C) 2007 - 2016, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package org.dllearner.algorithms.qtl.util;
020
021import java.net.URL;
022
023import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
024import org.dllearner.kb.SparqlEndpointKS;
025import org.dllearner.kb.sparql.SparqlEndpoint;
026import org.semanticweb.owlapi.model.IRI;
027import org.semanticweb.owlapi.model.OWLIndividual;
028import org.semanticweb.owlapi.model.OWLProperty;
029
030import uk.ac.manchester.cs.owl.owlapi.OWLNamedIndividualImpl;
031import uk.ac.manchester.cs.owl.owlapi.OWLObjectPropertyImpl;
032
033import org.apache.jena.query.QueryExecution;
034
035/**
036 * Contains a set of measures to compute the informativeness of a triple based on the work proposed in 
037 * REWOrD: Semantic Relatedness, AAAI 2012.
038 * 
039 * 
040 * @author Lorenz Buehmann
041 *
042 */
043public class InformativenessMeasures {
044        
045
046        private QueryExecutionFactory qef;
047
048        public InformativenessMeasures(QueryExecutionFactory qef) {
049                this.qef = qef;
050        }
051        
052        /**
053         * <p>
054         * The inverse triple frequency ITF(p), considers how many times a predicate
055         * is used in some RDF triple w.r.t. the total number of triples, and is
056         * defined as: 
057         * </p>
058         * 
059         * <p><code>log(|T|/|T(p)|)</code></p>
060         * 
061         * <p>
062         * where |T| is the total number of triples in the knowledge base and |T(p)|
063         * the total number of triples having p as a predicate.
064         * </p>
065         * 
066         * @param property the predicate
067         * @return the inverse triple frequency
068         */
069        public double getInverseTripleFrequency(OWLProperty property) {
070                // total number of triples
071                String query = "SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o .}";
072                QueryExecution qe = qef.createQueryExecution(query);
073                int total = qe.execSelect().next().getLiteral("cnt").getInt();
074                qe.close();
075                
076                // number of triples with predicate
077                query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o .}", property.toStringID());
078                qe = qef.createQueryExecution(query);
079                int frequency = qe.execSelect().next().getLiteral("cnt").getInt();
080                qe.close();
081                
082                
083                double itf = Math.log(total / (double) frequency);
084                
085                return itf;
086        }
087        
088        /**
089         * Predicate Frequency(PF) quantifies the informativeness of a predicate p
090         * in the context of a URI u. With context we mean the RDF triples where p
091         * and u appear together.
092         * 
093         * @param individual
094         * @param property the predicate
095         * @param outgoing
096         * @return
097         */
098        public double getPredicateFrequency(OWLIndividual individual, OWLProperty property, boolean outgoing) {
099                String query = outgoing ? "SELECT (COUNT(*) AS ?cnt) WHERE {<%s> <%s> ?o .}" : "SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> <%s> .}";
100                query = String.format(query, individual.toStringID(), property.toStringID());
101                QueryExecution qe = qef.createQueryExecution(query);
102                int pf = qe.execSelect().next().getLiteral("cnt").getInt();
103                qe.close();
104                
105                return pf;
106        }
107        
108        public double getPF_ITF(OWLIndividual individual, OWLProperty property, boolean outgoing) {
109                double itf = getInverseTripleFrequency(property);
110                double pf = getPredicateFrequency(individual, property, outgoing);
111                return pf * itf;
112        }
113        
114        public static void main(String[] args) throws Exception {
115                SparqlEndpointKS ks = new SparqlEndpointKS(new SparqlEndpoint(
116                                        new URL("http://dbpedia.org/sparql"), 
117                                        "http://dbpedia.org"));
118                ks.init();
119                
120                OWLProperty p1 = new OWLObjectPropertyImpl(IRI.create("http://dbpedia.org/ontology/birthPlace"));
121                OWLProperty p2 = new OWLObjectPropertyImpl(IRI.create("http://dbpedia.org/ontology/genre"));
122                
123                OWLIndividual ind1 = new OWLNamedIndividualImpl(IRI.create("http://dbpedia.org/resource/Kid_Canaveral"));
124                
125                InformativenessMeasures informativenessMeasures = new InformativenessMeasures(ks.getQueryExecutionFactory());
126                
127                double itf1 = informativenessMeasures.getInverseTripleFrequency(p1);
128                System.out.println("itf(" + p1 + ") = " + itf1);
129                
130                double itf2 = informativenessMeasures.getInverseTripleFrequency(p2);
131                System.out.println("itf(" + p2 + ") = " + itf2);
132                
133                double pf1_out = informativenessMeasures.getPredicateFrequency(ind1, p1, true);
134                double pf1_in = informativenessMeasures.getPredicateFrequency(ind1, p1, false);
135                System.out.println("pf_out(" + ind1 + "," + p1 + ") = " + pf1_out);
136                System.out.println("pf_in(" + ind1 + "," + p1 + ") = " + pf1_in);
137                
138                double pf2_out = informativenessMeasures.getPredicateFrequency(ind1, p2, true);
139                double pf2_in = informativenessMeasures.getPredicateFrequency(ind1, p2, false);
140                System.out.println("pf_out(" + ind1 + "," + p2 + ") = " + pf2_out);
141                System.out.println("pf_in(" + ind1 + "," + p2 + ") = " + pf2_in);
142        }
143
144}