Source code

001package org.dllearner.utilities;
002
003import com.google.common.collect.Sets;
004import org.apache.jena.query.ParameterizedSparqlString;
005import org.apache.jena.query.QueryExecution;
006import org.dllearner.core.AbstractReasonerComponent;
007import org.dllearner.accuracymethods.AccMethodApproximate;
008import org.dllearner.accuracymethods.AccMethodThreeValued;
009import org.dllearner.learningproblems.ClassLearningProblem;
010import org.dllearner.reasoning.SPARQLReasoner;
011import org.dllearner.utilities.owl.OWLClassExpressionToSPARQLConverter;
012import org.semanticweb.owlapi.model.OWLClassExpression;
013import org.semanticweb.owlapi.model.OWLDataFactory;
014import org.semanticweb.owlapi.model.OWLIndividual;
015import org.semanticweb.owlapi.model.OWLObjectIntersectionOf;
016import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
017
018import java.util.Collection;
019import java.util.Set;
020
021/**
022 * Extension of ReasoningUtils for ClassLearningProblem
023 */
024public class ReasoningUtilsCLP extends ReasoningUtils {
025        final private ClassLearningProblem problem;
026        private OWLDataFactory df = new OWLDataFactoryImpl();
027        private OWLClassExpressionToSPARQLConverter converter = new OWLClassExpressionToSPARQLConverter();
028
029
030        /**
031         * @param problem class learning problem
032         * @param reasoner reasoner component
033         */
034        public ReasoningUtilsCLP(ClassLearningProblem problem, AbstractReasonerComponent reasoner) {
035                super(reasoner);
036                this.problem = problem;
037        }
038
039        /**
040         * specialisation to indicate that calculation needs to be interrupted
041         * @return
042         */
043        @Override
044        protected boolean interrupted() {
045                return problem.terminationTimeExpired();
046        }
047
048        /**
049         * get coverage result for class learning problem. currently this is specialised for SPARQL reasoner and uses generic getCoverage otherwise
050         * @param description the description to test
051         * @param classInstances instances of the target class
052         * @param superClassInstances instaces of the superclass
053         * @return array of coverage data
054         */
055        public Coverage[] getCoverageCLP(OWLClassExpression description, Collection<OWLIndividual> classInstances,
056                                     Collection<OWLIndividual> superClassInstances) {
057                if (reasoner instanceof SPARQLReasoner) {
058                        SPARQLReasoner reasoner2 = (SPARQLReasoner)reasoner;
059                        Coverage[] ret = new Coverage[2];
060                        ret[0] = new Coverage();
061                        ret[1] = new Coverage();
062
063                        // R(C)
064                        String query = "SELECT (COUNT(DISTINCT ?s) AS ?cnt) WHERE {"
065                                        + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . "
066                                        + converter.convert("?s", description)
067                                        + "FILTER NOT EXISTS {?s a ?classToDescribe}}";
068                        ParameterizedSparqlString template = new ParameterizedSparqlString(query);
069                        //System.err.println(converter.convert("?s", description));
070                        //template.setIri("cls", description.asOWLClass().toStringID());
071                        template.setIri("classToDescribe", problem.getClassToDescribe().toStringID());
072
073                        QueryExecution qe = reasoner2.getQueryExecutionFactory().createQueryExecution(template.toString());
074                        ret[1].trueCount = qe.execSelect().next().getLiteral("cnt").getInt();
075                        ret[1].falseCount = superClassInstances.size() - ret[1].trueCount;
076
077                        // R(A)
078                        OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(problem.getClassToDescribe(), description);
079                        ret[0].trueCount = reasoner2.getPopularityOf(ce);
080                        ret[0].falseCount = classInstances.size() - ret[0].trueCount;
081
082                        return ret;
083                } else {
084                        return getCoverage(description, classInstances, superClassInstances);
085                }
086
087        }
088
089        /**
090         * Implementations of accuracy calculation for generalised measures according to method in A Note on the Evaluation of Inductive Concept Classification Procedures
091         * @param accuracyMethod method to use
092         * @param description description to test
093         * @param classInstances class instances. will be converted to set
094         * @param superClassInstances superclass instances. will be converted to set
095         * @param negatedClassInstances negated class instances. will be converted to set
096         * @param noise problem noise
097         * @return accuracy or -1
098         */
099        public double getAccuracyOrTooWeak3(AccMethodThreeValued accuracyMethod, OWLClassExpression description, Collection<OWLIndividual> classInstances, Collection<OWLIndividual> superClassInstances, Collection<OWLIndividual> negatedClassInstances, double noise) {
100                if (accuracyMethod instanceof AccMethodApproximate) {
101                        throw new RuntimeException();
102                } else {
103                        return getAccuracyOrTooWeakExact3(accuracyMethod, description, classInstances, superClassInstances, negatedClassInstances, noise);
104                }
105        }
106
107        /**
108         * @see #getAccuracyOrTooWeak3(AccMethodThreeValued, OWLClassExpression, Collection, Collection, Collection, double)
109         */
110        public double getAccuracyOrTooWeakExact3(AccMethodThreeValued accuracyMethod, OWLClassExpression description, Collection<OWLIndividual> classInstances, Collection<OWLIndividual> superClassInstances, Collection<OWLIndividual> negatedClassInstances, double noise) {
111                return getAccuracyOrTooWeakExact3(accuracyMethod, description,
112                                makeSet(classInstances), makeSet(superClassInstances), makeSet(negatedClassInstances),
113                                noise);
114        }
115
116        /**
117         * @see #getAccuracyOrTooWeakExact3(AccMethodThreeValued, OWLClassExpression, Collection, Collection, Collection, double)
118         */
119        public double getAccuracyOrTooWeakExact3(AccMethodThreeValued accuracyMethod, OWLClassExpression description, Set<OWLIndividual> classInstances, Set<OWLIndividual> superClassInstances, Set<OWLIndividual> negatedClassInstances, double noise) {
120                // implementation is based on:
121                // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
122                // default negation should be turned off when using fast instance checker
123                // compute I_C (negated and non-negated concepts separately)
124                ReasoningUtils.Coverage3[] cc = getCoverage3(description, df.getOWLObjectComplementOf(description), Sets.union(classInstances, superClassInstances));
125                // trueSet = icPos, falseSet = icNeg
126                if (cc == null) { // timeout
127                        return 0;
128                }
129                // semantic precision
130                // first compute I_C \cap Cn(DC)
131                // it seems that in our setting, we can ignore Cn, because the examples (class instances)
132                // are already part of the background knowledge
133                Set<OWLIndividual> tmp1Pos = Sets.intersection(cc[0].trueSet, classInstances);
134                Set<OWLIndividual> tmp1Neg = Sets.intersection(cc[0].falseSet, negatedClassInstances);
135                // icPos + icNeg <===> all returned results
136                // --> precision = tmp1size / (icpos + icneg)
137                // classInstances + negatedClassInstances <==> all results that should be returned
138                // -> recall = tmp1size / (cI + ncI)
139
140                // F_beta = true positives / (true positives + false negatives + false positives)
141
142                // Cn(I_C) \cap D_C is the same set if we ignore Cn ...
143                // ---> @@@@ AccMethodGenFMeasure
144                return accuracyMethod.getAccOrTooWeak3(tmp1Pos.size(), tmp1Neg.size(), cc[0].trueCount, cc[0].falseCount, classInstances.size(), negatedClassInstances.size(), noise);
145        }
146}