001/**
002 * 
003 */
004package org.dllearner.algorithms.isle.textretrieval;
005
006import com.google.common.base.Joiner;
007import org.dllearner.algorithms.isle.TextDocumentGenerator;
008import org.dllearner.algorithms.isle.index.LinguisticUtil;
009import org.dllearner.algorithms.isle.index.Token;
010import org.dllearner.kb.OWLAPIOntology;
011import org.semanticweb.owlapi.model.*;
012import org.semanticweb.owlapi.search.EntitySearcher;
013import org.semanticweb.owlapi.util.IRIShortFormProvider;
014import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider;
015import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
016
017import java.util.*;
018
019/**
020 * @author Lorenz Buehmann
021 *
022 */
023public class AnnotationEntityTextRetriever implements EntityTextRetriever{
024        
025        private OWLOntology ontology;
026        private OWLOntologyManager manager;
027        
028        private String language = "en";
029        private double weight = 1d;
030        
031        private boolean useShortFormFallback = true;
032        private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider();
033        protected boolean determineHeadNoun = false;
034        
035        private OWLAnnotationProperty[] properties;
036        
037        private static final OWLClass OWL_THING = new OWLDataFactoryImpl().getOWLThing();
038
039        public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... properties) {
040                this.ontology = ontology;
041                this.properties = properties;
042        }
043        
044        public AnnotationEntityTextRetriever(OWLAPIOntology ontology, OWLAnnotationProperty... properties) {
045                this.ontology = ontology.createOWLOntology(manager);
046        }
047        
048        /**
049         * @param language the language to set
050         */
051        public void setLanguage(String language) {
052                this.language = language;
053        }
054        
055        /**
056         * Whether to use the short form of the IRI as fallback, if no label is given.
057         * @param useShortFormFallback the useShortFormFallback to set
058         */
059        public void setUseShortFormFallback(boolean useShortFormFallback) {
060                this.useShortFormFallback = useShortFormFallback;
061        }
062
063        /* (non-Javadoc)
064         * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity)
065         */
066        @Override
067        public Map<List<Token>, Double> getRelevantText(OWLEntity entity) {
068                Map<List<Token>, Double> textWithWeight = new HashMap<>();
069                
070                for (OWLAnnotationProperty property : properties) {
071                        Collection<OWLAnnotation> annotations = EntitySearcher.getAnnotations(entity, ontology, property);
072                        for (OWLAnnotation annotation : annotations) {
073                                if (annotation.getValue() instanceof OWLLiteral) {
074                            OWLLiteral val = (OWLLiteral) annotation.getValue();
075                            if (val.hasLang(language)) {
076                                //trim
077                                String label = val.getLiteral().trim();
078                                if(entity.isOWLClass()){
079                                        label = label.toLowerCase();
080                                }
081                                //remove content in brackets like (...)
082                                label = label.replaceAll("\\s?\\((.*?)\\)", "");
083                                try {
084                                                        textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(label, determineHeadNoun), weight);
085                                                } catch (Exception e1) {
086                                                        e1.printStackTrace();
087                                                }
088                            }
089                        }
090                        }
091                }
092                
093                if(textWithWeight.isEmpty() && useShortFormFallback){
094                        String shortForm = sfp.getShortForm(entity.getIRI());
095                        shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromCamelCase(shortForm));
096                        shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromUnderscored(shortForm)).trim();
097                        textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(shortForm, determineHeadNoun), weight);
098                }
099                
100                return textWithWeight;
101        }
102        
103        @Override
104        public Map<String, Double> getRelevantTextSimple(OWLEntity entity) {
105                Map<String, Double> textWithWeight = new HashMap<>();
106                
107                for (OWLAnnotationProperty property : properties) {
108                        Collection<OWLAnnotation> annotations = EntitySearcher.getAnnotations(entity, ontology, property);
109                        for (OWLAnnotation annotation : annotations) {
110                                if (annotation.getValue() instanceof OWLLiteral) {
111                            OWLLiteral val = (OWLLiteral) annotation.getValue();
112                            if (val.hasLang(language)) {
113                                //trim
114                                String label = val.getLiteral().trim();
115                                if(entity.isOWLClass()){
116                                        label = label.toLowerCase();
117                                }
118                                //remove content in brackets like (...)
119                                label = label.replaceAll("\\s?\\((.*?)\\)", "");
120                                try {
121                                                        textWithWeight.put(label, weight);
122                                                } catch (Exception e1) {
123                                                        e1.printStackTrace();
124                                                }
125                            }
126                        }
127                        }
128                }
129                
130                if(textWithWeight.isEmpty() && useShortFormFallback){
131                        String shortForm = sfp.getShortForm(IRI.create(entity.toStringID()));
132                        shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromCamelCase(shortForm));
133                        shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromUnderscored(shortForm)).trim();
134                        textWithWeight.put(shortForm, weight);
135                }
136                
137                return textWithWeight;
138        }
139        
140        /**
141         * Returns for each entity in the ontology all relevant text, i.e. either the annotations or the short form of the IRI as fallback.
142         * @return
143         */
144        @Override
145        public Map<OWLEntity, Set<List<Token>>> getRelevantText(OWLOntology ontology) {
146                Map<OWLEntity, Set<List<Token>>> entity2RelevantText = new HashMap<>();
147                
148                Set<OWLEntity> schemaEntities = new HashSet<>();
149                schemaEntities.addAll(ontology.getClassesInSignature());
150                schemaEntities.addAll(ontology.getObjectPropertiesInSignature());
151                schemaEntities.addAll(ontology.getDataPropertiesInSignature());
152                schemaEntities.remove(OWL_THING);
153                
154                Map<List<Token>, Double> relevantText;
155                for (OWLEntity entity : schemaEntities) {
156                        relevantText = getRelevantText(entity);
157                        entity2RelevantText.put(entity, relevantText.keySet());
158                }
159                
160                return entity2RelevantText;
161        }
162}