package org.dllearner.algorithms.isle.textretrieval;

import com.google.common.base.Joiner;
import org.dllearner.algorithms.isle.TextDocumentGenerator;
import org.dllearner.algorithms.isle.index.LinguisticUtil;
import org.dllearner.algorithms.isle.index.Token;
import org.dllearner.kb.OWLAPIOntology;
import org.semanticweb.owlapi.model.*;
import org.semanticweb.owlapi.search.EntitySearcher;
import org.semanticweb.owlapi.util.IRIShortFormProvider;
import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider;
import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;

import java.util.*;
import java.util.regex.Pattern;

/**
 * Retrieves the text associated with an OWL entity from the literal values of a
 * configurable set of annotation properties, restricted to a single language tag.
 * If no matching annotation is found, the short form of the entity IRI (split at
 * camel-case and underscore boundaries) can be used as a fallback.
 *
 * @author Lorenz Buehmann
 *
 */
public class AnnotationEntityTextRetriever implements EntityTextRetriever{

	/** Matches an optional leading whitespace character followed by a bracketed part like "(...)". */
	private static final Pattern BRACKETED_TEXT = Pattern.compile("\\s?\\((.*?)\\)");

	private static final OWLClass OWL_THING = new OWLDataFactoryImpl().getOWLThing();

	private final OWLOntology ontology;
	// NOTE(review): never assigned anywhere in this class, so it is always null
	// when handed to OWLAPIOntology#createOWLOntology - confirm that is intended.
	private OWLOntologyManager manager;

	private String language = "en";
	private double weight = 1d;

	private boolean useShortFormFallback = true;
	private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider();
	protected boolean determineHeadNoun = false;

	private final OWLAnnotationProperty[] properties;

	/**
	 * @param ontology the ontology whose annotation assertions are searched
	 * @param properties the annotation properties whose literal values are used as text
	 */
	public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... properties) {
		this.ontology = ontology;
		this.properties = orEmpty(properties);
	}

	/**
	 * @param ontology the ontology wrapper from which the OWL ontology is obtained
	 * @param properties the annotation properties whose literal values are used as text
	 */
	public AnnotationEntityTextRetriever(OWLAPIOntology ontology, OWLAnnotationProperty... properties) {
		this.ontology = ontology.createOWLOntology(manager);
		// Previously the properties were dropped here, leaving the field null and
		// making every retrieval call fail with a NullPointerException.
		this.properties = orEmpty(properties);
	}

	/**
	 * Treats an explicitly passed {@code null} varargs array as "no properties".
	 */
	private static OWLAnnotationProperty[] orEmpty(OWLAnnotationProperty[] properties) {
		return properties != null ? properties : new OWLAnnotationProperty[0];
	}

	/**
	 * @param language the language to set
	 */
	public void setLanguage(String language) {
		this.language = language;
	}

	/**
	 * Whether to use the short form of the IRI as fallback, if no label is given.
	 * @param useShortFormFallback the useShortFormFallback to set
	 */
	public void setUseShortFormFallback(boolean useShortFormFallback) {
		this.useShortFormFallback = useShortFormFallback;
	}

	/**
	 * Returns the tokenized annotation texts of the given entity, each mapped to
	 * the weight of this retriever. If no text could be obtained from the
	 * annotations and the short form fallback is enabled, the tokenized short form
	 * of the entity IRI is returned instead.
	 *
	 * @param entity the entity to retrieve text for
	 * @return a map from token list to weight
	 * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity)
	 */
	@Override
	public Map<List<Token>, Double> getRelevantText(OWLEntity entity) {
		Map<List<Token>, Double> textWithWeight = new HashMap<>();

		for (String label : collectLabels(entity)) {
			try {
				textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(label, determineHeadNoun), weight);
			} catch (Exception e1) {
				// Best effort: a label that cannot be processed is reported and
				// skipped so that the remaining labels are still returned.
				e1.printStackTrace();
			}
		}

		if(textWithWeight.isEmpty() && useShortFormFallback){
			textWithWeight.put(TextDocumentGenerator.getInstance().generateDocument(shortFormText(entity), determineHeadNoun), weight);
		}

		return textWithWeight;
	}

	/**
	 * Same as {@link #getRelevantText(OWLEntity)}, but returns the plain label
	 * strings instead of token lists.
	 *
	 * @param entity the entity to retrieve text for
	 * @return a map from label text to weight
	 */
	@Override
	public Map<String, Double> getRelevantTextSimple(OWLEntity entity) {
		Map<String, Double> textWithWeight = new HashMap<>();

		for (String label : collectLabels(entity)) {
			textWithWeight.put(label, weight);
		}

		if(textWithWeight.isEmpty() && useShortFormFallback){
			textWithWeight.put(shortFormText(entity), weight);
		}

		return textWithWeight;
	}

	/**
	 * Returns for each entity in the ontology all relevant text, i.e. either the annotations or the short form of the IRI as fallback.
	 * The entities considered are the classes (except owl:Thing), object properties and
	 * data properties in the signature of the given ontology. Note that the annotations
	 * themselves are looked up in the ontology this retriever was constructed with.
	 *
	 * @param ontology the ontology whose signature provides the entities
	 * @return a map from each entity to the set of token lists retrieved for it
	 */
	@Override
	public Map<OWLEntity, Set<List<Token>>> getRelevantText(OWLOntology ontology) {
		Map<OWLEntity, Set<List<Token>>> entity2RelevantText = new HashMap<>();

		Set<OWLEntity> schemaEntities = new HashSet<>();
		schemaEntities.addAll(ontology.getClassesInSignature());
		schemaEntities.addAll(ontology.getObjectPropertiesInSignature());
		schemaEntities.addAll(ontology.getDataPropertiesInSignature());
		schemaEntities.remove(OWL_THING);

		for (OWLEntity entity : schemaEntities) {
			entity2RelevantText.put(entity, getRelevantText(entity).keySet());
		}

		return entity2RelevantText;
	}

	/**
	 * Collects the cleaned-up literal values of all configured annotation
	 * properties for the given entity that carry the configured language tag.
	 * Values are trimmed, lower-cased for classes, and stripped of bracketed parts.
	 */
	private List<String> collectLabels(OWLEntity entity) {
		List<String> labels = new ArrayList<>();

		for (OWLAnnotationProperty property : properties) {
			Collection<OWLAnnotation> annotations = EntitySearcher.getAnnotations(entity, ontology, property);
			for (OWLAnnotation annotation : annotations) {
				if (!(annotation.getValue() instanceof OWLLiteral)) {
					continue;
				}
				OWLLiteral val = (OWLLiteral) annotation.getValue();
				if (!val.hasLang(language)) {
					continue;
				}
				String label = val.getLiteral().trim();
				if(entity.isOWLClass()){
					label = label.toLowerCase();
				}
				//remove content in brackets like (...)
				labels.add(BRACKETED_TEXT.matcher(label).replaceAll(""));
			}
		}

		return labels;
	}

	/**
	 * Builds the fallback text from the short form of the entity IRI by splitting
	 * it at camel-case boundaries and underscores and joining the parts with spaces.
	 */
	private String shortFormText(OWLEntity entity) {
		String shortForm = sfp.getShortForm(entity.getIRI());
		shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromCamelCase(shortForm));
		return Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromUnderscored(shortForm)).trim();
	}
}