package org.dllearner.algorithms.isle.index;

import org.dllearner.algorithms.isle.EntityCandidateGenerator;
import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Provides methods to annotate documents.
 * <p>
 * Annotation is done in three steps, each delegated to a collaborator supplied at construction time:
 * a {@link LinguisticAnnotator} produces the raw annotations, an {@link EntityCandidateGenerator}
 * maps each annotation to a set of scored candidate entities, and a {@link WordSenseDisambiguation}
 * turns an annotation plus its candidates into a {@link SemanticAnnotation}.
 *
 * @author Daniel Fleischhacker
 */
public class SemanticAnnotator {

    private final WordSenseDisambiguation wordSenseDisambiguation;
    private final EntityCandidateGenerator entityCandidateGenerator;
    private final LinguisticAnnotator linguisticAnnotator;

    /**
     * Initializes this semantic annotator with the components it delegates to.
     *
     * @param wordSenseDisambiguation  used to disambiguate an annotation against its candidate entities
     * @param entityCandidateGenerator used to obtain the candidate entities for the annotations
     * @param linguisticAnnotator      used to obtain the initial annotations of a document
     */
    public SemanticAnnotator(WordSenseDisambiguation wordSenseDisambiguation,
                             EntityCandidateGenerator entityCandidateGenerator,
                             LinguisticAnnotator linguisticAnnotator) {
        this.wordSenseDisambiguation = wordSenseDisambiguation;
        this.entityCandidateGenerator = entityCandidateGenerator;
        this.linguisticAnnotator = linguisticAnnotator;
    }

    /**
     * Processes the given document and returns the annotated version of this document.
     * <p>
     * Annotations without any candidate entities, and annotations for which the disambiguation
     * yields {@code null}, do not contribute a semantic annotation to the result.
     *
     * @param document the document to annotate
     * @return the given document extended with annotations
     */
    public AnnotatedDocument processDocument(TextDocument document) {
        Set<Annotation> annotations = linguisticAnnotator.annotate(document);
        Map<Annotation, Set<EntityScorePair>> candidatesMap = entityCandidateGenerator.getCandidatesMap(annotations);

        Set<SemanticAnnotation> semanticAnnotations = new HashSet<>();
        // Walk the entries directly so each annotation's candidates are read without a second lookup.
        for (Map.Entry<Annotation, Set<EntityScorePair>> entry : candidatesMap.entrySet()) {
            Set<EntityScorePair> candidateEntities = entry.getValue();
            if (candidateEntities == null || candidateEntities.isEmpty()) {
                // Nothing to disambiguate against for this annotation.
                continue;
            }
            SemanticAnnotation semanticAnnotation =
                    wordSenseDisambiguation.disambiguate(entry.getKey(), candidateEntities);
            if (semanticAnnotation != null) {
                semanticAnnotations.add(semanticAnnotation);
            }
        }
        return new AnnotatedTextDocument(document, semanticAnnotations);
    }
}