/**
 *
 */
package org.dllearner.algorithms.isle.index.syntactic;

import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.dllearner.algorithms.isle.index.Index;
import org.semanticweb.owlapi.model.OWLAnnotation;
import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom;
import org.semanticweb.owlapi.model.OWLAnnotationProperty;
import org.semanticweb.owlapi.model.OWLDataFactory;
import org.semanticweb.owlapi.model.OWLEntity;
import org.semanticweb.owlapi.model.OWLLiteral;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;

import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;

/**
 * Creates a Lucene index for the labels of classes and properties.
 * <p>
 * For every class, object property and data property in the signature of the
 * given ontology, the literal value of the configured annotation property in
 * the configured language ({@code "en"}) is written to a Lucene document
 * together with the entity's IRI string (field {@code "uri"}).
 *
 * @author Lorenz Buehmann
 *
 */
public class OWLOntologyLuceneSyntacticIndexCreator {

    /** Lucene directory backing the index; created in a fresh temporary folder. */
    private final Directory directory = new MMapDirectory(Files.createTempDirectory("Lucene"));
    private final OWLOntology ontology;
    /** Classes, object properties and data properties of the ontology signature. */
    private final Set<OWLEntity> schemaEntities;

    private final OWLDataFactory df = new OWLDataFactoryImpl();
    /** Annotation property whose literal values are indexed. */
    private final OWLAnnotationProperty annotationProperty;
    /** Only literals carrying this language tag are indexed. */
    private final String language = "en";
    /** Name of the Lucene field that receives the label text. */
    private final String searchField;

    /**
     * Collects the schema entities of the ontology and prepares a temporary
     * Lucene directory.
     *
     * @param ontology the ontology whose classes and properties are indexed
     * @param annotationProperty the annotation property holding the labels;
     *        if {@code null}, {@code rdfs:label} is used
     * @param searchField the name of the Lucene field the label is stored in
     * @throws IOException if the temporary index directory cannot be created
     */
    public OWLOntologyLuceneSyntacticIndexCreator(OWLOntology ontology, OWLAnnotationProperty annotationProperty, String searchField) throws IOException {
        this.ontology = ontology;
        // Fall back to rdfs:label instead of silently building an empty index.
        this.annotationProperty = annotationProperty != null
                ? annotationProperty
                : df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI());
        this.searchField = searchField;

        schemaEntities = new HashSet<>();
        schemaEntities.addAll(ontology.getClassesInSignature());
        schemaEntities.addAll(ontology.getObjectPropertiesInSignature());
        schemaEntities.addAll(ontology.getDataPropertiesInSignature());
    }

    /**
     * Writes one Lucene document per labelled schema entity and returns an
     * index over the resulting directory.
     * <p>
     * Each document has a stored, untokenized {@code "uri"} field holding the
     * entity's IRI string and a stored text field (named by the search field
     * given at construction) holding the label. Entities without a matching
     * label are skipped.
     *
     * @return a {@link LuceneSyntacticIndex} over the freshly written directory
     * @throws Exception if writing the index or creating the index object fails
     */
    public Index buildIndex() throws Exception{
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);

        // try-with-resources guarantees the writer (and its directory lock)
        // is released even when adding documents fails.
        try (IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) {
            System.out.println( "Creating index ..." );

            FieldType stringType = new FieldType(StringField.TYPE_STORED);
            stringType.setStoreTermVectors(false);
            FieldType textType = new FieldType(TextField.TYPE_STORED);
            textType.setStoreTermVectors(false);

            // Lucene documents only have identity equality, so a list is the
            // natural container here.
            List<org.apache.lucene.document.Document> luceneDocuments = new ArrayList<>();
            for (OWLEntity entity : schemaEntities) {
                String label = findLabel(entity);
                if (label == null) {
                    continue;
                }
                org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
                luceneDocument.add(new Field("uri", entity.toStringID(), stringType));
                luceneDocument.add(new Field(searchField, label, textType));
                luceneDocuments.add(luceneDocument);
            }
            writer.addDocuments(luceneDocuments);

            System.out.println("Done.");
        }

        // The writer is closed (and thus committed) before the index is opened.
        return new LuceneSyntacticIndex(ontology, directory, searchField);
    }

    /**
     * Looks up a label of the given entity: the lexical form of a literal that
     * is asserted for the entity via the configured annotation property and
     * carries the configured language tag.
     *
     * @param entity the entity whose label is requested
     * @return a matching label, or {@code null} if the entity has none
     */
    private String findLabel(OWLEntity entity) {
        // Annotations of the entity itself, i.e. annotation assertions whose
        // subject is the entity's IRI - not the annotations of the ontology.
        for (OWLAnnotationAssertionAxiom assertion : ontology.getAnnotationAssertionAxioms(entity.getIRI())) {
            if (assertion.getProperty().equals(annotationProperty)
                    && assertion.getValue() instanceof OWLLiteral) {
                OWLLiteral val = (OWLLiteral) assertion.getValue();
                if (val.hasLang(language)) {
                    return val.getLiteral();
                }
            }
        }
        return null;
    }

}