package org.dllearner.algorithms.isle;

import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

/**
 * Part-of-speech tagger backed by the Stanford CoreNLP pipeline
 * (annotators: tokenize, ssplit, pos).
 *
 * <p>Implemented as a lazily initialized singleton because constructing a
 * {@link StanfordCoreNLP} pipeline loads the tagger model, which is expensive;
 * {@link #getInstance()} is {@code synchronized} to make the lazy
 * initialization thread-safe.
 */
public class StanfordPartOfSpeechTagger {

    private static StanfordPartOfSpeechTagger instance;

    /** The CoreNLP pipeline; built once in the constructor and never replaced. */
    private final StanfordCoreNLP pipeline;

    private StanfordPartOfSpeechTagger() {
        Properties props = new Properties();
        // setProperty is the type-safe accessor for String-valued Properties entries.
        props.setProperty("annotators", "tokenize, ssplit, pos");
        pipeline = new StanfordCoreNLP(props);
    }

    /**
     * Returns the shared tagger instance, creating it on first use.
     *
     * @return the singleton {@code StanfordPartOfSpeechTagger}
     */
    public static synchronized StanfordPartOfSpeechTagger getInstance() {
        if (instance == null) {
            instance = new StanfordPartOfSpeechTagger();
        }
        return instance;
    }

    /**
     * Tags the given text and returns a space-separated sequence of
     * {@code word/POS} pairs, e.g. {@code "The/DT dog/NN"}.
     *
     * @param text the raw input text to tokenize, sentence-split and tag
     * @return the tagged tokens joined by single spaces (empty string for
     *         input that yields no tokens)
     */
    public String tag(String text) {
        // Use a StringBuilder instead of repeated String concatenation:
        // the original "out += ..." in a nested loop is O(n^2) in the token count.
        StringBuilder out = new StringBuilder();

        // Create an empty Annotation just with the given text.
        Annotation document = new Annotation(text);

        // Run all annotators (tokenize, ssplit, pos) on this text.
        pipeline.annotate(document);

        // A CoreMap is essentially a Map keyed by class objects with typed values;
        // SentencesAnnotation yields the sentences found in the document.
        List<CoreMap> sentences = document.get(SentencesAnnotation.class);

        for (CoreMap sentence : sentences) {
            for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                // The surface form of the token.
                String word = token.get(TextAnnotation.class);
                // The POS tag assigned to the token.
                String pos = token.get(PartOfSpeechAnnotation.class);

                out.append(' ').append(word).append('/').append(pos);
            }
        }

        // Drop the single leading space introduced by the append pattern.
        return out.toString().trim();
    }
}