001package org.dllearner.algorithms.isle; 002 003import java.io.InputStream; 004import java.util.ArrayList; 005import java.util.Iterator; 006import java.util.List; 007 008import net.didion.jwnl.JWNL; 009import net.didion.jwnl.JWNLException; 010import net.didion.jwnl.data.IndexWord; 011import net.didion.jwnl.data.POS; 012import net.didion.jwnl.data.PointerTarget; 013import net.didion.jwnl.data.PointerUtils; 014import net.didion.jwnl.data.Synset; 015import net.didion.jwnl.data.Word; 016import net.didion.jwnl.data.list.PointerTargetNode; 017import net.didion.jwnl.data.list.PointerTargetNodeList; 018import net.didion.jwnl.dictionary.Dictionary; 019 020public class WordNet { 021 022 private static final double SYNONYM_FACTOR = 0.8; 023 private static final double HYPONYM_FACTOR = 0.4; 024 public Dictionary dict; 025 026 public WordNet() { 027 try { 028 JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream("wordnet_properties.xml")); 029 dict = Dictionary.getInstance(); 030 } 031 catch (JWNLException e) { 032 e.printStackTrace(); 033 } 034 } 035 036 public WordNet(String configPath) { 037 try { 038 JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream(configPath)); 039 dict = Dictionary.getInstance(); 040 } 041 catch (JWNLException e) { 042 e.printStackTrace(); 043 } 044 } 045 046 public WordNet(InputStream propertiesStream) { 047 try { 048 JWNL.initialize(propertiesStream); 049 dict = Dictionary.getInstance(); 050 } 051 catch (JWNLException e) { 052 e.printStackTrace(); 053 } 054 } 055 056 public static void main(String[] args) { 057 System.out.println(new WordNet().getBestSynonyms(POS.VERB, "learn")); 058 System.out.println(new WordNet().getSisterTerms(POS.NOUN, "actress")); 059 System.out.println("Hypernyms **************************"); 060 System.out.println(new WordNet().getHypernyms(POS.NOUN, "man")); 061 System.out.println("Hyponyms ****************************"); 062 System.out.println(new WordNet().getHyponyms(POS.NOUN, "god")); 063 System.out.println("Words for first synset **************************"); 064 System.out.println(new WordNet().getWordsForFirstSynset(POS.NOUN, "man")); 065 066 } 067 068 public List<String> getBestSynonyms(POS pos, String s) { 069 070 List<String> synonyms = new ArrayList<>(); 071 072 try { 073 IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) 074// IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); 075 if (iw != null) { 076 Synset[] synsets = iw.getSenses(); 077 Word[] words = synsets[0].getWords(); 078 for (Word w : words) { 079 String c = w.getLemma(); 080 if (!c.equals(s) && !c.contains(" ") && synonyms.size() < 4) { 081 synonyms.add(c); 082 } 083 } 084 } 085 086 } 087 catch (JWNLException e) { 088 e.printStackTrace(); 089 } 090 return synonyms; 091 } 092 093 /** 094 * Returns the lemmas for the top {@code n} synsets of the given POS for the string {@code s}. 095 * 096 * @param pos the part of speech to retrieve synonyms for 097 * @param s the string to retrieve synonyms for 098 * @param n the number of synonyms to retrieve 099 * @return list of the lemmas of the top n synonyms of s 100 */ 101 public List<String> getTopSynonyms(POS pos, String s, int n) { 102 103 List<String> synonyms = new ArrayList<>(); 104 105 try { 106 IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) 107// IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); 108 if (iw != null) { 109 Synset[] synsets = iw.getSenses(); 110 for (int i = 0; i < Math.min(n, synsets.length); i++) { 111 for (Word word : synsets[i].getWords()) { 112 String c = word.getLemma(); 113 if (!c.equals(s) && !c.contains(" ")) { 114 synonyms.add(c); 115 } 116 } 117 } 118 } 119 120 } 121 catch (JWNLException e) { 122 e.printStackTrace(); 123 } 124 return synonyms; 125 } 126 127 public List<String> getAllSynonyms(POS pos, String s) { 128 List<String> synonyms = new ArrayList<>(); 129 try { 130 IndexWord iw = dict.getIndexWord(pos, s); 131 if (iw != null) { 132 Synset[] synsets = iw.getSenses(); 133 for (Synset synset : synsets) { 134 for (Word w : synset.getWords()) { 135 String lemma = w.getLemma(); 136 if (!lemma.equals(s) && !lemma.contains(" ")) { 137 synonyms.add(lemma); 138 } 139 } 140 } 141 } 142 } 143 catch (JWNLException e) { 144 e.printStackTrace(); 145 } 146 147 return synonyms; 148 } 149 150 public List<String> getSisterTerms(POS pos, String s) { 151 List<String> sisterTerms = new ArrayList<>(); 152 153 try { 154 IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) 155// IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); 156 if (iw != null) { 157 Synset[] synsets = iw.getSenses(); 158 //System.out.println(synsets[0]); 159 PointerTarget[] pointerArr = synsets[0].getTargets(); 160 } 161 162 } 163 catch (JWNLException e) { 164 e.printStackTrace(); 165 } 166 return sisterTerms; 167 } 168 169 public List<String> getAttributes(String s) { 170 171 List<String> result = new ArrayList<>(); 172 173 try { 174 IndexWord iw = dict.getIndexWord(POS.ADJECTIVE, s); 175 if (iw != null) { 176 Synset[] synsets = iw.getSenses(); 177 Word[] words = synsets[0].getWords(); 178 for (Word w : words) { 179 String c = w.getLemma(); 180 if (!c.equals(s) && !c.contains(" ") && result.size() < 4) { 181 result.add(c); 182 } 183 } 184 } 185 186 } 187 catch (JWNLException e) { 188 e.printStackTrace(); 189 } 190 191 return result; 192 } 193 194 /** 195 * Returns a list of lemmas for the most frequent synset of the given word. 196 * @param word word to get synonyms for 197 * @param pos POS of the word to look up 198 * @return list of lemmas of the most frequent synset 199 */ 200 public List<String> getWordsForFirstSynset(POS pos, String word) { 201 List<String> result = new ArrayList<>(); 202 IndexWord indexWord = null; 203 Synset sense = null; 204 205 try { 206 indexWord = dict.getIndexWord(pos, word); 207 sense = indexWord.getSense(1); 208 for (Word w : sense.getWords()) { 209 result.add(w.getLemma()); 210 } 211 } 212 catch (JWNLException e) { 213 e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 214 } 215 216 return result; 217 } 218 219 /** 220 * Returns a list of words being lemmas of a most frequent synset for the given word or one of its hypernyms. 221 */ 222 public List<String> getHypernyms(POS pos, String word) { 223 List<String> result = new ArrayList<>(); 224 225 IndexWord indexWord; 226 Synset sense; 227 228 try { 229 indexWord = dict.getIndexWord(pos, word); 230 if (indexWord == null) { 231 return result; 232 } 233 sense = indexWord.getSense(1); 234 for (Word w : sense.getWords()) { 235 result.add(w.getLemma()); 236 } 237 PointerTargetNodeList target = PointerUtils.getInstance().getDirectHypernyms(sense); 238 while (target != null && !target.isEmpty()) { 239 for (Object aTarget : target) { 240 Synset s = ((PointerTargetNode) aTarget).getSynset(); 241 for (Word w : sense.getWords()) { 242 result.add(w.getLemma()); 243 } 244 } 245 target = PointerUtils.getInstance().getDirectHyponyms(((PointerTargetNode) target.get(0)).getSynset()); 246 System.out.println(target); 247 } 248 } 249 catch (JWNLException e) { 250 e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 251 } 252 253 return result; 254 } 255 256 public List<String> getHyponyms(POS pos, String s) { 257 ArrayList<String> result = new ArrayList<>(); 258 try { 259 IndexWord word = dict.getIndexWord(pos, s); 260 if (word == null) { 261 System.err.println("Unable to find index word for " + s); 262 return result; 263 } 264 Synset sense = word.getSense(1); 265 getHyponymsRecursive(result, sense, 3); 266 } 267 catch (JWNLException e) { 268 e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 269 } 270 return result; 271 } 272 273 public void getHyponymsRecursive(List<String> lemmas, Synset sense, int depthToGo) { 274 for (Word w : sense.getWords()) { 275 lemmas.add(w.getLemma()); 276 } 277 if (depthToGo == 0) { 278 return; 279 } 280 try { 281 PointerTargetNodeList directHyponyms = PointerUtils.getInstance().getDirectHyponyms(sense); 282 for (Object directHyponym : directHyponyms) { 283 getHyponymsRecursive(lemmas, ((PointerTargetNode) directHyponym).getSynset(), depthToGo - 1); 284 } 285 } 286 catch (JWNLException e) { 287 e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 288 } 289 } 290 291 public List<LemmaScorePair> getHyponymsScored(POS pos, String s) { 292 ArrayList<LemmaScorePair> result = new ArrayList<>(); 293 try { 294 IndexWord word = dict.getIndexWord(pos, s); 295 if (word == null) { 296 System.err.println("Unable to find index word for " + s); 297 return result; 298 } 299 Synset sense = word.getSense(1); 300 getHyponymsScoredRecursive(result, sense, 3, SYNONYM_FACTOR); 301 } 302 catch (JWNLException e) { 303 e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 304 } 305 return result; 306 } 307 308 public void getHyponymsScoredRecursive(List<LemmaScorePair> lemmas, Synset sense, int depthToGo, double score) { 309 for (Word w : sense.getWords()) { 310 lemmas.add(new LemmaScorePair(w.getLemma(), score)); 311 } 312 if (depthToGo == 0) { 313 return; 314 } 315 try { 316 PointerTargetNodeList directHyponyms = PointerUtils.getInstance().getDirectHyponyms(sense); 317 for (Object directHyponym : directHyponyms) { 318 getHyponymsScoredRecursive(lemmas, ((PointerTargetNode) directHyponym).getSynset(), depthToGo - 1, 319 score * HYPONYM_FACTOR); 320 } 321 } 322 catch (JWNLException e) { 323 e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 324 } 325 } 326 327 /** 328 * Funktion returns a List of Hypo and Hypernyms of a given string 329 * 330 * @param s Word for which you want to get Hypo and Hypersyms 331 * @return List of Hypo and Hypernyms 332 * @throws JWNLException 333 */ 334 public List<String> getRelatedNouns(String s) { 335 List<String> result = new ArrayList<>(); 336 IndexWord word = null; 337 Synset sense = null; 338 try { 339 word = dict.getIndexWord(POS.NOUN, s); 340 if (word != null) { 341 sense = word.getSense(1); 342 //Synset sense = word.getSense(1); 343 344 PointerTargetNodeList relatedListHypernyms = null; 345 PointerTargetNodeList relatedListHyponyms = null; 346 try { 347 relatedListHypernyms = PointerUtils.getInstance().getDirectHypernyms(sense); 348 } 349 catch (JWNLException e) { 350 // TODO Auto-generated catch block 351 e.printStackTrace(); 352 } 353 try { 354 relatedListHyponyms = PointerUtils.getInstance().getDirectHyponyms(sense); 355 } 356 catch (JWNLException e) { 357 // TODO Auto-generated catch block 358 e.printStackTrace(); 359 } 360 361 Iterator i = relatedListHypernyms.iterator(); 362 while (i.hasNext()) { 363 PointerTargetNode related = (PointerTargetNode) i.next(); 364 Synset s1 = related.getSynset(); 365 String tmp = (s1.toString()).replace(s1.getGloss(), ""); 366 tmp = tmp.replace(" -- ()]", ""); 367 tmp = tmp.replaceAll("[0-9]", ""); 368 tmp = tmp.replace("[Synset: [Offset: ", ""); 369 tmp = tmp.replace("] [POS: noun] Words: ", ""); 370 //its possible, that there is more than one word in a line from wordnet 371 String[] array_tmp = tmp.split(","); 372 for (String z : array_tmp) { 373 result.add(z.replace(" ", "")); 374 } 375 } 376 377 Iterator j = relatedListHyponyms.iterator(); 378 while (j.hasNext()) { 379 PointerTargetNode related = (PointerTargetNode) j.next(); 380 Synset s1 = related.getSynset(); 381 String tmp = (s1.toString()).replace(s1.getGloss(), ""); 382 tmp = tmp.replace(" -- ()]", ""); 383 tmp = tmp.replaceAll("[0-9]", ""); 384 tmp = tmp.replace("[Synset: [Offset: ", ""); 385 tmp = tmp.replace("] [POS: noun] Words: ", ""); 386 //its possible, that there is more than one word in a line from wordnet 387 String[] array_tmp = tmp.split(","); 388 for (String z : array_tmp) { 389 result.add(z.replace(" ", "")); 390 } 391 } 392 } 393 } 394 catch (JWNLException e) { 395 // TODO Auto-generated catch block 396 e.printStackTrace(); 397 } 398 399 return result; 400 } 401 402 public static class LemmaScorePair implements Comparable<LemmaScorePair> { 403 private String lemma; 404 private Double score; 405 406 @Override 407 public boolean equals(Object o) { 408 if (this == o) { 409 return true; 410 } 411 if (o == null || getClass() != o.getClass()) { 412 return false; 413 } 414 415 LemmaScorePair that = (LemmaScorePair) o; 416 417 if (lemma != null ? !lemma.equals(that.lemma) : that.lemma != null) { 418 return false; 419 } 420 if (score != null ? !score.equals(that.score) : that.score != null) { 421 return false; 422 } 423 424 return true; 425 } 426 427 @Override 428 public int hashCode() { 429 int result = lemma != null ? lemma.hashCode() : 0; 430 result = 31 * result + (score != null ? score.hashCode() : 0); 431 return result; 432 } 433 434 public String getLemma() { 435 436 return lemma; 437 } 438 439 public void setLemma(String lemma) { 440 this.lemma = lemma; 441 } 442 443 public Double getScore() { 444 return score; 445 } 446 447 public void setScore(Double score) { 448 this.score = score; 449 } 450 451 public LemmaScorePair(String lemma, Double score) { 452 453 this.lemma = lemma; 454 this.score = score; 455 } 456 457 @Override 458 public int compareTo(LemmaScorePair o) { 459 int val = score.compareTo(o.score); 460 461 if (val == 0) { 462 val = lemma.compareTo(o.getLemma()); 463 } 464 465 return val; 466 } 467 } 468 469}