001/**
002 * 
003 */
004package org.dllearner.algorithms.isle.index;
005
006import java.io.Serializable;
007import java.util.Collections;
008import java.util.HashMap;
009import java.util.Map;
010import java.util.Set;
011
012import com.google.common.collect.ComparisonChain;
013
014/**
015 * @author Lorenz Buehmann
016 *
017 */
018public class Token implements Comparable<Token>, Serializable{
019        
020        private String rawForm;
021        private String stemmedForm;
022        private String posTag;
023        private boolean isPunctuation;
024        private boolean isStopWord;
025        private boolean isHead;
026    /// for storing alternative forms of this token, e.g., generated by WordNet synonyms
027    private HashMap<String, Double> alternativeForms;
028
029        
030        public Token(String rawForm) {
031                this.rawForm = rawForm;
032        }
033        
034        public Token(String rawForm, String stemmedForm, String posTag, boolean isPunctuation, boolean isStopWord) {
035                this.rawForm = rawForm;
036                this.stemmedForm = stemmedForm;
037                this.posTag = posTag;
038                this.isPunctuation = isPunctuation;
039                this.isStopWord = isStopWord;
040        this.alternativeForms = new HashMap<>();
041        }
042        
043        /**
044         * @return the rawForm
045         */
046        public String getRawForm() {
047                return rawForm;
048        }
049        
050        /**
051         * @return the stemmedForm
052         */
053        public String getStemmedForm() {
054                return stemmedForm;
055        }
056        
057        /**
058         * @return the posTag
059         */
060        public String getPOSTag() {
061                return posTag;
062        }
063
064    /**
065     * Returns the unmodifiable list of alternative surface forms for this token. These alternative forms might be
066     * generated by, e.g., WordNet synonym expansion.
067     *
068     * @return unmodifiable set of alternative surface forms for this token
069     */
070    public Set<String> getAlternativeForms() {
071        return Collections.unmodifiableSet(alternativeForms.keySet());
072    }
073
074    /**
075     * Returns the map storing the scored alternative forms of this token.
076     */
077    public Map<String, Double> getScoredAlternativeForms() {
078        return Collections.unmodifiableMap(alternativeForms);
079    }
080
081    /**
082     * Adds a new surface form to the alternative forms of this token. Alternative forms are included in comparison of
083     * two tokens when using the {@link #equalsWithAlternativeForms}.
084     */
085    public void addAlternativeForm(String alternativeForm, Double score) {
086        this.alternativeForms.put(alternativeForm, score);
087    }
088
089    /**
090         * @return the isPunctuation
091         */
092        public boolean isPunctuation() {
093                return isPunctuation;
094        }
095        
096        /**
097         * @return the isStopWord
098         */
099        public boolean isStopWord() {
100                return isStopWord;
101        }
102        
103        /**
104         * @param stemmedForm the stemmedForm to set
105         */
106        public void setStemmedForm(String stemmedForm) {
107                this.stemmedForm = stemmedForm;
108        }
109        
110        /**
111         * @param posTag the posTag to set
112         */
113        public void setPOSTag(String posTag) {
114                this.posTag = posTag;
115        }
116        
117        /**
118         * @param isPunctuation the isPunctuation to set
119         */
120        public void setIsPunctuation(boolean isPunctuation) {
121                this.isPunctuation = isPunctuation;
122        }
123        
124        /**
125         * @param isStopWord the isStopWord to set
126         */
127        public void setIsStopWord(boolean isStopWord) {
128                this.isStopWord = isStopWord;
129        }
130        
131        /**
132         * @param isHead the token is the head of the containg sequence of tokens
133         */
134        public void setIsHead(boolean isHead) {
135                this.isHead = isHead;
136        }
137        
138        /**
139         * @return the isHead
140         */
141        public boolean isHead() {
142                return isHead;
143        }
144
145        /* (non-Javadoc)
146         * @see java.lang.Object#toString()
147         */
148        @Override
149        public String toString() {
150                return "[Word: " + rawForm + " | Stemmed word: " + stemmedForm + " | POS tag: " + posTag + " | Alternatives: " + alternativeForms.toString() + "]";
151        }
152
153    /**
154     * Compares the given token to this one including alternative forms. This means that tokens are considered to be
155     * equal iff the POS tags is the same and if the intersection of all surface forms (stemmed forms + alternative
156     * forms) is not empty.
157     *
158     * @param other    token to compare this token to
159     * @return true if tokens are equal considering alternative forms, otherwise false
160     */
161    public boolean equalsWithAlternativeForms(Token other) {
162        if (this == other) {
163            return true;
164        }
165
166        if (!posTag.equals(other.posTag)) {
167            return false;
168        }
169
170        if (other.stemmedForm.equals(stemmedForm) || other.alternativeForms.containsKey(stemmedForm) ||
171                alternativeForms.containsKey(other.stemmedForm)) {
172            return true;
173        }
174
175        return false;
176    }
177
178    @Override
179    public boolean equals(Object o) {
180        if (this == o) {
181            return true;
182        }
183        if (o == null || getClass() != o.getClass()) {
184            return false;
185        }
186
187        Token token = (Token) o;
188
189        if (!WordTypeComparator.sameWordType(posTag, token.posTag)) {
190            return false;
191        }
192        if (!stemmedForm.equals(token.stemmedForm)) {
193            return false;
194        }
195
196        return true;
197    }
198
199    @Override
200    public int hashCode() {
201        int result = stemmedForm.hashCode();
202        result = 31 * result + WordTypeComparator.hashCode(posTag);
203        return result;
204    }
205
206        /* (non-Javadoc)
207         * @see java.lang.Comparable#compareTo(java.lang.Object)
208         */
209        @Override
210        public int compareTo(Token other) {
211                return ComparisonChain.start()
212                                .compare(this.rawForm, other.rawForm)
213                                .compare(this.posTag, other.posTag)
214                                .result();
215        }
216}