package lingscope.algorithms;

import edu.stanford.nlp.tagger.maxent.MaxentTagger; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger;

/**
 * Part-of-speech (POS) tagger that wraps the Stanford {@code MaxentTagger}.
 *
 * @author shashank
 */

public class PosTagger {

    /** Shared logger, looked up once rather than on every failure. */
    private static final Logger LOGGER = Logger.getLogger(PosTagger.class.getName());

    /**
     * Character separating a word from its tag in the tagger output.
     * NOTE(review): assumes the loaded model emits {@code word/TAG} tokens;
     * confirm against the tagger's configured tag separator.
     */
    private static final char TAG_SEPARATOR = '/';

    /** Underlying tagger, or {@code null} when the model could not be loaded. */
    private final MaxentTagger posTagger;

    /**
     * Creates an instance of POS tagger by loading the given grammar file.
     * <p>
     * Loading is best-effort: if the model cannot be loaded the failure is
     * logged at {@code SEVERE} level and the instance is still created, but
     * {@link #replaceWordsWithPos(String, boolean)} will then return an empty
     * list for every input.
     *
     * @param grammarFile location of the tagger model passed to the
     *                    {@code MaxentTagger} constructor
     */
    public PosTagger(String grammarFile) {
        // Assign through a local so the field can be final despite the try/catch.
        MaxentTagger loaded = null;
        try {
            loaded = new MaxentTagger(grammarFile);
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "Could not load POS tagger model: " + grammarFile, ex);
        }
        posTagger = loaded;
    }

    /**
     * Takes a sentence as input and returns the list of POS tags associated
     * with each word in the sentence.
     *
     * @param sentence    the sentence to tag
     * @param isTokenized {@code true} if {@code sentence} is already tokenized;
     *                    {@code false} to first pass it through
     *                    {@code AbnerTokenizer.splitTermsByPunctuation}
     * @return one tag per token of the tagger output, in order; an empty list
     *         if the tagger is unavailable, tagging fails, or the output
     *         contains no tokens
     */
    public List<String> replaceWordsWithPos(String sentence, boolean isTokenized) {
        if (!isTokenized) {
            sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
        }
        List<String> ret = new ArrayList<String>();

        // Explicit check instead of relying on a caught NullPointerException.
        if (posTagger == null) {
            LOGGER.log(Level.SEVERE, "POS tagger model was not loaded; returning no tags");
            return ret;
        }

        String tagged;
        try {
            tagged = posTagger.tagString(sentence);
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "POS tagging failed", ex);
            return ret;
        }

        for (String wordTag : tagged.split(" +")) {
            // split() yields an empty first element for empty or
            // space-prefixed input; that is not a token, so skip it.
            if (wordTag.isEmpty()) {
                continue;
            }
            // The tag is whatever follows the last separator, so words that
            // themselves contain '/' are handled. Unlike split("/"), this
            // cannot produce an empty array for slash-only tokens. A token
            // with no separator is returned whole (lastIndexOf gives -1).
            ret.add(wordTag.substring(wordTag.lastIndexOf(TAG_SEPARATOR) + 1));
        }
        return ret;
    }

}