package lingscope.algorithms;

import abner.Tagger; import abner.Trainer; import java.io.File; import lingscope.structures.AnnotatedSentence;

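/**
 * A CRF-based annotator that trains and applies its model through the ABNER
 * {@code Trainer} and {@code Tagger} classes.
 *
 * @author shashank
 */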

public class CrfAnnotator extends Annotator {

    /** ABNER tagger for the loaded model; {@code null} until {@link #loadAnnotator(String)} has run. */
    private Tagger tagger;

    /**
     * Creates an annotator with no model loaded. The three tags are passed
     * unchanged to the {@code Annotator} super constructor.
     *
     * @param beginTag forwarded to the super constructor
     * @param interTag forwarded to the super constructor
     * @param otherTag forwarded to the super constructor
     */
    public CrfAnnotator(String beginTag, String interTag, String otherTag) {
        super(beginTag, interTag, otherTag);
        tagger = null;
    }

    /**
     * Runs the ABNER {@code Trainer} on the given training file and model file,
     * then loads that model file into this annotator via
     * {@link #loadAnnotator(String)}, so the instance can annotate afterwards.
     *
     * @param trainingFile path handed to {@code Trainer.train} as the training data
     * @param modelFile path handed to {@code Trainer.train} as the model location,
     *                  and then loaded
     */
    @Override
    public void serializeAnnotator(String trainingFile, String modelFile) {
        new Trainer().train(trainingFile, modelFile);
        loadAnnotator(modelFile);
    }

    /**
     * Tags a sentence with the loaded model and wraps the trimmed tagger output
     * in an {@link AnnotatedSentence}.
     *
     * @param sentence the sentence to annotate
     * @param isTokenized {@code true} if {@code sentence} is already tokenized;
     *                    otherwise it is first passed through
     *                    {@code AbnerTokenizer.splitTermsByPunctuation}
     * @return the annotated sentence built from the tagger output
     * @throws IllegalStateException if no model has been loaded yet
     */
    @Override
    public AnnotatedSentence annotateSentence(String sentence, boolean isTokenized) {
        if (tagger == null) {
            // IllegalStateException is a RuntimeException, so existing handlers keep working.
            throw new IllegalStateException("Tagger has not been loaded");
        }
        // The tagger's own tokenization is switched off in loadAnnotator, so
        // untokenized input has to be tokenized here before tagging.
        String tokenized = isTokenized
                ? sentence
                : AbnerTokenizer.splitTermsByPunctuation(sentence);
        String raw = tagger.tagABNER(tokenized).trim();
        return new AnnotatedSentence(raw);
    }

    /**
     * Loads the model stored at {@code modelFile} into a new ABNER {@code Tagger}
     * with its tokenization setting turned off, replacing any previously loaded
     * tagger.
     *
     * @param modelFile path of the model file to load
     */
    @Override
    public void loadAnnotator(String modelFile) {
        // Configure the new tagger fully before publishing it to the field, so a
        // failure here never leaves this annotator with a half-configured tagger.
        Tagger loaded = new Tagger(new File(modelFile));
        loaded.setTokenization(false);
        tagger = loaded;
    }

}