package lingscope.drivers;

import generalutils.FileOperations; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import lingscope.algorithms.Annotator; import lingscope.algorithms.BaselineCueAnnotator; import lingscope.algorithms.BaselineScopeAnnotator; import lingscope.algorithms.CrfAnnotator; import lingscope.algorithms.NegexCueAnnotator; import lingscope.algorithms.NegexScopeAnnotator; import lingscope.io.AnnotatedSentencesIO; import lingscope.structures.AnnotatedSentence;

/**

* Tags scope or cue in a single sentence
* @author shashank
*/

public class SentenceTagger {

public static final String CUE_START = "B-C";
public static final String CUE_INTER = "I-C";
public static final String SCOPE_START = "B-S";
public static final String SCOPE_INTER = "I-S";
public static final String OTHER = "O";

/**
 * Tags the given sentence with the given annotator
 * @param annotator
 * @param sentence
 * @param isTokenized
 * @return
 */
public static AnnotatedSentence tag(Annotator annotator, String sentence, boolean isTokenized) {
    return annotator.annotateSentence(sentence, isTokenized);
}

/**
 * Prints the usage for the sentence tagger
 */
public static void usage() {
    System.out.println("Usage:\njava lingscope.drivers.SentenceTagger (cue|scope) (crf|baseline|negex) saved_model_file sentence_to_tag");
    System.out.println("\tSaved model for negation can be obtained from http://negscope.askhermes.org/");
    System.out.println("\tSaved model for speculation can be obtained from http://hedgescope.askhermes.org/");
    System.out.println("\tSaved model for NegEx can be obtained from http://code.google.com/p/negex/downloads/list");
}

/**
 * Given a list of annotated sentences, return a list where the annotated 
 * sentences are replaced with strings
 * @param annotatedSentences
 * @return
 */
public static List<String> getStringListFromAnnotatedSentences(List<AnnotatedSentence> annotatedSentences) {
    List<String> strings = new ArrayList<String>(annotatedSentences.size());
    for (AnnotatedSentence annotatedSentence : annotatedSentences) {
        strings.add(annotatedSentence.getSentenceText());
    }
    return strings;
}

public static List<AnnotatedSentence> annotateSentences(Annotator annotator, List<String> inputSentences, boolean isTokenized) {
    List<AnnotatedSentence> outputSentences = new ArrayList<AnnotatedSentence>(inputSentences.size());
    for (String inputSentence : inputSentences) {
        AnnotatedSentence outputSentence = annotator.annotateSentence(inputSentence, isTokenized);
        outputSentences.add(outputSentence);
    }
    return outputSentences;
}

/**
 * Gets the list of sentences in string format from the given inputFile.
 * @param inputFile the file containing input sentences
 * @param isAnnotated set this as true if the input file contains annotated
 * sentences. Set this as false if the input file contains only sentences
 * as strings
 * @return the list of sentences in string format in the given inputFile
 */
public static List<String> getStringList(String inputFile, boolean isAnnotated) {
    if (!isAnnotated) {
        try {
            return FileOperations.readFile(inputFile);
        } catch (Exception ex) {
            Logger.getLogger(SentenceTagger.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    List<AnnotatedSentence> annotatedSentences = AnnotatedSentencesIO.read(inputFile);
    return getStringListFromAnnotatedSentences(annotatedSentences);
}

/**
 * Gets the Annotator from the given classifier and mark type
 * @param classifierType such as "baseline", "crf", or "negex"
 * @param markType
 * @return
 */
public static Annotator getAnnotator(String classifierType, String markType) {
    if ("cue".equalsIgnoreCase(markType)) {
        if ("baseline".equalsIgnoreCase(classifierType)) {
            return new BaselineCueAnnotator(CUE_START, CUE_INTER, OTHER);
        } else if ("crf".equalsIgnoreCase(classifierType)) {
            return new CrfAnnotator(CUE_START, CUE_INTER, OTHER);
        } else if ("negex".equalsIgnoreCase(classifierType)) {
            return new NegexCueAnnotator(CUE_START, CUE_INTER, OTHER);
        }
        return null;
    } else if ("scope".equalsIgnoreCase(markType)) {
        if ("baseline".equalsIgnoreCase(classifierType)) {
            return new BaselineScopeAnnotator(SCOPE_START, SCOPE_INTER, OTHER, true, true);
        } else if ("crf".equalsIgnoreCase(classifierType)) {
            return new CrfAnnotator(SCOPE_START, SCOPE_INTER, OTHER);
        } else if ("negex".equalsIgnoreCase(classifierType)) {
            return new NegexScopeAnnotator(SCOPE_START, SCOPE_INTER, OTHER);
        }
        return null;
    }
    return null;
}

/**
 *
 * @param args
 * 0 - Annotator type ("cue" or "scope")
 * 1 - Model type ("crf", "baseline" or "negex")
 * 2 - Saved model file
 * 3 - Sentence to tag
 */
public static void main(String[] args) {
    if (args.length < 4) {
        usage();
        System.exit(0);
    }
    Annotator annotator = getAnnotator(args[1], args[0]);
    if (annotator == null) {
        usage();
        System.exit(1);
    }
    annotator.loadAnnotator(args[2]);

    if ("file".equalsIgnoreCase(args[3])) {
        String fileName = args[4];
        try {
            List<String> sentences = FileOperations.readFile(fileName);
            for (String sentence : sentences) {
                AnnotatedSentence annotatedSentence = tag(annotator, sentence, false);
                System.out.println(annotatedSentence.getRawText());
            }
        } catch (Exception ex) {
            ex.printStackTrace(System.err);
        }
    } else {
        AnnotatedSentence sentence = tag(annotator, args[3], false);
        System.out.println(sentence.getRawText());
    }
}

}