package cn.edu.bjut.chapter2;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

/**
 * Runs a Stanford CoreNLP pipeline configured with the
 * {@code tokenize,ssplit,pos,lemma} annotators and exposes the resulting
 * tokens.
 */
public class Lemmatizer {
	/**
	 * Holds the shared pipeline. The JVM initializes this nested class on
	 * first access, so the pipeline is built at most once, and only when
	 * {@link #tag(String)} is actually called, instead of on every call.
	 *
	 * NOTE(review): the shared pipeline is used without additional
	 * synchronization; confirm CoreNLP's thread-safety guarantees before
	 * calling {@link #tag(String)} from several threads.
	 */
	private static final class PipelineHolder {
		private static final StanfordCoreNLP PIPELINE = buildPipeline();
	}

	/** Creates the pipeline with the annotators needed for lemmatization. */
	private static StanfordCoreNLP buildPipeline() {
		Properties props = new Properties();
		props.setProperty("annotators", "tokenize,ssplit,pos,lemma");
		props.setProperty("outputFormat", "text");
		return new StanfordCoreNLP(props);
	}

	/**
	 * Annotates the given text and collects the tokens of all sentences.
	 *
	 * @param text the text to annotate; must not be {@code null}
	 * @return the tokens of every sentence, in sentence order; empty when
	 *         the annotation contains no sentences
	 * @throws NullPointerException if {@code text} is {@code null}
	 */
	public static List<CoreLabel> tag(String text) {
		// Fail fast with a clear message before touching the pipeline.
		if (text == null) {
			throw new NullPointerException("text must not be null");
		}

		// Create an annotation object and annotate it with the shared pipeline
		Annotation annotation = new Annotation(text);
		PipelineHolder.PIPELINE.annotate(annotation);

		// Retrieve the tokens, sentence by sentence
		List<CoreMap> sentences = annotation.get(
				CoreAnnotations.SentencesAnnotation.class);
		List<CoreLabel> tokens = new ArrayList<CoreLabel>();
		if (sentences == null) {
			// Defensive: no sentences annotation was produced.
			return tokens;
		}
		for (CoreMap sentence : sentences) {
			List<CoreLabel> sentenceTokens = sentence.get(
					CoreAnnotations.TokensAnnotation.class);
			if (sentenceTokens != null) {
				tokens.addAll(sentenceTokens);
			}
		}

		return tokens;
	}

	/**
	 * Demo entry point: prints each token of a sample sentence followed by
	 * its lemma, one {@code word-->lemma} pair per line.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		String text = "Natural Language Processing (NLP) is essential "
				+ "for text analysis and text mining.";

		List<CoreLabel> tokens = tag(text);
		for (CoreLabel token : tokens) {
			System.out.println(token.word() + "-->" + token.lemma());
		}
	}
}