package cn.edu.bjut.chapter2;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

/**
 * Runs a Stanford CoreNLP pipeline configured with the
 * {@code tokenize,ssplit,pos,lemma} annotators over a piece of text and
 * exposes the resulting tokens.
 */
public class Lemmatizer {

    /**
     * Annotates {@code text} and collects the tokens of every sentence into
     * a single list, in sentence order.
     *
     * @param text the raw text to annotate
     * @return a new mutable list holding the tokens of all sentences; empty
     *         when the annotation carries no sentences
     */
    public static List<CoreLabel> tag(String text) {
        // Set up the pipeline properties
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma");
        props.setProperty("outputFormat", "text");

        // Build the pipeline
        // NOTE(review): a new pipeline is constructed on every call; if this
        // turns out to be costly, consider reusing a single instance.
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        // Create an annotation object
        Annotation annotation = new Annotation(text);

        // Annotate the text
        pipeline.annotate(annotation);

        // Retrieve the tokens
        List<CoreMap> sentences = annotation.get(
                CoreAnnotations.SentencesAnnotation.class);
        List<CoreLabel> tokens = new ArrayList<>();
        if (sentences == null) {
            // The annotation lookup yields null when the key is absent;
            // return an empty list rather than failing in the loop below.
            return tokens;
        }
        for (CoreMap sentence : sentences) {
            List<CoreLabel> sentenceTokens =
                    sentence.get(CoreAnnotations.TokensAnnotation.class);
            if (sentenceTokens != null) {
                tokens.addAll(sentenceTokens);
            }
        }

        return tokens;
    }

    /**
     * Demo entry point: tags a fixed sample sentence and prints one
     * {@code word-->lemma} line per token to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String text = "Natural Language Processing (NLP) is essential "
                + "for text analysis and text mining.";

        List<CoreLabel> tokens = tag(text);
        for (CoreLabel token : tokens) {
            System.out.println(token.word() + "-->" + token.lemma());
        }
    }
}