下载:信息抽取
package cn.edu.bjut.chapter6;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

/**
 * Runs a Stanford CoreNLP pipeline over a piece of text and exposes the
 * resulting tokens, each of which carries a named-entity tag.
 */
public class NERRecognizer {

    /** Tag value that {@link #main(String[])} treats as "not an entity" and skips. */
    private static final String OUTSIDE_TAG = "O";

    /** Lazily created pipeline, shared by every call to {@link #recognize(String)}. */
    private static StanfordCoreNLP pipeline;

    /**
     * Returns the shared pipeline, building it on first use.
     *
     * <p>Building the pipeline is the costly step, so it is done once instead
     * of on every {@code recognize} call. The method is synchronized so that
     * concurrent first calls cannot construct the pipeline twice.
     *
     * @return the shared pipeline instance
     */
    private static synchronized StanfordCoreNLP getPipeline() {
        if (pipeline == null) {
            // Set up the pipeline properties
            Properties props = new Properties();
            props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
            props.setProperty("outputFormat", "text");

            // Build the pipeline
            pipeline = new StanfordCoreNLP(props);
        }
        return pipeline;
    }

    /**
     * Annotates {@code text} and returns the tokens of all sentences, in order,
     * flattened into a single list.
     *
     * @param text the raw text to annotate
     * @return a new mutable list of the annotated tokens; empty if the
     *         annotation contains no sentences
     */
    public static List<CoreLabel> recognize(String text) {
        // Create an annotation object and annotate the text
        Annotation annotation = new Annotation(text);
        getPipeline().annotate(annotation);

        // Retrieve the tokens, sentence by sentence
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        List<CoreMap> sentences = annotation.get(
                CoreAnnotations.SentencesAnnotation.class);
        if (sentences == null) {
            // No sentence annotation was produced; nothing to collect.
            return tokens;
        }
        for (CoreMap sentence : sentences) {
            List<CoreLabel> sentenceTokens =
                    sentence.get(CoreAnnotations.TokensAnnotation.class);
            if (sentenceTokens != null) {
                tokens.addAll(sentenceTokens);
            }
        }
        return tokens;
    }

    /**
     * Demo entry point: annotates a sample sentence and prints every token
     * whose named-entity tag is not {@code "O"}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String text = "Apple is looking at buying U.K. startup for $1 billion.";
        List<CoreLabel> tokens = recognize(text);
        for (CoreLabel token : tokens) {
            String word = token.get(CoreAnnotations.TextAnnotation.class);
            String ne = token.getString(CoreAnnotations.NamedEntityTagAnnotation.class);
            // Constant-first comparison stays safe even if the tag were null.
            if (OUTSIDE_TAG.equals(ne)) {
                continue;
            }
            System.out.println("Entity: " + word + " (Type: " + ne + ")");
        }
    }
}
评论