用户工具

站点工具


zh:courses:textmining2025:ch05

第五章:信息抽取

课件

下载:信息抽取

NERRecognizer类

NERRecognizer.java
package cn.edu.bjut.chapter6;
 
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
 
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
 
public class NERRecognizer {
	public static List<CoreLabel> recognize(String text) {
		// Set up the pipeline properties
		Properties props = new Properties();
		props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
		props.setProperty("outputFormat", "text");
 
		// Build the pipeline
		StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
		// Create an annotation object
		Annotation annotation = new Annotation(text);
		// Annotate the text
		pipeline.annotate(annotation);
 
		// Retrieve the tokens
		List<CoreMap> sentences = annotation.get(
				CoreAnnotations.SentencesAnnotation.class);
		List<CoreLabel> tokens = new ArrayList<CoreLabel>();
		for (CoreMap sentence : sentences) {
			tokens.addAll(sentence.get(CoreAnnotations.TokensAnnotation.class));
		}
 
		return tokens;
	}
 
	public static void main(String[] args) {
		String text = "Apple is looking at buying U.K. startup for $1 billion.";
 
		List<CoreLabel> tokens = recognize(text);
		for (CoreLabel token : tokens) {
			//String word = token.word(); 
			//String ne = token.ner(); 
			String word = token.get(CoreAnnotations.TextAnnotation.class); 
			String ne = token.getString(CoreAnnotations.NamedEntityTagAnnotation.class);
 
			if (ne.equals("O")) {
				continue; 
			}
 
			System.out.println("Entity: " + word + " (Type: " + ne + ")");
		}
	}
}

课程报告

评论

请输入您的评论。可以使用维基语法:
168 - 6 = 
 
zh/courses/textmining2025/ch05.txt · 最后更改: 2025/10/20 21:40 由 pzczxs