package cn.edu.bjut.chapter2;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;

/**
 * Tokenizes text with an OpenNLP {@link TokenizerME} built from the model
 * file named by {@link #MODEL_FILE}.
 *
 * <p>The tokenizer is created on the first call to {@link #tokenize(String)}
 * and then kept in a static field for later calls. The lazy initialization is
 * not synchronized.
 */
public class TokenizerByOpenNLP {
    /** Shared tokenizer instance; {@code null} until the model has been loaded. */
    private static TokenizerME tokenizer = null;

    /** Path of the tokenizer model; being relative, it is resolved against the working directory. */
    private static final String MODEL_FILE = "resource/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin";

    /**
     * Reads the tokenizer model from {@code fname} and stores a tokenizer
     * built from it in {@link #tokenizer}.
     *
     * @param fname path of the model file to read
     * @throws IllegalStateException if the file cannot be found, or if reading
     *         or closing it fails; the underlying exception is attached as the cause
     */
    private static void loadModel(String fname) {
        TokenizerModel model;
        // try-with-resources closes the stream on every path, including failures.
        try (InputStream stream = new FileInputStream(fname)) {
            model = new TokenizerModel(stream);
        } catch (FileNotFoundException e) {
            // Fail here with the file name instead of leaving the tokenizer
            // null and surfacing an unexplained NullPointerException later.
            throw new IllegalStateException("Tokenizer model file not found: " + fname, e);
        } catch (IOException e) {
            throw new IllegalStateException("Failed to read tokenizer model: " + fname, e);
        }
        tokenizer = new TokenizerME(model);
    }

    /**
     * Splits {@code str} into tokens, loading the model first if no tokenizer
     * has been created yet.
     *
     * @param str the text to tokenize
     * @return the tokens produced by the tokenizer for {@code str}
     * @throws IllegalStateException if the model has to be loaded and loading fails
     */
    public static String[] tokenize(String str) {
        if (tokenizer == null) {
            loadModel(MODEL_FILE);
        }
        return tokenizer.tokenize(str);
    }

    /**
     * Demo entry point: tokenizes a sample sentence and prints one token per line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String str = "Natural Language Processing (NLP) is essential "
                + "for text analysis and text mining.";
        String[] tokens = tokenize(str);

        System.out.println("Tokens tokenized by OpenNLP: ");
        for (String token : tokens) {
            System.out.println(token);
        }
    }
}