package cn.edu.bjut.chapter2;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

/**
 * Answers whether a word appears in a stopword list read from
 * {@code resource/stoplist.txt} (path relative to the working directory).
 *
 * <p>The list is read once, at construction time, as UTF-8 text with one entry
 * per line. Each line is trimmed before being stored. In case-insensitive mode
 * (the default) both the stored entries and the queried words are lower-cased
 * with {@link Locale#ROOT}, so the result does not depend on the JVM's default
 * locale.
 *
 * <p>Loading is best-effort: if the file is missing or cannot be read, the
 * stack trace is printed and the checker keeps whatever entries were read up
 * to that point (possibly none), so {@link #check(String)} simply returns
 * {@code false} for words that were not loaded.
 */
public class StopwordChecker {
    /** Location of the stopword list, one entry per line. */
    private static final String DEFAULT_STOPLIST = "resource/stoplist.txt";

    /** Byte order mark as decoded from a UTF-8 file that starts with one. */
    private static final char BYTE_ORDER_MARK = '\uFEFF';

    private final Set<String> stopwords;
    private final boolean caseSensitive;

    /**
     * Creates a checker and loads the stopword list.
     *
     * @param caseSensitive {@code true} to compare words exactly as written,
     *                      {@code false} to ignore case
     */
    public StopwordChecker(boolean caseSensitive) {
        this.stopwords = new HashSet<String>();
        // Must be set before load(): normalisation of the entries depends on it.
        this.caseSensitive = caseSensitive;
        load(DEFAULT_STOPLIST);
    }

    /** Creates a case-insensitive checker. */
    public StopwordChecker() {
        this(false);
    }

    /**
     * Reads the stopword file into {@link #stopwords}.
     *
     * <p>I/O failures are reported via {@code printStackTrace()} and otherwise
     * ignored, leaving the set empty or partially filled.
     *
     * @param fname path of the UTF-8 encoded stopword file
     */
    private void load(final String fname) {
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(fname), "UTF-8"));
            try {
                boolean firstLine = true;
                String line;
                while ((line = reader.readLine()) != null) {
                    if (firstLine) {
                        firstLine = false;
                        // The UTF-8 decoder keeps a leading BOM as a character and
                        // trim() does not remove it; without this the first entry
                        // of a BOM-prefixed file could never match.
                        if (!line.isEmpty() && line.charAt(0) == BYTE_ORDER_MARK) {
                            line = line.substring(1);
                        }
                    }
                    stopwords.add(normalize(line.trim()));
                }
            } finally {
                // Close even when readLine() fails part-way through the file.
                reader.close();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Applies the case policy of this checker to a word.
     *
     * @param word a non-null word
     * @return the word itself in case-sensitive mode, otherwise its
     *         locale-independent lower-case form
     */
    private String normalize(String word) {
        return caseSensitive ? word : word.toLowerCase(Locale.ROOT);
    }

    /**
     * Tests whether a word is in the loaded stopword list.
     *
     * @param word the word to look up; may be {@code null}
     * @return {@code true} if the word (after applying the case policy) was
     *         loaded from the stopword file; {@code false} otherwise, including
     *         for {@code null}
     */
    public boolean check(String word) {
        if (word == null) {
            return false;
        }
        return stopwords.contains(normalize(word));
    }

    /** Small demo: prints the stopword status of a few sample tokens. */
    public static void main(String[] args) {
        StopwordChecker checker = new StopwordChecker();
        String[] tokens = { "Text", "analysis", "and", "text", "mining", "are",
                "amazing", "!" };
        for (String token : tokens) {
            System.out.println(token + ": " + checker.check(token));
        }
    }
}