package cn.edu.bjut.chapter2;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * A text document together with the relative frequency of each of its terms.
 *
 * <p>The term frequencies are computed once, in the constructor: the content
 * is split into sentences with {@code SentenceSpliterByOpenNLP.detect}, each
 * sentence into tokens with {@code TokenizerByOpenNLP.tokenize}, tokens
 * rejected by {@code StopwordChecker.check} are dropped, and the remaining
 * tokens are lower-cased and counted. Each count is then divided by the total
 * number of retained tokens.
 */
public class Document {
    private final String content;
    private final Map<String, Double> tfMap = new HashMap<>();
    private final StopwordChecker stopwordChecker;

    /**
     * Creates a document and immediately computes its term frequencies.
     *
     * @param content the raw text of the document
     */
    public Document(String content) {
        this.content = content;
        this.stopwordChecker = new StopwordChecker();
        calculateTermFrequency();
    }

    /**
     * Fills {@link #tfMap} with {@code count(term) / totalRetainedTokens}
     * for every retained, lower-cased token of the content.
     */
    private void calculateTermFrequency() {
        int total = 0;
        for (String sentence : SentenceSpliterByOpenNLP.detect(content)) {
            for (String token : TokenizerByOpenNLP.tokenize(sentence)) {
                // NOTE(review): the stopword check sees the token in its
                // original case; confirm StopwordChecker is case-insensitive.
                if (stopwordChecker.check(token)) {
                    continue;
                }
                total++;
                // Locale.ROOT keeps the map keys independent of the platform's
                // default locale.
                String term = token.toLowerCase(Locale.ROOT);
                tfMap.merge(term, 1.0, Double::sum);
            }
        }

        // When no token was retained the map is empty, so the division below
        // never executes with total == 0.
        for (Map.Entry<String, Double> entry : tfMap.entrySet()) {
            entry.setValue(entry.getValue() / total);
        }
    }

    /**
     * Returns the term-frequency table of this document.
     *
     * @return the map from lower-cased term to its relative frequency; this
     *         is the document's own map instance, not a copy
     */
    public Map<String, Double> getTermFrequency() {
        return tfMap;
    }
}