package cn.edu.bjut.chapter2;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Computes inverse document frequency (IDF) and aggregated TF-IDF scores
 * over a fixed list of {@link Document}s.
 *
 * <p>Term frequencies are read from {@code Document.getTermFrequency()}.
 * NOTE(review): that method is declared outside this file; it is assumed to
 * return a map from {@code String} terms to numeric counts — confirm against
 * {@code Document}.
 */
public class TFIDFAnalyzer {

    /** The corpus to analyze; the caller's list is referenced, not copied. */
    private final List<Document> documents;

    /**
     * Creates an analyzer over the given corpus.
     *
     * @param documents the documents to analyze
     * @throws NullPointerException if {@code documents} is {@code null}
     */
    public TFIDFAnalyzer(List<Document> documents) {
        // Fail fast here instead of with an unexplained NPE in calculateIDF().
        if (documents == null) {
            throw new NullPointerException("documents must not be null");
        }
        this.documents = documents;
    }

    /**
     * Computes the inverse document frequency of every term in the corpus.
     *
     * <p>For each term, the number of documents whose term-frequency map
     * contains the term as a key is counted, and the IDF is the natural
     * logarithm of {@code totalDocuments / documentCount}. A term present in
     * every document therefore gets an IDF of {@code 0.0}.
     *
     * @return a new map from term to IDF; empty when there are no documents
     */
    public Map<String, Double> calculateIDF() {
        Map<String, Double> idf = new HashMap<>();
        int totalDocuments = documents.size();

        // First pass: document frequency. keySet() yields each term at most
        // once per document, so every document contributes 0 or 1 per term.
        for (Document document : documents) {
            Set<String> tokens = document.getTermFrequency().keySet();
            for (String token : tokens) {
                idf.merge(token, 1.0, Double::sum);
            }
        }

        // Second pass: turn each document frequency into its IDF in place.
        // The count is a double, so the division is floating-point.
        idf.replaceAll((term, documentCount) -> Math.log(totalDocuments / documentCount));
        return idf;
    }

    /**
     * Computes a TF-IDF score per term, summed across all documents.
     *
     * <p>For every document and every term in its term-frequency map, the
     * product {@code tf * idf} is added to that term's running total.
     *
     * @return a new map from term to its accumulated TF-IDF score; empty when
     *         there are no documents
     */
    public Map<String, Double> calculateTFIDF() {
        Map<String, Double> tfidf = new HashMap<>();
        Map<String, Double> idf = calculateIDF();

        for (Document document : documents) {
            // The wildcard accepts either Integer or Double counts.
            Map<String, ? extends Number> tf = document.getTermFrequency();
            for (Map.Entry<String, ? extends Number> entry : tf.entrySet()) {
                String term = entry.getKey();
                double tfidfValue = entry.getValue().doubleValue() * idf.getOrDefault(term, 0.0);
                tfidf.merge(term, tfidfValue, Double::sum);
            }
        }
        return tfidf;
    }

    /**
     * Demo entry point: builds a two-document corpus and prints each term
     * with its accumulated TF-IDF score, one per line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Document> documents = new ArrayList<>();
        documents.add(new Document("A rose is very beautiful."));
        documents.add(new Document("A rose is a flower, which is a rose."));

        TFIDFAnalyzer tfidfAnalyzer = new TFIDFAnalyzer(documents);
        Map<String, Double> tfidf = tfidfAnalyzer.calculateTFIDF();

        for (Map.Entry<String, Double> entry : tfidf.entrySet()) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }
}