package cn.edu.bjut.chapter5; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; public class SimilarityCalculator { public static double calculateCosineSimilarity(String text1, String text2) { Map vector1 = getWordFrequency(text1); Map vector2 = getWordFrequency(text2); double dotProduct = 0.0; for (String word : vector1.keySet()) { if (vector2.containsKey(word)) { dotProduct += vector1.get(word) * vector2.get(word); } } double magnitude1 = calculateMagnitude(vector1); double magnitude2 = calculateMagnitude(vector2); return dotProduct / (magnitude1 * magnitude2); } private static Map getWordFrequency(String text) { String[] words = text.toLowerCase().split("\\s+"); Map wordCount = new HashMap(); for (String word : words) { wordCount.put(word, wordCount.getOrDefault(word, 0) + 1); } return wordCount; } private static double calculateMagnitude(Map vector) { double sum = 0.0; for (int value : vector.values()) { sum += value * value; } return Math.sqrt(sum); } public static double calculateJaccardSimilarity(String text1, String text2) { Set set1 = new HashSet(Arrays.asList(text1.split(",\\s*"))); Set set2 = new HashSet(Arrays.asList(text2.split(",\\s*"))); Set intersection = new HashSet(set1); intersection.retainAll(set2); Set union = new HashSet(set1); union.addAll(set2); return (double) intersection.size() / union.size(); } public static int calculateLevenshteinDistance(String str1, String str2) { int[][] dp = new int[str1.length() + 1][str2.length() + 1]; for (int i = 0; i <= str1.length(); i++) { for (int j = 0; j <= str2.length(); j++) { if (i == 0) { dp[i][j] = j; } else if (j == 0) { dp[i][j] = i; } else { int cost = (str1.charAt(i - 1) == str2.charAt(j - 1)) ? 0 : 1; dp[i][j] = Math.min(Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1), dp[i - 1][j - 1] + cost); } } } return dp[str1.length()][str2.length()]; } public static void main(String[] args) { { String text1 = "I love programming in Java"; String text2 = "Java programming is amazing"; double similarity = calculateCosineSimilarity(text1, text2); System.out.println("Cosine Similarity: " + similarity); } { String text1 = "cat, dog, mouse"; String text2 = "dog, elephant"; double similarity = calculateJaccardSimilarity(text1, text2); System.out.println("Jaccard Similarity: " + similarity); } { String str1 = "kitten"; String str2 = "sitting"; int distance = calculateLevenshteinDistance(str1, str2); System.out.println("Levenshtein Distance: " + distance); } } }