diff --git a/src/main/java/com/thealgorithms/strings/TopKFrequentWords.java b/src/main/java/com/thealgorithms/strings/TopKFrequentWords.java new file mode 100644 index 000000000000..106de304cf40 --- /dev/null +++ b/src/main/java/com/thealgorithms/strings/TopKFrequentWords.java @@ -0,0 +1,56 @@ +package com.thealgorithms.strings; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Utility class to find the top-k most frequent words. + * + *

<p>Words are ranked by frequency in descending order. For equal frequencies, + * words are ranked in ascending lexicographical order. + * + *

/**
 * Utility class to find the top-k most frequent words.
 *
 * <p>Words are ranked by frequency in descending order. For equal frequencies,
 * words are ranked in ascending lexicographical order.
 *
 * <p>Reference:
 * https://en.wikipedia.org/wiki/Top-k_problem
 */
public final class TopKFrequentWords {
    private TopKFrequentWords() {
    }

    /**
     * Finds the k most frequent words.
     *
     * @param words input array of words
     * @param k number of words to return
     * @return list of at most {@code k} distinct words ordered by descending frequency,
     *         ties broken by ascending natural (lexicographical) order; an immutable empty
     *         list when {@code k} is 0 or {@code words} is empty, otherwise a new
     *         {@link ArrayList}
     * @throws IllegalArgumentException if {@code words} is null, if {@code k} is negative,
     *         or if {@code k} is positive and {@code words} contains a null element
     */
    public static List<String> findTopKFrequentWords(String[] words, int k) {
        if (words == null) {
            throw new IllegalArgumentException("Input words array cannot be null.");
        }
        if (k < 0) {
            throw new IllegalArgumentException("k cannot be negative.");
        }
        // Nothing to rank; note that this returns before the elements are inspected,
        // so null elements are not reported when k == 0.
        if (k == 0 || words.length == 0) {
            return List.of();
        }

        // Count the occurrences of every distinct word.
        Map<String, Integer> frequency = new HashMap<>();
        for (String word : words) {
            if (word == null) {
                throw new IllegalArgumentException("Input words cannot contain null values.");
            }
            frequency.merge(word, 1, Integer::sum);
        }

        // Most frequent first; equal counts fall back to natural String ordering.
        Comparator<String> byFrequencyDescending = Comparator.comparingInt((String word) -> frequency.get(word)).reversed();
        List<String> candidates = new ArrayList<>(frequency.keySet());
        candidates.sort(byFrequencyDescending.thenComparing(Comparator.naturalOrder()));

        // k may exceed the number of distinct words; copy so the result is not a subList view.
        int limit = Math.min(k, candidates.size());
        return new ArrayList<>(candidates.subList(0, limit));
    }
}
TopKFrequentWordsTest { + + @ParameterizedTest + @MethodSource("validTestCases") + void testFindTopKFrequentWords(String[] words, int k, List expected) { + assertEquals(expected, TopKFrequentWords.findTopKFrequentWords(words, k)); + } + + static Stream validTestCases() { + return Stream.of(Arguments.of(new String[] {"i", "love", "leetcode", "i", "love", "coding"}, 2, List.of("i", "love")), Arguments.of(new String[] {"the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"}, 4, List.of("the", "is", "sunny", "day")), + Arguments.of(new String[] {"bbb", "aaa", "bbb", "aaa", "ccc"}, 2, List.of("aaa", "bbb")), Arguments.of(new String[] {"one", "two", "three"}, 10, List.of("one", "three", "two")), Arguments.of(new String[] {}, 3, List.of()), Arguments.of(new String[] {"x", "x", "y"}, 0, List.of())); + } + + @ParameterizedTest + @MethodSource("invalidTestCases") + void testFindTopKFrequentWordsInvalidInput(String[] words, int k) { + assertThrows(IllegalArgumentException.class, () -> TopKFrequentWords.findTopKFrequentWords(words, k)); + } + + static Stream invalidTestCases() { + return Stream.of(Arguments.of((String[]) null, 1), Arguments.of(new String[] {"a", null, "b"}, 2), Arguments.of(new String[] {"a"}, -1)); + } +}