From 843d4639907515b59c759478d720db74b807f61b Mon Sep 17 00:00:00 2001
From: "oleksii.tumanov"
Date: Wed, 4 Mar 2026 03:40:56 -0600
Subject: [PATCH 1/2] feat: add TopKFrequentWords with deterministic tie-breaking

---
 .../strings/TopKFrequentWords.java            | 57 ++++++++++++++++++
 .../strings/TopKFrequentWordsTest.java        | 60 +++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 src/main/java/com/thealgorithms/strings/TopKFrequentWords.java
 create mode 100644 src/test/java/com/thealgorithms/strings/TopKFrequentWordsTest.java

diff --git a/src/main/java/com/thealgorithms/strings/TopKFrequentWords.java b/src/main/java/com/thealgorithms/strings/TopKFrequentWords.java
new file mode 100644
index 000000000000..0f89c5e82b5e
--- /dev/null
+++ b/src/main/java/com/thealgorithms/strings/TopKFrequentWords.java
@@ -0,0 +1,57 @@
+package com.thealgorithms.strings;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Utility class to find the top-k most frequent words.
+ *
+ * <p>Words are ranked by frequency in descending order. For equal frequencies,
+ * words are ranked in lexicographical ascending order.
+ *
+ */
+public final class TopKFrequentWords {
+    private TopKFrequentWords() {
+    }
+
+    /**
+     * Finds the k most frequent words.
+     *
+     * @param words input array of words
+     * @param k number of words to return
+     * @return list of top-k words ordered by frequency then lexicographical order
+     * @throws IllegalArgumentException if words is null, k is negative, or words contains null
+     */
+    public static List<String> findTopKFrequentWords(String[] words, int k) {
+        if (words == null) {
+            throw new IllegalArgumentException("Input words array cannot be null.");
+        }
+        if (k < 0) {
+            throw new IllegalArgumentException("k cannot be negative.");
+        }
+        if (k == 0 || words.length == 0) {
+            return List.of();
+        }
+
+        Map<String, Integer> frequency = new HashMap<>();
+        for (String word : words) {
+            if (word == null) {
+                throw new IllegalArgumentException("Input words cannot contain null values.");
+            }
+            frequency.put(word, frequency.getOrDefault(word, 0) + 1);
+        }
+
+        List<String> candidates = new ArrayList<>(frequency.keySet());
+        candidates.sort(
+            Comparator.<String>comparingInt(frequency::get)
+                .reversed()
+                .thenComparing(Comparator.naturalOrder())
+        );
+
+        int limit = Math.min(k, candidates.size());
+        return new ArrayList<>(candidates.subList(0, limit));
+    }
+}
diff --git a/src/test/java/com/thealgorithms/strings/TopKFrequentWordsTest.java b/src/test/java/com/thealgorithms/strings/TopKFrequentWordsTest.java
new file mode 100644
index 000000000000..c1ee453feed5
--- /dev/null
+++ b/src/test/java/com/thealgorithms/strings/TopKFrequentWordsTest.java
@@ -0,0 +1,60 @@
+package com.thealgorithms.strings;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.List;
+import java.util.stream.Stream;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+class TopKFrequentWordsTest {
+
+    @ParameterizedTest
+    @MethodSource("validTestCases")
+    void testFindTopKFrequentWords(String[] words, int k, List<String> expected) {
+        assertEquals(expected, TopKFrequentWords.findTopKFrequentWords(words, k));
+    }
+
+    static Stream<Arguments> validTestCases() {
+        return Stream.of(
+            Arguments.of(
+                new String[] {"i", "love", "leetcode", "i", "love", "coding"},
+                2,
+                List.of("i", "love")
+            ),
+            Arguments.of(
+                new String[] {"the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"},
+                4,
+                List.of("the", "is", "sunny", "day")
+            ),
+            Arguments.of(
+                new String[] {"bbb", "aaa", "bbb", "aaa", "ccc"},
+                2,
+                List.of("aaa", "bbb")
+            ),
+            Arguments.of(
+                new String[] {"one", "two", "three"},
+                10,
+                List.of("one", "three", "two")
+            ),
+            Arguments.of(new String[] {}, 3, List.of()),
+            Arguments.of(new String[] {"x", "x", "y"}, 0, List.of())
+        );
+    }
+
+    @ParameterizedTest
+    @MethodSource("invalidTestCases")
+    void testFindTopKFrequentWordsInvalidInput(String[] words, int k) {
+        assertThrows(IllegalArgumentException.class, () -> TopKFrequentWords.findTopKFrequentWords(words, k));
+    }
+
+    static Stream<Arguments> invalidTestCases() {
+        return Stream.of(
+            Arguments.of((String[]) null, 1),
+            Arguments.of(new String[] {"a", null, "b"}, 2),
+            Arguments.of(new String[] {"a"}, -1)
+        );
+    }
+}

From 1a0c104990d8bf45a8c7aace02d12233fa362e09 Mon Sep 17 00:00:00 2001
From: "oleksii.tumanov"
Date: Wed, 4 Mar 2026 03:59:54 -0600
Subject: [PATCH 2/2] style: format TopKFrequentWords files with clang-format

---
 .../strings/TopKFrequentWords.java            |  9 +++---
 .../strings/TopKFrequentWordsTest.java        | 32 ++-----------------
 2 files changed, 7 insertions(+), 34 deletions(-)

diff --git a/src/main/java/com/thealgorithms/strings/TopKFrequentWords.java b/src/main/java/com/thealgorithms/strings/TopKFrequentWords.java
index 0f89c5e82b5e..106de304cf40 100644
--- a/src/main/java/com/thealgorithms/strings/TopKFrequentWords.java
+++ b/src/main/java/com/thealgorithms/strings/TopKFrequentWords.java
@@ -12,6 +12,9 @@
  * <p>Words are ranked by frequency in descending order. For equal frequencies,
  * words are ranked in lexicographical ascending order.
  *
+ * <p>Reference:
+ * https://en.wikipedia.org/wiki/Top-k_problem
+ *
  */
 public final class TopKFrequentWords {
     private TopKFrequentWords() {
@@ -45,11 +48,7 @@ public static List<String> findTopKFrequentWords(String[] words, int k) {
         }
 
         List<String> candidates = new ArrayList<>(frequency.keySet());
-        candidates.sort(
-            Comparator.<String>comparingInt(frequency::get)
-                .reversed()
-                .thenComparing(Comparator.naturalOrder())
-        );
+        candidates.sort(Comparator.<String>comparingInt(frequency::get).reversed().thenComparing(Comparator.naturalOrder()));
 
         int limit = Math.min(k, candidates.size());
         return new ArrayList<>(candidates.subList(0, limit));
diff --git a/src/test/java/com/thealgorithms/strings/TopKFrequentWordsTest.java b/src/test/java/com/thealgorithms/strings/TopKFrequentWordsTest.java
index c1ee453feed5..42b2d04ff265 100644
--- a/src/test/java/com/thealgorithms/strings/TopKFrequentWordsTest.java
+++ b/src/test/java/com/thealgorithms/strings/TopKFrequentWordsTest.java
@@ -18,30 +18,8 @@ void testFindTopKFrequentWords(String[] words, int k, List<String> expected) {
     }
 
     static Stream<Arguments> validTestCases() {
-        return Stream.of(
-            Arguments.of(
-                new String[] {"i", "love", "leetcode", "i", "love", "coding"},
-                2,
-                List.of("i", "love")
-            ),
-            Arguments.of(
-                new String[] {"the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"},
-                4,
-                List.of("the", "is", "sunny", "day")
-            ),
-            Arguments.of(
-                new String[] {"bbb", "aaa", "bbb", "aaa", "ccc"},
-                2,
-                List.of("aaa", "bbb")
-            ),
-            Arguments.of(
-                new String[] {"one", "two", "three"},
-                10,
-                List.of("one", "three", "two")
-            ),
-            Arguments.of(new String[] {}, 3, List.of()),
-            Arguments.of(new String[] {"x", "x", "y"}, 0, List.of())
-        );
+        return Stream.of(Arguments.of(new String[] {"i", "love", "leetcode", "i", "love", "coding"}, 2, List.of("i", "love")), Arguments.of(new String[] {"the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"}, 4, List.of("the", "is", "sunny", "day")),
+            Arguments.of(new String[] {"bbb", "aaa", "bbb", "aaa", "ccc"}, 2, List.of("aaa", "bbb")), Arguments.of(new String[] {"one", "two", "three"}, 10, List.of("one", "three", "two")), Arguments.of(new String[] {}, 3, List.of()), Arguments.of(new String[] {"x", "x", "y"}, 0, List.of()));
     }
 
     @ParameterizedTest
@@ -51,10 +29,6 @@ void testFindTopKFrequentWordsInvalidInput(String[] words, int k) {
     }
 
     static Stream<Arguments> invalidTestCases() {
-        return Stream.of(
-            Arguments.of((String[]) null, 1),
-            Arguments.of(new String[] {"a", null, "b"}, 2),
-            Arguments.of(new String[] {"a"}, -1)
-        );
+        return Stream.of(Arguments.of((String[]) null, 1), Arguments.of(new String[] {"a", null, "b"}, 2), Arguments.of(new String[] {"a"}, -1));
     }
 }