Skip to content

Commit f084499

Browse files
committed
feat(day 119): implement consecutive word compression algorithm with proper run-length grouping
1 parent fdd8485 commit f084499

1 file changed

Lines changed: 82 additions & 0 deletions

File tree

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
"""
2+
3+
String Compression
4+
Given a string sentence, return a compressed version of the sentence where consecutive duplicate words are replaced by the word followed with the number of times it repeats in parentheses.
5+
6+
Only consecutive duplicates are compressed.
7+
Words are separated by single spaces.
8+
For example, given "yes yes yes please", return "yes(3) please".
9+
"""
10+
11+
import unittest
12+
13+
class StringCompressionTest(unittest.TestCase):
    """Checks compress_string against sentences containing consecutive repeats."""

    def test1(self):
        """A leading run of three words collapses; the trailing word is kept."""
        expected = "yes(3) please"
        actual = compress_string("yes yes yes please")
        self.assertEqual(actual, expected)

    def test2(self):
        """A run in the middle of the sentence collapses in place."""
        expected = "I have(3) apples"
        actual = compress_string("I have have have apples")
        self.assertEqual(actual, expected)

    def test3(self):
        """Runs at both ends collapse while single words pass through."""
        expected = "one(2) three and to the(4)"
        actual = compress_string("one one three and to the the the the")
        self.assertEqual(actual, expected)

    def test4(self):
        """Two adjacent runs are each collapsed separately."""
        expected = "route(6) tee(6)"
        actual = compress_string(
            "route route route route route route tee tee tee tee tee tee"
        )
        self.assertEqual(actual, expected)
26+
27+
28+
"""
29+
Example: the input "go stop go stop" should be returned unchanged as "go stop go stop" (no compression, because the duplicates aren't consecutive).
30+
A naive global-count approach (a dict filled via sentence.count) compresses non-consecutive duplicates incorrectly.
31+
32+
Issues:
33+
1. Global counting instead of consecutive counting.
34+
=> Global counting compresses all duplicates, not just consecutive ones.
35+
2. Order preservation
36+
=> Using a dictionary(words) loses the original order of
37+
appearance in python < 3.7. In modern Python dicts preserve insertion order, but it still doesn't capture
38+
consecutive runs.
39+
40+
3. Efficiency
41+
=> sentence.count(word) scans the whole string for each
42+
word -> O(n^2) complexity.
43+
"""
44+
def compress_string(sentence):
    """Collapse runs of consecutive duplicate words into ``word(count)``.

    Only adjacent repeats are merged; a word that reappears later in the
    sentence starts a new run. Counting is done per run in a single pass,
    rather than with a global ``sentence.count(word)``, which would merge
    non-adjacent duplicates and also count substring matches (for example
    the "a"s inside "banana").

    Args:
        sentence: Words separated by whitespace.

    Returns:
        The compressed sentence with words joined by single spaces. A word
        that is not repeated consecutively is emitted unchanged. An empty
        or whitespace-only sentence yields "".
    """
    # Each entry is a mutable [word, run_length] pair, in order of appearance.
    runs = []
    for word in sentence.split():
        if runs and runs[-1][0] == word:
            # Same word as the run currently being built: extend it.
            runs[-1][1] += 1
        else:
            runs.append([word, 1])

    return " ".join(
        f"{word}({count})" if count > 1 else word
        for word, count in runs
    )
59+
60+
def compress_string(sentence):
    """Return ``sentence`` with each run of adjacent equal words compressed.

    A run of two or more identical consecutive words is written as
    ``word(count)``; a word that stands alone is written as-is. The pieces
    are joined with single spaces. Input with no words produces "".
    """
    tokens = sentence.split()
    if len(tokens) == 0:
        return ""

    def render(word, run_length):
        # Single occurrences carry no count suffix.
        return word if run_length == 1 else f"{word}({run_length})"

    pieces = []
    current, run_length = tokens[0], 1
    for token in tokens[1:]:
        if token == current:
            run_length += 1
            continue
        # The run ended: emit it and start a new one at this token.
        pieces.append(render(current, run_length))
        current, run_length = token, 1

    # Flush the final run, which the loop never emits on its own.
    pieces.append(render(current, run_length))
    return " ".join(pieces)
78+
79+
80+
if __name__ == "__main__":
    # Print one sample result, then hand control to the unittest runner.
    sample_output = compress_string("yes yes yes please")
    print(sample_output)
    unittest.main()

0 commit comments

Comments
 (0)