#!/usr/bin/env python3
"""Write a case-insensitive word-frequency report for a file.

Usage: freq02.py <input-file> <output-file>

A "word" is a maximal run of ASCII letters (``[a-z]+``, matched
case-insensitively against the raw bytes of the input). Words are
lower-cased before counting. The report has one ``<count> <word>`` line per
distinct word, ordered by descending count and then by word.

Provenance: added as src/freq02.py in commit 6faee48 ("added freq02.py"),
Alex Odinayev, 2020-05-18.
"""

import mmap
import os
import re
import sys
from collections import defaultdict

# Bytes pattern: the input is scanned as raw bytes through a memory map, so
# no text decoding happens until the report is written.
_WORD_RE = re.compile(rb'[a-z]+', re.I)


def usage():
    """Print the command-line synopsis and exit with status 1."""
    print(f'usage: {sys.argv[0]} <input-file> <output-file>')
    sys.exit(1)


def _count_words(data):
    """Return a mapping of lower-cased word (bytes) -> occurrence count.

    *data* is any bytes-like object (bytes, mmap, ...). Keeping the regex
    iterator local to this function ensures its buffer export on *data* is
    released on return, so the caller can close a memory map afterwards.
    """
    freq = defaultdict(int)
    for match in _WORD_RE.finditer(data):
        freq[match[0].lower()] += 1
    return freq


def main():
    """Read ``sys.argv[1]``, count its words, write the report to ``sys.argv[2]``.

    Exits with status 1 (via ``usage``) unless exactly two arguments are
    given. An empty input file yields an empty report.
    """
    if len(sys.argv) != 3:
        usage()

    # Binary mode: only the descriptor and raw bytes are used.
    with open(sys.argv[1], 'rb') as fh:
        if os.fstat(fh.fileno()).st_size == 0:
            # mmap refuses to map a zero-length file (ValueError), so treat
            # an empty input as "no words" instead of crashing.
            freq = {}
        else:
            # Length 0 maps the whole file; the with-block unmaps it.
            with mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ) as data:
                freq = _count_words(data)

    with open(sys.argv[2], 'w', encoding='utf-8') as out:
        # Most frequent first; ties broken by ascending word.
        for word, count in sorted(freq.items(), key=lambda kv: (-kv[1], kv[0])):
            out.write('%d %s\n' % (count, word.decode(encoding='UTF-8')))


if __name__ == '__main__':
    main()