-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathremoveDuplicateLines.py
More file actions
24 lines (19 loc) · 914 Bytes
/
removeDuplicateLines.py
File metadata and controls
24 lines (19 loc) · 914 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/usr/bin/env python3
import os
import sys
import argparse
# Interpreter guard: the script is only supported on Python 3.
# PY3 is True when the running interpreter's major version is exactly 3.
PY3 = sys.version_info[0] == 3
if not PY3:
    # Passing a string to sys.exit() writes it to stderr and terminates with
    # exit status 1, so shells and calling scripts can detect the failure
    # (the previous print() + sys.exit(0) reported success). This form is
    # also valid syntax on Python 2, which is exactly where this branch runs.
    sys.exit("This program has only been tested on Python 3.6+, you are using an unknown version of python.")
# Command-line interface: two positional arguments, the wordlist to read and
# the file that receives the de-duplicated result. argparse.FileType opens
# both files while parsing (the output file is created/truncated then).
parser = argparse.ArgumentParser(description='remove duplicates in supplied wordlist')
parser.add_argument('wordlist', type=argparse.FileType('r'), help='The wordlist you would like to remove duplicates from')
parser.add_argument('wordlist_clean', type=argparse.FileType('w'), help='The file to store the cleaned wordlist')


def remove_duplicate_lines(source, sink):
    """Copy each distinct line of *source* to *sink*, keeping first occurrences.

    Lines are written in their original order and with their original line
    terminator. Two lines are considered equal when their text matches once
    the trailing newline is ignored, so a final line that lacks a newline is
    still recognised as a duplicate of an earlier, newline-terminated line.

    Args:
        source: iterable of text lines (for example an open text file).
        sink: object with a ``write(str)`` method that receives kept lines.

    Returns:
        The number of lines written to *sink*.
    """
    # Approach adapted from
    # https://blog.georgechalhoub.com/2015/09/remove-duplicate-lines-from-file-using.html
    lines_seen = set()  # newline-stripped text of every line already written
    written = 0
    for line in source:
        # Compare without the terminator: only the last line of a file can be
        # missing it, and it must not slip through as a "new" entry.
        key = line.rstrip('\n')
        if key in lines_seen:
            continue
        lines_seen.add(key)
        sink.write(line)
        written += 1
    return written


if __name__ == '__main__':
    args = parser.parse_args()
    # The with-statement closes both the input and the output handle, even
    # when reading or writing raises part-way through.
    with args.wordlist as wordlist, args.wordlist_clean as wordlist_clean:
        remove_duplicate_lines(wordlist, wordlist_clean)