Skip to content

Commit 437aa09

Browse files
Update 0.4.5
- Reduced console spam in keyword mode. - Fixed a crash when an invalid keyword file was specified. - Made some console prints more accurate. - Renamed some variables to improve code readability. - Added automated cleanup on program termination.
1 parent 0428878 commit 437aa09

1 file changed

Lines changed: 51 additions & 35 deletions

File tree

scrape.py

Lines changed: 51 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import threading
33
from datetime import datetime
44
from json import decoder
5-
from os import path, mkdir
5+
from os import path, mkdir, remove
66
from os.path import isfile
77

88
try:
@@ -21,25 +21,33 @@ def status(message):
2121
print("{0} {1}".format(datetime.now(), message))
2222

2323

# clean any temp files created during runtime
def cleanup():
    """Best-effort removal of the 'runfile' marker created at startup.

    Uses EAFP instead of an isfile() pre-check: checking and then removing
    is race-prone (the file could disappear between the two calls), and a
    missing file is exactly the "nothing to clean" case we want to ignore.
    Any other OS error (e.g. permissions) still propagates, as before.
    """
    try:
        remove("runfile")
    except FileNotFoundError:
        pass  # already gone — nothing to clean up
28+
29+
2430
# main loop
2531
def main():
26-
status("Fetching latest pastes...")
32+
status("Fetching latest paste IDs...")
2733

2834
# fetch latest 100 paste IDs
2935
fetch_limit = 100
3036

3137
current_request = requests.get("https://scrape.pastebin.com/api_scraping.php?limit={0}".format(fetch_limit))
32-
3338
current_json = []
39+
3440
try:
3541
current_json = current_request.json()
3642

3743
except decoder.JSONDecodeError:
38-
status(termcolor.colored("Unable to fetch latest pastes. Make sure your IP is whitelisted at "
44+
status(termcolor.colored("Unable to fetch latest paste IDs. Make sure your IP is whitelisted at "
3945
"https://pastebin.com/doc_scraping_api", "red"))
46+
cleanup()
47+
4048
exit(0)
4149

42-
status("Pastes fetched. Processing...")
50+
status("Paste IDs fetched. Processing...")
4351

4452
# clean up fetched ids
4553
cleaned_json = []
@@ -55,26 +63,27 @@ def main():
5563
.format(entry["key"]))
5664

5765
entry_content = entry_request.text
58-
path_t_important = path.join("files", "{0}.txt".format(entry["key"]))
66+
path_file = path.join("files", "{0}.txt".format(entry["key"]))
5967

6068
paste_ids.append(entry["key"])
69+
6170
# if we have a provided keyword list, check for keywords
6271
if keywords is not None:
6372
for keyword in keywords:
6473
if keyword.upper() in entry_content.upper():
65-
print(termcolor.colored(" [KEYWORD] Paste \'{0}\' contains keyword \'{1}\'".format(entry["key"]
66-
, keyword)
67-
, "green"))
74+
bar.suffix = "%(index)d/%(max)d " + termcolor.colored("[KEYWORD] Paste \'{0}\' contains "
75+
"keyword \'{1}\'".format(entry["key"],
76+
keyword),
77+
"green")
6878

69-
entry_file = open(path_t_important, "w+", encoding='utf-8')
70-
entry_file.write(entry_content)
71-
entry_file.close()
79+
with open(path_file, "w+", encoding='utf-8') as entry_file:
80+
entry_file.write(entry_content)
7281

7382
break
7483
else:
75-
entry_file = open(path_t_important, "w+", encoding='utf-8')
76-
entry_file.write(entry_content)
77-
entry_file.close()
84+
with open(path_file, "w+", encoding='utf-8') as entry_file:
85+
entry_file.write(entry_content)
86+
bar.suffix = "%(index)d/%(max)d Saving paste \'{0}\'".format(entry["key"])
7887

7988
bar.next()
8089

@@ -104,38 +113,36 @@ def main():
104113

105114
AUTHOR = "SYRAPT0R"
106115
COPYRIGHT = "2019-2020"
107-
VERSION = "0.4.4"
108-
109-
status("STARTING PASTA SCRAPER {0}, (c) {1} {2}".format(VERSION, COPYRIGHT, AUTHOR))
110-
print()
111-
112-
# make sure file directories exists
113-
if not path.isdir("files"):
114-
status(termcolor.colored("No file directory found, creating...", "yellow"))
115-
mkdir("files")
116+
VERSION = "0.4.5"
116117

117118
# parse arguments
118119
keywords = None
119120

120121
parser = argparse.ArgumentParser(description="A script to scrape pastebin.com with optional keyword search")
121122

122123
parser.add_argument("-k", "--keywords", help="A file containing keywords for the search")
123-
parser.add_argument("-i", "--infinite", help="Whether to run in infinite mode.", action="store_true")
124+
parser.add_argument("-i", "--infinite", help="Whether to run in infinite mode (Default: false)",
125+
action="store_true", default=False)
124126

125127
args = parser.parse_args()
126128

127-
# create non infinite file if needed
128-
if args.infinite is False:
129-
status("Creating run file...")
130-
f = open("runfile", "w+", encoding='utf-8')
131-
f.close()
132-
else:
133-
status("Running in infinite mode...")
129+
status("STARTING PASTA SCRAPER {0}, (c) {1} {2}".format(VERSION, COPYRIGHT, AUTHOR))
130+
print()
131+
132+
# make sure file directories exists
133+
if not path.isdir("files"):
134+
status(termcolor.colored("No file directory found, creating...", "yellow"))
135+
mkdir("files")
134136

135137
if args.keywords is not None:
136-
f = open(args.keywords)
137-
keywords = f.readlines()
138-
f.close()
138+
try:
139+
with open(args.keywords, "r") as f:
140+
keywords = f.readlines()
141+
142+
except IOError:
143+
status(termcolor.colored("Unable to load specified keyword file. Aborting...", "red"))
144+
145+
exit(0)
139146

140147
keywords = [keyword.strip() for keyword in keywords]
141148

@@ -145,6 +152,15 @@ def main():
145152
paste_ids = []
146153
max_id_list_size = 200
147154

155+
# create non infinite file if needed
156+
if args.infinite is False:
157+
status("Creating run file...")
158+
159+
f = open("runfile", "w+")
160+
f.close()
161+
else:
162+
status("Running in infinite mode...")
163+
148164
# preparation done, enter main loop
149165
status("Entering main loop...")
150166
print()

0 commit comments

Comments
 (0)