forked from c4rb0nx1/ShadowBot
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathShadowBot_V1.0.0.py
More file actions
94 lines (80 loc) · 3.18 KB
/
ShadowBot_V1.0.0.py
File metadata and controls
94 lines (80 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
from queue import Queue
import socks
import socket
import signal
import time
# ShadowBot: breadth-first Tor (.onion) web crawler.  All socket traffic is
# routed through a local SOCKS5 proxy; crawling runs until the user presses
# Ctrl+C and chooses to exit from the interactive control menu.

# Output file for harvested .onion links; closed in the finally block at the
# bottom so saved links are flushed to disk even on abrupt exit.
f = open('onion links.txt', 'w', encoding='utf-8')

# Configure the SOCKS proxy: monkey-patch socket.socket so every connection
# (including DNS for .onion hosts) goes through Tor on localhost:9050.
socks.set_default_proxy(socks.SOCKS5, "localhost", 9050)
socket.socket = socks.socksocket

url = input("Enter Your Parent URL: ")

# Initialize queue and add starting URL (BFS frontier + de-dup state).
queue = Queue()
queue.put(url)
visited = set()     # URLs already fetched
tot_web = set()     # every distinct link discovered so far
final_links = []    # .onion subset, recomputed in the control menu

# Set recursion limit
MAX_RECURSION = float('inf')  # no depth cap: crawl until interrupted
count = 1

try:
    while not queue.empty() and len(visited) < MAX_RECURSION:
        # Get next URL from queue
        try:
            url = queue.get()
            # Skip URL if already visited
            if url in visited:
                continue
            # Print progress
            print(f"Crawling {count} : {url}...")
            # Add URL to visited set
            visited.add(url)
            # Send GET request and parse HTML content.  The timeout keeps the
            # crawler from hanging forever on a dead hidden service (the
            # original call had no timeout, which blocks indefinitely).
            response = requests.get(url, timeout=30)
            soup = BeautifulSoup(response.text, "html.parser")
            # Extract links from HTML content
            for link in soup.find_all("a"):
                href = link.get("href")
                if href is not None:
                    # Resolve relative hrefs against the page URL
                    href = urljoin(url, href)
                    # Add URL to queue if it hasn't been visited yet
                    if href not in visited:
                        queue.put(href)
                    tot_web.add(href)
            count += 1
        except KeyboardInterrupt:
            # Ctrl+C drops into the interactive crawler-control menu.
            print("\n\n\n---------------------------------------------------------------------------")
            print("\nKeyboardInterrupt detected! Accessing Crawler Control.")
            try:
                final_links = [i for i in tot_web if '.onion' in i]
                print("\n---------------------------------------------------------------------------")
                command = input('''d: display onion links and continue crawling \nc: show stats and continue without displaying onion links\nx: exit crawling \nEnter (d/c/x) : ''')
                if command == 'd':
                    # Plain loop instead of a side-effect list comprehension.
                    for i in final_links:
                        print(i)
                    print("\n continuing to Crawl chief :) \n")
                elif command == 'c':
                    print("\n---------------------------------------------------------------------------")
                    print("\nStats:")
                    print("\nTotal onion links :", len(final_links))
                    print("\nTotal links crawled", len(tot_web))
                    print("\n---------------------------------------------------------------------------")
                    print("\n continuing to Crawl chief :) \n")
                elif command == 'x':
                    save = input("Do you want to save? (y/n): ")
                    if save == 'y':
                        print("saving....")
                        for line in final_links:
                            f.write(line)
                            f.write("\n")
                    elif save == 'n':
                        print("exited without saving links.")
                    time.sleep(1)
                    print("Night Night :) im going back to the shadows!")
                    break
            except TypeError:
                # Best-effort menu: a malformed interaction must not kill the
                # crawl (behavior preserved from the original).
                pass
        except Exception:
            # Request/parse failure for this URL (timeout, refused connection,
            # unsupported scheme): skip it and keep crawling.  Narrowed from
            # the original bare `except`, which also swallowed SystemExit.
            print("Access Denied")
finally:
    # Always release the output file so any saved links are flushed to disk.
    f.close()