devanshbatham · aishakhalafalharasia-ops · Apr 23, 2026
diff --git a/build/lib/paramspider/__init__.py b/build/lib/paramspider/__init__.py
diff --git a/build/lib/paramspider/client.py b/build/lib/paramspider/client.py
@@ -0,0 +1,67 @@
+import requests
+import random
+import json
+import logging
+import time
+import sys
+
+
+
+# Set the logging threshold to INFO for this module's warning/error messages.
+logging.basicConfig(level=logging.INFO)
+
+
+# Number of attempts fetch_url_content makes before giving up.
+MAX_RETRIES = 3
+
+def load_user_agents():
+    """
+    Build the pool of browser User-Agent strings.
+
+    Returns:
+        list: A new list of User-Agent header values (some entries repeat).
+    """
+    agents = (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
+        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36 Edg/89.0.774.45",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Safari/537.36 Edge/16.16299",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 OPR/45.0.2552.898",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Vivaldi/1.8.770.50",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/15.15063",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/15.15063",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36",
+    )
+    # Hand back a list (not the tuple) so the return type matches what callers expect.
+    return list(agents)
+
+def fetch_url_content(url, proxy, timeout=60):
+    """
+    Fetch a URL with a randomly chosen User-Agent, retrying on failure.
+
+    Args:
+        url (str): The URL to request.
+        proxy (str | None): Proxy address used for both HTTP and HTTPS
+            traffic, or None to connect directly.
+        timeout (float | None): Seconds to wait on the server before an
+            attempt counts as failed. Without a timeout a request can block
+            indefinitely on an unresponsive server.
+
+    Returns:
+        requests.Response: The first response with a successful status.
+
+    Raises:
+        SystemExit: With status 1 once MAX_RETRIES attempts have failed, or
+            with status 0 when the user interrupts the fetch.
+    """
+    user_agents = load_user_agents()
+    proxies = None
+    if proxy is not None:
+        proxies = {
+            'http': proxy,
+            'https': proxy
+        }
+
+    try:
+        for attempt in range(1, MAX_RETRIES + 1):
+            headers = {
+                "User-Agent": random.choice(user_agents)
+            }
+
+            try:
+                response = requests.get(url, proxies=proxies, headers=headers, timeout=timeout)
+                response.raise_for_status()
+                return response
+            except (requests.exceptions.RequestException, ValueError) as err:
+                if attempt == MAX_RETRIES:
+                    # Nothing left to retry, so skip the pointless back-off sleep.
+                    logging.warning(f"Error fetching URL {url}: {err}")
+                    break
+                logging.warning(f"Error fetching URL {url}: {err}. Retrying in 5 seconds...")
+                time.sleep(5)
+    except KeyboardInterrupt:
+        # The handler wraps the whole loop so that Ctrl-C during the back-off
+        # sleep is handled too, not only Ctrl-C during the request itself.
+        logging.warning("Keyboard Interrupt received. Exiting gracefully...")
+        sys.exit()
+
+    logging.error(f"Failed to fetch URL {url} after {MAX_RETRIES} retries.")
+    # Non-zero status so shells and pipelines can detect the failure.
+    sys.exit(1)
diff --git a/build/lib/paramspider/main.py b/build/lib/paramspider/main.py
@@ -0,0 +1,168 @@
+import argparse
+import os
+import logging
+import colorama
+from colorama import Fore, Style
+from . import client  # Importing client from a module named "client"
+from urllib.parse import urlparse, parse_qs, urlencode
+import os
+
+# ANSI escape codes used to colour the startup banner printed by main().
+yellow_color_code = "\033[93m"
+reset_color_code = "\033[0m"
+
+colorama.init(autoreset=True)  # Initialize colorama for colored terminal output
+
+# Log records are rendered as the bare message text (no level or logger name).
+log_format = '%(message)s'
+logging.basicConfig(format=log_format, level=logging.INFO)
+# Apply the same format to the first root handler explicitly.
+logging.getLogger('').handlers[0].setFormatter(logging.Formatter(log_format))
+
+# File extensions passed to clean_urls(); URLs whose path ends in one of
+# these are dropped from the results.
+HARDCODED_EXTENSIONS = [
+    ".jpg", ".jpeg", ".png", ".gif", ".pdf", ".svg", ".json",
+    ".css", ".js", ".webp", ".woff", ".woff2", ".eot", ".ttf", ".otf", ".mp4", ".txt"
+]
+
+def has_extension(url, extensions):
+    """
+    Tell whether the path component of a URL ends in one of the given extensions.
+
+    Only the path is inspected (the query string is ignored) and the
+    comparison is done on the lower-cased extension.
+
+    Args:
+        url (str): The URL to inspect.
+        extensions (list): Extensions to match, each including the leading dot.
+
+    Returns:
+        bool: True when the path's extension is in ``extensions``.
+    """
+    _, suffix = os.path.splitext(urlparse(url).path)
+    return suffix.lower() in extensions
+
+def clean_url(url):
+    """
+    Clean the URL by removing redundant port information for HTTP and HTTPS URLs.
+
+    ``:80`` is stripped from http URLs and ``:443`` from https URLs. A URL
+    that cannot be parsed, or whose port is not a valid number, is returned
+    unchanged instead of raising, so one malformed URL cannot abort a whole
+    batch.
+
+    Args:
+        url (str): The URL to clean.
+
+    Returns:
+        str: Cleaned URL.
+    """
+    try:
+        parsed_url = urlparse(url)
+        # Accessing .port raises ValueError for non-numeric/out-of-range ports.
+        port = parsed_url.port
+    except ValueError:
+        return url
+
+    is_default_port = (
+        (port == 80 and parsed_url.scheme == "http")
+        or (port == 443 and parsed_url.scheme == "https")
+    )
+    if is_default_port:
+        # rsplit keeps any "user:pass@" prefix or bracketed IPv6 host intact.
+        parsed_url = parsed_url._replace(netloc=parsed_url.netloc.rsplit(":", 1)[0])
+
+    return parsed_url.geturl()
+
+def clean_urls(urls, extensions, placeholder):
+    """
+    Clean a list of URLs and replace every query parameter value with a placeholder.
+
+    URLs whose path has one of the given extensions are dropped, as are URLs
+    that cannot be parsed. Duplicates that arise after cleaning are collapsed.
+
+    Args:
+        urls (list): List of URLs to clean.
+        extensions (list): List of file extensions to check against.
+        placeholder (str): Value substituted for every query parameter value.
+
+    Returns:
+        list: Sorted list of unique cleaned URLs.
+    """
+    cleaned_urls = set()
+    for url in urls:
+        try:
+            cleaned_url = clean_url(url)
+            if has_extension(cleaned_url, extensions):
+                continue
+            parsed_url = urlparse(cleaned_url)
+            # keep_blank_values=True keeps parameters such as "?id=&page=",
+            # which parse_qs would otherwise silently discard.
+            query_params = parse_qs(parsed_url.query, keep_blank_values=True)
+        except ValueError:
+            # Malformed URL (e.g. bad port or IPv6 literal): skip it rather
+            # than abort the whole batch.
+            continue
+
+        cleaned_params = {key: placeholder for key in query_params}
+        cleaned_query = urlencode(cleaned_params, doseq=True)
+        cleaned_urls.add(parsed_url._replace(query=cleaned_query).geturl())
+
+    # Sorting makes the output reproducible between runs.
+    return sorted(cleaned_urls)
+
+def fetch_and_clean_urls(domain, extensions, stream_output, proxy, placeholder):
+    """
+    Fetch and clean URLs related to a specific domain from the Wayback Machine.
+
+    The cleaned URLs that still carry a query string are written, one per
+    line, to ``results/<domain>.txt``.
+
+    Args:
+        domain (str): The domain name to fetch URLs for.
+        extensions (list): List of file extensions to check against.
+        stream_output (bool): True to stream URLs to the terminal.
+        proxy (str | None): Proxy address handed to the HTTP client, or None.
+        placeholder (str): Value substituted for every query parameter value.
+
+    Returns:
+        None
+    """
+    logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Fetching URLs for {Fore.CYAN + domain + Style.RESET_ALL}")
+    wayback_uri = f"https://web.archive.org/cdx/search/cdx?url={domain}/*&output=txt&collapse=urlkey&fl=original&page=/"
+    response = client.fetch_url_content(wayback_uri, proxy)
+    urls = response.text.split()
+
+    logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Found {Fore.GREEN + str(len(urls)) + Style.RESET_ALL} URLs for {Fore.CYAN + domain + Style.RESET_ALL}")
+
+    # Announce the cleaning step before it runs, not after.
+    logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Cleaning URLs for {Fore.CYAN + domain + Style.RESET_ALL}")
+    cleaned_urls = clean_urls(urls, extensions, placeholder)
+    logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Found {Fore.GREEN + str(len(cleaned_urls)) + Style.RESET_ALL} URLs after cleaning")
+    logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Extracting URLs with parameters")
+
+    results_dir = "results"
+    # exist_ok avoids the check-then-create race of exists() + makedirs().
+    os.makedirs(results_dir, exist_ok=True)
+
+    # Path separators in the domain would point outside results/ or at a
+    # missing sub-directory; flatten them so the file always lands in results/.
+    safe_name = domain.replace("/", "_").replace("\\", "_")
+    result_file = os.path.join(results_dir, f"{safe_name}.txt")
+
+    # Explicit UTF-8 so non-ASCII URLs do not fail on platforms whose default
+    # encoding cannot represent them.
+    with open(result_file, "w", encoding="utf-8") as f:
+        for url in cleaned_urls:
+            if "?" in url:
+                f.write(url + "\n")
+                if stream_output:
+                    print(url)
+
+    logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Saved cleaned URLs to {Fore.CYAN + result_file + Style.RESET_ALL}")
+
+def main():
+    """
+    Main function to handle command-line arguments and start URL mining process.
+
+    Prints the banner, parses the CLI options, normalises the requested
+    domain(s) and runs fetch_and_clean_urls for each of them. Exactly one of
+    ``-d`` / ``-l`` must be given; otherwise argparse reports an error and
+    exits.
+    """
+    # Raw string: the ASCII art contains backslashes that are not valid escape
+    # sequences and would trigger a SyntaxWarning on recent Python versions.
+    log_text = r"""
+
+                                      _    __       
+   ___  ___ ________ ___ _  ___ ___  (_)__/ /__ ____
+  / _ \/ _ `/ __/ _ `/  ' \(_-</ _ \/ / _  / -_) __/
+ / .__/\_,_/_/  \_,_/_/_/_/___/ .__/_/\_,_/\__/_/   
+/_/                          /_/                    
+
+                              with <3 by @0xasm0d3us           
+    """
+    colored_log_text = f"{yellow_color_code}{log_text}{reset_color_code}"
+    print(colored_log_text)
+    parser = argparse.ArgumentParser(description="Mining URLs from dark corners of Web Archives ")
+    parser.add_argument("-d", "--domain", help="Domain name to fetch related URLs for.")
+    parser.add_argument("-l", "--list", help="File containing a list of domain names.")
+    parser.add_argument("-s", "--stream", action="store_true", help="Stream URLs on the terminal.")
+    parser.add_argument("--proxy", help="Set the proxy address for web requests.",default=None)
+    parser.add_argument("-p", "--placeholder", help="placeholder for parameter values", default="FUZZ")
+    args = parser.parse_args()
+
+    if not args.domain and not args.list:
+        parser.error("Please provide either the -d option or the -l option.")
+
+    if args.domain and args.list:
+        parser.error("Please provide either the -d option or the -l option, not both.")
+
+    if args.list:
+        with open(args.list, "r") as f:
+            raw_domains = list(f)
+    else:
+        raw_domains = [args.domain]
+
+    # Apply the same normalisation to -d and -l input so that a value such as
+    # "https://Example.com" works for both.
+    normalized = (
+        entry.strip().lower().replace('https://', '').replace('http://', '')
+        for entry in raw_domains
+    )
+    # dict.fromkeys drops empty lines' duplicates while keeping input order.
+    domains = list(dict.fromkeys(name for name in normalized if name))
+
+    extensions = HARDCODED_EXTENSIONS
+
+    for domain in domains:
+        fetch_and_clean_urls(domain, extensions, args.stream, args.proxy, args.placeholder)
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
diff --git a/paramspider.egg-info/PKG-INFO b/paramspider.egg-info/PKG-INFO
@@ -0,0 +1,99 @@
+Metadata-Version: 2.4
+Name: paramspider
+Version: 0.1.0
+Summary: Mining parameters from dark corners of Web Archives
+Author: Devansh Batham
+Author-email: devanshbatham009@gmail.com
+License: MIT
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: requests
+Requires-Dist: colorama
+Dynamic: author
+Dynamic: author-email
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: summary
+
+<h1 align="center">
+    paramspider
+  <br>
+</h1>
+
+<h4 align="center">  Mining URLs from dark corners of Web Archives for bug hunting/fuzzing/further probing </h4>
+
+<p align="center">
+  <a href="#about">📖 About</a> •
+  <a href="#installation">🏗️ Installation</a> •
+  <a href="#usage">⛏️ Usage</a> •
+  <a href="#examples">🚀 Examples</a> •
+  <a href="#contributing">🤝 Contributing</a> •
+</p>
+
+
+![paramspider](https://github.com/devanshbatham/ParamSpider/blob/master/static/paramspider.png?raw=true)
+
+## About
+
+`paramspider` allows you to fetch URLs related to any domain or a list of domains from Wayback archives. It filters out "boring" URLs, allowing you to focus on the ones that matter the most.
+
+## Installation
+
+To install `paramspider`, follow these steps:
+
+```sh
+git clone https://github.com/devanshbatham/paramspider
+cd paramspider
+pip install .
+```
+
+## Usage
+
+To use `paramspider`, follow these steps:
+
+```sh
+paramspider -d example.com
+```
+
+## Examples
+
+Here are a few examples of how to use `paramspider`:
+
+- Discover URLs for a single domain:
+
+  ```sh
+  paramspider -d example.com
+  ```
+
+- Discover URLs for multiple domains from a file:
+
+  ```sh
+  paramspider -l domains.txt
+  ```
+
+- Stream URLs on the terminal:
+
+    ```sh 
+    paramspider -d example.com -s
+    ```
+
+- Set up web request proxy:
+
+    ```sh
+    paramspider -d example.com --proxy '127.0.0.1:7890'
+    ```
+- Adding a placeholder for URL parameter values (default: "FUZZ"): 
+
+  ```sh
+   paramspider -d example.com -p '"><h1>reflection</h1>'
+  ```
+
+
+## Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=devanshbatham/paramspider&type=Date)](https://star-history.com/#devanshbatham/paramspider&Date)
+
+
diff --git a/paramspider.egg-info/SOURCES.txt b/paramspider.egg-info/SOURCES.txt
@@ -0,0 +1,12 @@
+LICENSE
+README.md
+setup.py
+paramspider/__init__.py
+paramspider/client.py
+paramspider/main.py
+paramspider.egg-info/PKG-INFO
+paramspider.egg-info/SOURCES.txt
+paramspider.egg-info/dependency_links.txt
+paramspider.egg-info/entry_points.txt
+paramspider.egg-info/requires.txt
+paramspider.egg-info/top_level.txt
diff --git a/paramspider.egg-info/dependency_links.txt b/paramspider.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/paramspider.egg-info/entry_points.txt b/paramspider.egg-info/entry_points.txt
@@ -0,0 +1,2 @@
+[console_scripts]
+paramspider = paramspider.main:main
diff --git a/paramspider.egg-info/requires.txt b/paramspider.egg-info/requires.txt
@@ -0,0 +1,2 @@
+requests
+colorama
diff --git a/paramspider.egg-info/top_level.txt b/paramspider.egg-info/top_level.txt
@@ -0,0 +1 @@
+paramspider
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		[console_scripts]
		paramspider = paramspider.main:main