# ---- git.py ----------------------------------------------------------------

def _build_github_query(query, days):
    """Return the search string: optional free text plus a ``created:>DATE`` filter.

    DATE is the UTC calendar date *days* days ago, formatted YYYY-MM-DD.
    """
    # Function-scope import: ``timezone`` is not among the names the file imports.
    from datetime import timezone

    # The API reports ``created_at`` in UTC, so the cutoff is computed in UTC
    # instead of naive local time (which can be off by a day near midnight).
    cutoff = datetime.now(timezone.utc) - timedelta(days=days)
    date_filter = f"created:>{cutoff.strftime('%Y-%m-%d')}"

    text = (query or '').strip()
    return f"{text} {date_filter}" if text else date_filter


def _summarize_repo(item):
    """Flatten one search-result item into the dict that main() prints."""
    # .get() with fallbacks: one incomplete item must not discard every result.
    return {
        'name': item.get('full_name') or 'Unknown',
        'description': item.get('description') or 'No description',
        'stars': item.get('stargazers_count', 0),
        'language': item.get('language') or 'Unknown',
        'created': item.get('created_at') or '',
        'url': item.get('html_url') or '',
    }


def fetch_github_repos(query, days=7, per_page=30):
    """Search GitHub for repositories created within the last *days* days.

    Args:
        query: Free-text search terms. None or a blank string searches all
            recently created repositories.
        days: Size of the creation-date window (default 7, the former
            hard-coded value).
        per_page: Number of results requested (default 30, the former
            hard-coded value).

    Returns:
        A list of dicts with the keys ``name``, ``description``, ``stars``,
        ``language``, ``created`` and ``url``, newest first. If the request
        or the JSON decoding fails, the error is printed to stderr and an
        empty list is returned.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'application/vnd.github.v3+json',
    }
    params = {
        'q': _build_github_query(query, days),
        # NOTE(review): confirm 'created' is an accepted ``sort`` value for
        # this endpoint; the result is re-sorted locally below either way.
        'sort': 'created',
        'order': 'desc',
        'per_page': per_page,
    }

    try:
        response = requests.get(
            "https://api.github.com/search/repositories",
            headers=headers,
            params=params,
            timeout=15,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON.
        print(f"Error: {exc}", file=sys.stderr)
        return []

    items = payload.get('items', []) if isinstance(payload, dict) else []
    repos = [_summarize_repo(item) for item in items if isinstance(item, dict)]

    # The output is advertised as newest-first, so enforce it here rather than
    # trusting the server-side ordering. Timestamps in one ISO-8601 format
    # compare chronologically as plain strings.
    repos.sort(key=lambda repo: repo['created'], reverse=True)
    return repos


def _shorten(text, limit=100):
    """Return *text* cut to *limit* characters, adding '...' only if it was cut."""
    text = str(text)
    return text if len(text) <= limit else text[:limit] + '...'


def main(argv=None):
    """Command-line entry point for the GitHub search script.

    The first argument starting with ``--`` supplies the query (the text
    after the two dashes; it may be empty, meaning "all recent").

    Args:
        argv: Argument list to parse. Defaults to ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1, after printing usage, when no argument
            starts with ``--``.
    """
    args = sys.argv[1:] if argv is None else argv
    query = next((arg[2:] for arg in args if arg.startswith('--')), None)

    if query is None:
        print("Usage: python git.py --")
        print("Example: python git.py --machine-learning")
        print(" python git.py --api")
        print(" python git.py --'' (all recent)")
        sys.exit(1)

    print(f"Searching GitHub for: '{query}' (sorted by MOST RECENT)")
    print("=" * 60)

    repos = fetch_github_repos(query)
    if not repos:
        print(f"No repositories found matching '{query}'")
        return

    print(f"\nFound {len(repos)} most recent repositories:\n")
    for i, repo in enumerate(repos, 1):
        print(f"{i}. {repo['name']}")
        # The original appended "..." even to descriptions that were not cut.
        print(f" {_shorten(repo['description'])}")
        print(f" Language: {repo['language']} | Stars: {repo['stars']}")
        print(f" Created: {repo['created']}")
        print(f" URL: {repo['url']}")
        print()


# ---- npm.py ----------------------------------------------------------------

def _author_name(author):
    """Return a printable author name from the registry's ``author`` field.

    The field is handled as a dict with a ``name`` key, as any other value
    (stringified), or as missing/empty, which yields 'Unknown'.
    """
    if isinstance(author, dict):
        return author.get('name') or 'Unknown'
    # A null author used to be rendered as the literal text "None".
    return str(author) if author else 'Unknown'


def fetch_npm_packages(query, size=30):
    """Search the npm registry and return the hits ordered newest first.

    The request carries no sort parameter, so the registry decides which
    *size* packages are returned; they are only re-ordered by date locally.

    Args:
        query: Search text. None or a blank string sends an empty ``text``.
        size: Number of results requested (default 30, the former
            hard-coded value).

    Returns:
        A list of dicts with the keys ``name``, ``version``, ``description``,
        ``author``, ``date`` and ``url``, sorted by ``date`` descending. If
        the request or the JSON decoding fails, the error is printed to
        stderr and an empty list is returned.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    params = {
        'text': query if query and query.strip() else '',
        'size': size,
    }

    try:
        response = requests.get(
            "https://registry.npmjs.org/-/v1/search",
            headers=headers,
            params=params,
            timeout=15,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Same one-line report as the other fetchers (no traceback dump).
        print(f"Error: {exc}", file=sys.stderr)
        return []

    entries = payload.get('objects', []) if isinstance(payload, dict) else []
    packages = []
    for entry in entries:
        pkg = (entry.get('package') if isinstance(entry, dict) else None) or {}
        name = pkg.get('name') or ''
        packages.append({
            'name': name or 'Unknown',
            'version': pkg.get('version') or 'N/A',
            # ``or`` also replaces an explicit null, which would otherwise
            # break the slicing done when the description is printed.
            'description': pkg.get('description') or 'No description',
            'author': _author_name(pkg.get('author')),
            # Always a string, so the sort below never compares None to str.
            'date': pkg.get('date') or '',
            'url': f"https://www.npmjs.com/package/{name}",
        })

    # Most recent first; entries without a date sort last.
    packages.sort(key=lambda entry: entry['date'], reverse=True)
    return packages
# ---- npm.py ----------------------------------------------------------------

def _shorten(text, limit=100):
    """Return *text* cut to *limit* characters, adding '...' only if it was cut."""
    text = str(text)
    return text if len(text) <= limit else text[:limit] + '...'


def main(argv=None):
    """Command-line entry point for the npm search script.

    The first argument starting with ``--`` supplies the query (the text
    after the two dashes).

    Args:
        argv: Argument list to parse. Defaults to ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1, after printing usage, when no argument
            starts with ``--``.
    """
    args = sys.argv[1:] if argv is None else argv
    query = next((arg[2:] for arg in args if arg.startswith('--')), None)

    if query is None:
        print("Usage: python npm.py --")
        print("Example: python npm.py --react")
        print(" python npm.py --typescript")
        print(" python npm.py --api")
        sys.exit(1)

    print(f"Searching npm for: '{query}' (sorted by MOST RECENT)")
    print("=" * 60)

    packages = fetch_npm_packages(query)
    if not packages:
        print(f"No packages found matching '{query}'")
        return

    print(f"\nFound {len(packages)} most recent packages:\n")
    for i, pkg in enumerate(packages, 1):
        print(f"{i}. {pkg['name']} ({pkg['version']})")
        # A null description would raise TypeError on slicing, and the
        # original appended "..." even when nothing had been cut.
        print(f" {_shorten(pkg['description'] or 'No description')}")
        print(f" Author: {pkg['author']}")
        print(f" Published: {pkg['date']}")
        print(f" URL: {pkg['url']}")
        print()
# ---- pypi.py ---------------------------------------------------------------

def _search_url(query):
    """Return the PyPI search URL for *query*, percent-encoded for the query string."""
    # Function-scope import: ``urllib.parse`` is not among the file's imports.
    from urllib.parse import quote_plus

    # Unencoded spaces, '&' or '#' in the query would corrupt the URL.
    return PYPI_SEARCH_URL.format(query=quote_plus(query))


def _parse_snippet(snippet):
    """Extract the printable fields from one ``a.package-snippet`` element."""
    def text_of(selector, default):
        node = snippet.select_one(selector)
        return node.text.strip() if node else default

    created = snippet.select_one('.package-snippet__created time')
    href = snippet.get('href', '')
    return {
        'name': text_of('.package-snippet__name', '?'),
        'version': text_of('.package-snippet__version', '?'),
        'description': text_of('.package-snippet__description', ''),
        'published': created.get('datetime', '?') if created else '?',
        'link': f'https://pypi.org{href}' if href else '?',
    }


def fetch_pypi_packages(query):
    """Load the PyPI search page for *query* in headless Chromium and parse it.

    Args:
        query: Search text; it is percent-encoded before being placed in the
            URL.

    Returns:
        A list of dicts with the keys ``name``, ``version``, ``description``,
        ``published`` and ``link``, in page order. On any failure (including
        the result selector not appearing within the timeout) the error is
        printed to stderr and an empty list is returned.
    """
    url = _search_url(query)

    # Deliberately broad: this is the script's error boundary and the browser
    # driver can raise several exception types; the error is reported, not hidden.
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=['--disable-blink-features=AutomationControlled', '--no-sandbox'],
            )
            try:
                ctx = browser.new_context(
                    user_agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                    viewport={'width': 1920, 'height': 1080},
                )
                page = ctx.new_page()
                # Hide the automation flag before any page script runs.
                page.add_init_script('Object.defineProperty(navigator, "webdriver", {get: () => undefined})')
                page.goto(url, timeout=30000)
                page.wait_for_selector('a.package-snippet', timeout=15000)
                html = page.content()
            finally:
                # Close the browser even when navigation or the wait raised;
                # the original skipped close() on that path.
                browser.close()

        soup = BeautifulSoup(html, 'html.parser')
        return [_parse_snippet(snippet) for snippet in soup.select('a.package-snippet')]

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return []


def main(argv=None):
    """Command-line entry point for the PyPI search script.

    The first argument starting with ``--`` supplies the query (the text
    after the two dashes).

    Args:
        argv: Argument list to parse. Defaults to ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1, after printing usage, when no argument
            starts with ``--``.
    """
    args = sys.argv[1:] if argv is None else argv
    query = next((arg[2:] for arg in args if arg.startswith('--')), None)

    if query is None:
        print("Usage: python pypi.py --")
        print("Example: python pypi.py --workflow")
        print(" python pypi.py --mcp")
        print(" python pypi.py --agent")
        sys.exit(1)

    # Show the same (encoded) URL that fetch_pypi_packages() requests.
    print(f"Fetching: {_search_url(query)}")
    print("=" * 60)

    packages = fetch_pypi_packages(query)
    if not packages:
        print(f"No packages found matching '{query}'")
        return

    print(f"\nFound {len(packages)} packages:\n")
    for i, pkg in enumerate(packages, 1):
        print(f"{i}. {pkg['name']} ({pkg['version']})")
        print(f" {pkg['description'][:100]}")
        print(f" Published: {pkg['published']}")
        print(f" Link: {pkg['link']}")
        print()