Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions git.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env python3
"""
GitHub search scraper - ALWAYS sorted by most recent
Usage: python git.py --<query>
"""
import sys
import requests
from datetime import datetime, timedelta

def fetch_github_repos(query):
    """Fetch the most recently created GitHub repositories matching *query*.

    Calls the GitHub repository search API, restricted to repositories
    created during the last seven days and ordered newest first. At most 30
    results are requested.

    Args:
        query: Free-text search terms. ``None``, an empty string or a
            whitespace-only string searches every repository in the window.

    Returns:
        A list of dicts with the keys ``name``, ``description``, ``stars``,
        ``language``, ``created`` and ``url``. The list is empty when nothing
        matched or when the request failed; failures are reported on stderr
        rather than raised.
    """
    # Function-scope import so this block does not depend on the module
    # header importing ``timezone``.
    from datetime import timezone

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'application/vnd.github.v3+json',
    }

    # Compute the cut-off in UTC so the search window does not shift with the
    # local time zone of the machine running the script.
    week_ago = datetime.now(timezone.utc) - timedelta(days=7)
    date_filter = f"created:>{week_ago.strftime('%Y-%m-%d')}"

    # Strip stray whitespace so it never leaks into the search expression.
    terms = query.strip() if query else ''
    search_query = f"{terms} {date_filter}" if terms else date_filter

    url = "https://api.github.com/search/repositories"
    params = {
        'q': search_query,
        'sort': 'created',
        'order': 'desc',
        'per_page': 30,
    }

    try:
        response = requests.get(url, headers=headers, params=params, timeout=15)
        response.raise_for_status()

        # Tolerate both a missing and a null ``items`` entry.
        items = response.json().get('items') or []

        return [
            {
                'name': item['full_name'],
                # Fall back to a placeholder when the value is empty/missing.
                'description': item['description'] or 'No description',
                'stars': item['stargazers_count'],
                'language': item['language'] or 'Unknown',
                'created': item['created_at'],
                'url': item['html_url'],
            }
            for item in items
        ]

    except Exception as e:
        # Deliberate best-effort boundary for a CLI tool: report the problem
        # and let the caller treat it as "no results".
        print(f"Error: {e}", file=sys.stderr)
        return []

def main():
    """Command-line entry point: search GitHub and print the results.

    The search text is taken from the first command-line argument that starts
    with ``--`` (``--api`` searches for ``api``; a bare ``--`` yields an empty
    query). When no such argument is present, usage help is printed and the
    process exits with status 1.
    """
    # First ``--``-prefixed argument wins; ``None`` means "not supplied",
    # which is distinct from an explicitly empty query.
    query = next(
        (arg[2:] for arg in sys.argv[1:] if arg.startswith('--')),
        None,
    )

    if query is None:
        print("Usage: python git.py --<query>")
        print("Example: python git.py --machine-learning")
        print(" python git.py --api")
        print(" python git.py --'' (all recent)")
        sys.exit(1)

    print(f"Searching GitHub for: '{query}' (sorted by MOST RECENT)")
    print("=" * 60)

    repos = fetch_github_repos(query)

    if not repos:
        print(f"No repositories found matching '{query}'")
        return

    print(f"\nFound {len(repos)} most recent repositories:\n")
    for i, repo in enumerate(repos, 1):
        description = repo['description']
        # Only add the ellipsis when text was actually cut off; previously
        # even short descriptions were printed with a trailing "...".
        if len(description) > 100:
            description = f"{description[:100]}..."

        print(f"{i}. {repo['name']}")
        print(f" {description}")
        print(f" Language: {repo['language']} | Stars: {repo['stars']}")
        print(f" Created: {repo['created']}")
        print(f" URL: {repo['url']}")
        print()


if __name__ == '__main__':
    main()

90 changes: 90 additions & 0 deletions npm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
npm search scraper - fetches the top 30 search matches, then sorts them by most recent
Usage: python npm.py --<query>
"""
import sys
import requests
from datetime import datetime

def fetch_npm_packages(query):
    """Fetch npm packages matching *query*, newest publish date first.

    Queries the npm registry search endpoint for up to 30 matches and then
    sorts those matches locally by their ``date`` field, most recent first.

    Args:
        query: Free-text search terms. ``None``, an empty string or a
            whitespace-only string is sent as an empty search text.

    Returns:
        A list of dicts with the keys ``name``, ``version``, ``description``,
        ``author``, ``date`` and ``url``. The list is empty when nothing
        matched or when the request failed; failures (with a traceback) are
        reported on stderr rather than raised.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    url = "https://registry.npmjs.org/-/v1/search"
    params = {
        'text': query if query and query.strip() else '',
        'size': 30,
    }

    try:
        response = requests.get(url, headers=headers, params=params, timeout=15)
        response.raise_for_status()

        data = response.json()
        packages = []

        # ``x.get(key) or default`` is used throughout (instead of
        # ``x.get(key, default)``) so that keys that are present but null
        # also receive the fallback value.
        for item in data.get('objects') or []:
            pkg = item.get('package') or {}

            # The author may be an object with a ``name``, a plain value, or
            # absent/null.
            author = pkg.get('author')
            if isinstance(author, dict):
                author_name = author.get('name') or 'Unknown'
            elif author:
                author_name = str(author)
            else:
                author_name = 'Unknown'

            packages.append({
                'name': pkg.get('name') or 'Unknown',
                'version': pkg.get('version') or 'N/A',
                # Must always be a string: the caller slices it for display.
                'description': pkg.get('description') or 'No description',
                'author': author_name,
                # Must always be a string: it is the sort key below.
                'date': pkg.get('date') or '',
                'url': f"https://www.npmjs.com/package/{pkg.get('name') or ''}",
            })

        # Dates are compared as plain strings.
        # NOTE(review): this assumes the registry returns uniformly formatted
        # ISO-8601 timestamps, which sort chronologically as text - confirm.
        packages.sort(key=lambda entry: entry['date'], reverse=True)

        return packages

    except Exception as e:
        # Deliberate best-effort boundary for a CLI tool: report the problem
        # and let the caller treat it as "no results".
        print(f"Error: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        return []

def main():
    """Command-line entry point: search npm and print the results.

    The search text is taken from the first command-line argument that starts
    with ``--`` (``--react`` searches for ``react``). When no such argument is
    present, usage help is printed and the process exits with status 1.
    """
    # First ``--``-prefixed argument wins; ``None`` means "not supplied",
    # which is distinct from an explicitly empty query.
    query = next(
        (arg[2:] for arg in sys.argv[1:] if arg.startswith('--')),
        None,
    )

    if query is None:
        print("Usage: python npm.py --<query>")
        print("Example: python npm.py --react")
        print(" python npm.py --typescript")
        print(" python npm.py --api")
        sys.exit(1)

    print(f"Searching npm for: '{query}' (sorted by MOST RECENT)")
    print("=" * 60)

    packages = fetch_npm_packages(query)

    if not packages:
        print(f"No packages found matching '{query}'")
        return

    print(f"\nFound {len(packages)} most recent packages:\n")
    for i, pkg in enumerate(packages, 1):
        # Guard against a missing description before measuring/slicing it.
        description = pkg['description'] or ''
        # Only add the ellipsis when text was actually cut off; previously
        # even short descriptions were printed with a trailing "...".
        if len(description) > 100:
            description = f"{description[:100]}..."

        print(f"{i}. {pkg['name']} ({pkg['version']})")
        print(f" {description}")
        print(f" Author: {pkg['author']}")
        print(f" Published: {pkg['date']}")
        print(f" URL: {pkg['url']}")
        print()


if __name__ == '__main__':
    main()

93 changes: 93 additions & 0 deletions pypi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/env python3
"""
PyPI search scraper - fetches from https://pypi.org/search/?q=<QUERY>&o=-created
Uses Playwright to bypass JS client challenge, then parses HTML with BeautifulSoup.
Usage: python pypi.py --<query>
"""
import sys
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup

# Template for the PyPI search page URL; ``{query}`` is filled in with
# ``str.format``.
# NOTE(review): ``o=-created`` presumably asks PyPI for newest-first ordering
# - confirm against the site.
PYPI_SEARCH_URL = "https://pypi.org/search/?q={query}&o=-created"

def fetch_pypi_packages(query):
    """Fetch packages from https://pypi.org/search/?q=<QUERY>&o=-created

    The search page is loaded in headless Chromium through Playwright, and
    the rendered HTML is then parsed with BeautifulSoup.

    Args:
        query: Search text. It is percent-encoded before being placed in the
            URL; ``None`` is treated as an empty search.

    Returns:
        A list of dicts with the keys ``name``, ``version``, ``description``,
        ``published`` and ``link``, in page order. The list is empty when the
        page could not be loaded or no result element appeared in time;
        failures are reported on stderr rather than raised.
    """
    # Function-scope import so this block does not depend on the module
    # header importing ``urllib.parse``.
    from urllib.parse import quote_plus

    # Encode the query so characters such as spaces, '&' and '#' stay inside
    # the ``q`` parameter instead of corrupting the rest of the URL.
    search_text = '' if query is None else str(query)
    url = PYPI_SEARCH_URL.format(query=quote_plus(search_text))

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=['--disable-blink-features=AutomationControlled', '--no-sandbox']
            )
            try:
                ctx = browser.new_context(
                    user_agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                    viewport={'width': 1920, 'height': 1080},
                )
                page = ctx.new_page()
                # Make ``navigator.webdriver`` read as undefined in the page.
                page.add_init_script('Object.defineProperty(navigator, "webdriver", {get: () => undefined})')
                page.goto(url, timeout=30000)
                # Wait until at least one search result has been rendered.
                page.wait_for_selector('a.package-snippet', timeout=15000)
                html = page.content()
            finally:
                # Close the browser even when navigation or the wait fails.
                browser.close()

        soup = BeautifulSoup(html, 'html.parser')

        packages = []
        for snippet in soup.select('a.package-snippet'):
            name = snippet.select_one('.package-snippet__name')
            ver = snippet.select_one('.package-snippet__version')
            desc = snippet.select_one('.package-snippet__description')
            created = snippet.select_one('.package-snippet__created time')
            href = snippet.get('href', '')

            # Each field falls back to a placeholder when its element is
            # missing from the result snippet.
            packages.append({
                'name': name.text.strip() if name else '?',
                'version': ver.text.strip() if ver else '?',
                'description': desc.text.strip() if desc else '',
                'published': created.get('datetime', '?') if created else '?',
                'link': f'https://pypi.org{href}' if href else '?',
            })

        return packages

    except Exception as e:
        # Deliberate best-effort boundary for a CLI tool: report the problem
        # and let the caller treat it as "no results".
        print(f"Error: {e}", file=sys.stderr)
        return []

def main():
    """Command-line entry point: search PyPI and print the results.

    The search text is taken from the first command-line argument that starts
    with ``--`` (``--mcp`` searches for ``mcp``). When no such argument is
    present, usage help is printed and the process exits with status 1.
    """
    # Function-scope import so this block does not depend on the module
    # header importing ``urllib.parse``.
    from urllib.parse import quote_plus

    # First ``--``-prefixed argument wins; ``None`` means "not supplied",
    # which is distinct from an explicitly empty query.
    query = next(
        (arg[2:] for arg in sys.argv[1:] if arg.startswith('--')),
        None,
    )

    if query is None:
        print("Usage: python pypi.py --<query>")
        print("Example: python pypi.py --workflow")
        print(" python pypi.py --mcp")
        print(" python pypi.py --agent")
        sys.exit(1)

    # Show a well-formed URL: the query is percent-encoded so spaces, '&' and
    # '#' do not produce a misleading or broken link in the output.
    url = PYPI_SEARCH_URL.format(query=quote_plus(query))
    print(f"Fetching: {url}")
    print("=" * 60)

    packages = fetch_pypi_packages(query)

    if not packages:
        print(f"No packages found matching '{query}'")
        return

    print(f"\nFound {len(packages)} packages:\n")
    for i, pkg in enumerate(packages, 1):
        description = pkg['description']
        # Mark truncated text so a cut-off description is not mistaken for
        # the full one.
        if len(description) > 100:
            description = f"{description[:100]}..."

        print(f"{i}. {pkg['name']} ({pkg['version']})")
        print(f" {description}")
        print(f" Published: {pkg['published']}")
        print(f" Link: {pkg['link']}")
        print()


if __name__ == '__main__':
    main()