From 2d1e3daae3a0e08d03e51abde5a5bb7aa02d895a Mon Sep 17 00:00:00 2001 From: Abel <196466003+DisabledAbel@users.noreply.github.com> Date: Wed, 13 May 2026 18:56:19 -0700 Subject: [PATCH 1/3] Add feed type endpoints for videos shorts and live --- README.md | 22 ++++++++++++++--- api/app.py | 26 ++++++++++++++------ rss_scanner.py | 67 +++++++++++++++++++++++++++----------------------- 3 files changed, 73 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 0112274..bf1afc0 100644 --- a/README.md +++ b/README.md @@ -88,12 +88,28 @@ npm start "https://www.youtube.com/@LinusTechTips" --- -### `/feed/` (GET) +### `/feed//` (GET) ### Get RSS feed via URL path Access a YouTube channel's RSS feed by passing the channel URL in the path. +Supported feed types: + +- `all` (existing combined behavior) +- `videos` (regular videos tab) +- `shorts` (shorts tab) +- `live` (live/streams tab) + +Endpoint examples: + +```text +/feed/all/:channel +/feed/videos/:channel +/feed/shorts/:channel +/feed/live/:channel +``` + **Important:** The `channel_url` parameter must be URL-encoded (percent-encoded) to avoid 404 errors. **Example with encoding:** @@ -105,13 +121,13 @@ const encodedUrl = encodeURIComponent(channelUrl); // Result: https%3A%2F%2Fwww.youtube.com%2Fchannel%2FUCXuqSBlHAE6Xw-yeJA0Tunw // Use in request -fetch(`/feed/${encodedUrl}`) +fetch(`/feed/videos/${encodedUrl}`) ``` **Alternative (recommended):** Use query parameters to avoid encoding issues: ```bash -/feed?channel_url=https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw +/feed/all/https%3A%2F%2Fwww.youtube.com%2Fchannel%2FUCXuqSBlHAE6Xw-yeJA0Tunw ``` --- diff --git a/api/app.py b/api/app.py index 3805816..1c5bf18 100644 --- a/api/app.py +++ b/api/app.py @@ -134,11 +134,18 @@ def send_to_discord(webhook_url: str, youtube_rss: str, channel_id: str | None, @app.route('/feed/') -@app.route('/feed/') -def get_feed(channel_url=None): +def feed_usage(): + return "Usage: /feed/{type}/{youtube_channel_url} where type is all|videos|shorts|live" + + +@app.route('/feed//') +def get_feed(feed_type, channel_url=None): """Generate RSS feed for given channel.""" if channel_url is None: - return "Usage: /feed/{youtube_channel_url}" + return "Usage: /feed/{type}/{youtube_channel_url}" + + if feed_type not in ("all", "videos", "shorts", "live"): + return Response("Invalid feed type", status=400) # Decode URL-encoded parts channel_url = urllib.parse.unquote(channel_url) @@ -150,7 +157,7 @@ def get_feed(channel_url=None): full_url = channel_url try: - _, channel_id, channel_name, atom_feed, video_count, _, _ = rss_scanner.get_rss_feed(full_url) + _, channel_id, channel_name, atom_feed, video_count, _, _ = rss_scanner.get_rss_feed(full_url, feed_type=feed_type) if atom_feed: # Fix channel name in feed @@ -170,10 +177,13 @@ def get_feed(channel_url=None): return Response(f"Error: {str(e)}", status=500) -@app.route('/feed/', methods=['GET']) -@cache.cached(timeout=300, key_prefix='feed_') -def get_cached_feed(channel): +@app.route('/feed//', methods=['GET']) +@cache.cached(timeout=300, key_prefix=lambda: f"feed_{request.path}") +def get_cached_feed(feed_type, channel): """Cached RSS feed endpoint - updates every 5 minutes.""" + if feed_type not in ("all", "videos", "shorts", "live"): + return Response("Invalid feed type", status=400) + # Clean channel from URL channel = urllib.parse.unquote(channel) if channel.startswith('http'): @@ -182,7 +192,7 @@ def get_cached_feed(channel): full_url = f"https://{channel}" try: - _, channel_id, channel_name, atom_feed, video_count, _, _ = rss_scanner.get_rss_feed(full_url) + _, channel_id, channel_name, atom_feed, video_count, _, _ = rss_scanner.get_rss_feed(full_url, feed_type=feed_type) if atom_feed: if channel_name: diff --git a/rss_scanner.py b/rss_scanner.py index d41d5b1..2c8816f 100644 --- a/rss_scanner.py +++ b/rss_scanner.py @@ -25,33 +25,35 @@ YOUTUBE_RSS_TEMPLATE = "https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}" -def get_channel_videos(channel_id: str) -> list[dict]: - """Fetch recent videos from channel by scraping.""" +def _extract_video_ids_from_page(html: str, limit: int = 10) -> list[str]: + """Extract unique YouTube video IDs from page HTML.""" + video_ids: list[str] = [] + seen_ids: set[str] = set() + for match in re.finditer(r'"videoId":"([a-zA-Z0-9_-]{11})"', html): + video_id = match.group(1) + if video_id not in seen_ids: + seen_ids.add(video_id) + video_ids.append(video_id) + if len(video_ids) >= limit: + break + return video_ids + + +def get_channel_videos(channel_id: str, feed_type: str = "all", limit: int = 10) -> list[dict]: + """Fetch recent videos from channel pages by scraping.""" + page_path_by_type = { + "all": "videos", + "videos": "videos", + "shorts": "shorts", + "live": "streams", + } + page_path = page_path_by_type.get(feed_type, "videos") + try: - # Get channel home page to find uploads playlist ID - channel_url = f"https://www.youtube.com/channel/{channel_id}" - html = fetch_url(channel_url) - - # Find uploads playlist ID - uploads_match = re.search(r'"browseId":"([^"]+)","browseEndpoint":\{" browsePath":"[^"]*\/video', html) - - videos = [] - seen_ids = set() - - # Extract from videos page - videos_url = f"https://www.youtube.com/channel/{channel_id}/videos" - videos_html = fetch_url(videos_url) - - vid_pattern = re.compile(r'"videoId":"([a-zA-Z0-9_-]{11})"') - for vid_match in vid_pattern.finditer(videos_html): - vid = vid_match.group(1) - if vid not in seen_ids: - seen_ids.add(vid) - videos.append({'videoId': vid, 'title': f'Video {vid}', 'published': ''}) - if len(videos) >= 10: - break - - return videos + page_url = f"https://www.youtube.com/channel/{channel_id}/{page_path}" + page_html = fetch_url(page_url) + video_ids = _extract_video_ids_from_page(page_html, limit=limit) + return [{'videoId': vid, 'title': f'Video {vid}', 'published': ''} for vid in video_ids] except Exception: return [] @@ -289,7 +291,7 @@ def read_feed(feed_url: str, limit: int = 10) -> list[dict]: xml = fetch_url(feed_url) return parse_rss_entries(xml, limit=limit) -def get_rss_feed(url: str, include_api_endpoints: bool = False, base_url: str = "http://localhost:8080") -> tuple: +def get_rss_feed(url: str, include_api_endpoints: bool = False, base_url: str = "http://localhost:8080", feed_type: str = "all") -> tuple: """Get RSS feed data for a YouTube channel. Returns: (youtube_rss, channel_id, channel_name, atom_feed, video_count, invidious_rss, api_endpoints) @@ -299,8 +301,8 @@ def get_rss_feed(url: str, include_api_endpoints: bool = False, base_url: str = # YouTube's native RSS URL (mostly broken but included for reference) youtube_rss = YOUTUBE_RSS_TEMPLATE.format(channel_id=channel_id) - # Try to get videos from YouTube channel page - videos = get_channel_videos(channel_id) + # Try to get videos from the selected YouTube channel page + videos = get_channel_videos(channel_id, feed_type=feed_type) video_count = len(videos) # Generate Atom feed if we got videos @@ -324,8 +326,11 @@ def get_rss_feed(url: str, include_api_endpoints: bool = False, base_url: str = encoded_url = urllib.parse.quote(url, safe="") api_endpoints = { "json_api": f"{base_url.rstrip('/')}/api/feed", - "atom_feed_path": f"{base_url.rstrip('/')}/feed/{encoded_url}", - "atom_feed_query": f"{base_url.rstrip('/')}/feed/?channel_url={urllib.parse.quote(url)}", + "atom_feed_path": f"{base_url.rstrip('/')}/feed/all/{encoded_url}", + "atom_feed_query": f"{base_url.rstrip('/')}/feed/all/{urllib.parse.quote(url, safe="")}", + "videos_feed": f"{base_url.rstrip('/')}/feed/videos/{encoded_url}", + "shorts_feed": f"{base_url.rstrip('/')}/feed/shorts/{encoded_url}", + "live_feed": f"{base_url.rstrip('/')}/feed/live/{encoded_url}", } return youtube_rss, channel_id, channel_name, atom_feed, video_count, invidious_rss, api_endpoints From 7243c47025cde76d0da8d0c929a46d8a4b47e7ae Mon Sep 17 00:00:00 2001 From: Abel <196466003+DisabledAbel@users.noreply.github.com> Date: Wed, 13 May 2026 19:01:02 -0700 Subject: [PATCH 2/3] Add feed type selector in UI and API feed_type handling --- api/app.py | 6 +++++- api/index.html | 12 ++++++++++++ index.html | 11 +++++++++++ templates/index.html | 14 +++++++++++++- 4 files changed, 41 insertions(+), 2 deletions(-) diff --git a/api/app.py b/api/app.py index 1c5bf18..fef4c15 100644 --- a/api/app.py +++ b/api/app.py @@ -50,6 +50,9 @@ def api_feed(): try: # Normalize include_api_endpoints to boolean raw_value = data.get('include_api_endpoints', False) + feed_type = str(data.get('feed_type', request.args.get('feed_type', 'all'))).lower().strip() + if feed_type not in ('all', 'videos', 'shorts', 'live'): + return jsonify({'error': 'Invalid feed_type. Use all, videos, shorts, or live.'}), 400 if isinstance(raw_value, bool): include_api_endpoints = raw_value elif isinstance(raw_value, str): @@ -63,7 +66,8 @@ def api_feed(): youtube_rss, channel_id, channel_name, atom_feed, video_count, _, api_endpoints = rss_scanner.get_rss_feed( url, include_api_endpoints=include_api_endpoints, - base_url=base_url + base_url=base_url, + feed_type=feed_type ) discord_result = None diff --git a/api/index.html b/api/index.html index db471c1..dbfd3e2 100644 --- a/api/index.html +++ b/api/index.html @@ -122,6 +122,16 @@

YouTube RSS Scanner

+
+ + +
+
+
+ + +
+
+
+ + +
+