Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,28 @@ npm start "https://www.youtube.com/@LinusTechTips"

---

### `/feed/<channel_url>` (GET)
### `/feed/<type>/<channel_url>` (GET)

### Get RSS feed via URL path

Access a YouTube channel's RSS feed by passing the channel URL in the path.

Supported feed types:

- `all` (default; keeps the existing behavior, which currently scrapes the same videos tab as `videos`)
- `videos` (regular videos tab)
- `shorts` (shorts tab)
- `live` (live/streams tab)

Endpoint examples:

```text
/feed/all/:channel
/feed/videos/:channel
/feed/shorts/:channel
/feed/live/:channel
```

**Important:** The `channel_url` parameter must be URL-encoded (percent-encoded) to avoid 404 errors.

**Example with encoding:**
Expand All @@ -105,13 +121,13 @@ const encodedUrl = encodeURIComponent(channelUrl);
// Result: https%3A%2F%2Fwww.youtube.com%2Fchannel%2FUCXuqSBlHAE6Xw-yeJA0Tunw

// Use in request
fetch(`/feed/${encodedUrl}`)
fetch(`/feed/videos/${encodedUrl}`)
```

**Alternative (recommended):** Use query parameters to avoid encoding issues:
**Alternative (recommended):** Request the feed with the channel URL already percent-encoded in the path:

```bash
/feed?channel_url=https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw
/feed/all/https%3A%2F%2Fwww.youtube.com%2Fchannel%2FUCXuqSBlHAE6Xw-yeJA0Tunw
```

---
Expand Down
63 changes: 22 additions & 41 deletions api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ def api_feed():
try:
# Normalize include_api_endpoints to boolean
raw_value = data.get('include_api_endpoints', False)
feed_type = str(data.get('feed_type', request.args.get('feed_type', 'all'))).lower().strip()
if feed_type not in ('all', 'videos', 'shorts', 'live'):
return jsonify({'error': 'Invalid feed_type. Use all, videos, shorts, or live.'}), 400
if isinstance(raw_value, bool):
include_api_endpoints = raw_value
elif isinstance(raw_value, str):
Expand All @@ -63,7 +66,8 @@ def api_feed():
youtube_rss, channel_id, channel_name, atom_feed, video_count, _, api_endpoints = rss_scanner.get_rss_feed(
url,
include_api_endpoints=include_api_endpoints,
base_url=base_url
base_url=base_url,
feed_type=feed_type
)

discord_result = None
Expand Down Expand Up @@ -134,57 +138,34 @@ def send_to_discord(webhook_url: str, youtube_rss: str, channel_id: str | None,


@app.route('/feed/')
@app.route('/feed/<path:channel_url>')
def get_feed(channel_url=None):
"""Generate RSS feed for given channel."""
def feed_usage():
return "Usage: /feed/{type}/{youtube_channel_url} where type is all|videos|shorts|live"


@app.route('/feed/<feed_type>/<path:channel_url>')
@cache.cached(timeout=300, key_prefix=lambda: f"feed_{request.path}")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Cache stores transient error responses (404/500) for 5 minutes

The @cache.cached decorator at api/app.py:146 caches all responses from get_feed, including Response("No videos found", status=404) at line 180 and Response(f"Error: {str(e)}", status=500) at line 182. If YouTube scraping temporarily fails (e.g., due to rate limiting or network issues), the error response is served from cache for 5 minutes, even after the transient issue resolves. The old code effectively had no working cache (the cached route get_cached_feed was shadowed by the identically-patterned uncached get_feed route), so this is a new behavior introduced by the PR.

Suggested change
@cache.cached(timeout=300, key_prefix=lambda: f"feed_{request.path}")
@cache.cached(timeout=300, key_prefix=lambda: f"feed_{request.path}", response_filter=lambda response: response.status_code == 200)
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

def get_feed(feed_type, channel_url=None):
"""Generate RSS feed for given channel. Cached for 5 minutes."""
if channel_url is None:
return "Usage: /feed/{youtube_channel_url}"

return "Usage: /feed/{type}/{youtube_channel_url}"

if feed_type not in ("all", "videos", "shorts", "live"):
return Response("Invalid feed type", status=400)

# Decode URL-encoded parts
channel_url = urllib.parse.unquote(channel_url)

# Reconstruct full URL (Flask captures everything after /feed/)
if not channel_url.startswith('http'):
full_url = f"https://{channel_url}"
else:
full_url = channel_url

try:
_, channel_id, channel_name, atom_feed, video_count, _, _ = rss_scanner.get_rss_feed(full_url)

if atom_feed:
# Fix channel name in feed
if channel_name:
atom_feed = atom_feed.replace(
f">{channel_id or channel_id} - YouTube Videos",
f">{channel_name} - YouTube Videos"
)
atom_feed = atom_feed.replace(
f"<name>{channel_id or channel_id}</name>",
f"<name>{channel_name}</name>"
)
return Response(atom_feed, mimetype='application/xml')
else:
return Response("No videos found", status=404)
except Exception as e:
return Response(f"Error: {str(e)}", status=500)


@app.route('/feed/<path:channel>', methods=['GET'])
@cache.cached(timeout=300, key_prefix='feed_')
def get_cached_feed(channel):
"""Cached RSS feed endpoint - updates every 5 minutes."""
# Clean channel from URL
channel = urllib.parse.unquote(channel)
if channel.startswith('http'):
full_url = channel
else:
full_url = f"https://{channel}"

try:
_, channel_id, channel_name, atom_feed, video_count, _, _ = rss_scanner.get_rss_feed(full_url)
_, channel_id, channel_name, atom_feed, video_count, _, _ = rss_scanner.get_rss_feed(full_url, feed_type=feed_type)

if atom_feed:
# Fix channel name in feed
if channel_name:
atom_feed = atom_feed.replace(
f">{channel_id or channel_id} - YouTube Videos",
Expand Down
12 changes: 12 additions & 0 deletions api/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,16 @@ <h1>YouTube RSS Scanner</h1>
<input type="text" id="channelUrl" placeholder="https://www.youtube.com/@channel or https://www.youtube.com/c/username">
</div>

<div class="input-group">
<label>Feed Type</label>
<select id="feedType">
<option value="all">All (combined)</option>
<option value="videos">Videos only</option>
<option value="shorts">Shorts only</option>
<option value="live">Live streams only</option>
</select>
</div>

<button id="getFeedBtn" onclick="getFeed()">Get RSS Feed</button>
<div class="input-group" style="margin-top: 12px;">
<label>
Expand Down Expand Up @@ -190,6 +200,7 @@ <h1>YouTube RSS Scanner</h1>
body: JSON.stringify({
url: url,
include_api_endpoints: document.getElementById('includeApiEndpoints').checked,
feed_type: document.getElementById('feedType').value,
discord_webhook_url: document.getElementById('discordWebhookUrl').value.trim()
})
});
Expand All @@ -213,6 +224,7 @@ <h1>YouTube RSS Scanner</h1>
if (data.api_endpoints && Object.keys(data.api_endpoints).length > 0) {
html += '<div class="atom-output"><div class="feed-label">API Endpoints:</div>';
html += '<div class="feed-link">POST JSON: ' + data.api_endpoints.json_api + '</div>';
if (data.api_endpoints.atom_feed_path) { html += '<div class="feed-link">Selected Feed: ' + data.api_endpoints.atom_feed_path + '</div>'; }
html += '</div>';
}

Expand Down
11 changes: 11 additions & 0 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,16 @@ <h1>YouTube RSS Scanner</h1>
<input type="text" id="channelUrl" placeholder="https://www.youtube.com/@channel or https://www.youtube.com/c/username">
</div>

<div class="input-group">
<label>Feed Type</label>
<select id="feedType">
<option value="all">All (combined)</option>
<option value="videos">Videos only</option>
<option value="shorts">Shorts only</option>
<option value="live">Live streams only</option>
</select>
</div>

<button id="getFeedBtn" onclick="getFeed()">Get RSS Feed</button>
<div class="input-group" style="margin-top: 12px;">
<label>
Expand Down Expand Up @@ -193,6 +203,7 @@ <h1>YouTube RSS Scanner</h1>
body: JSON.stringify({
url: url,
include_api_endpoints: document.getElementById('includeApiEndpoints').checked,
feed_type: document.getElementById('feedType').value,
discord_webhook_url: document.getElementById('discordWebhookUrl').value.trim()
})
});
Expand Down
67 changes: 36 additions & 31 deletions rss_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,33 +25,35 @@
YOUTUBE_RSS_TEMPLATE = "https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"


def get_channel_videos(channel_id: str) -> list[dict]:
"""Fetch recent videos from channel by scraping."""
def _extract_video_ids_from_page(html: str, limit: int = 10) -> list[str]:
    """Extract unique YouTube video IDs from page HTML.

    Scans ``html`` for ``"videoId":"<11 characters>"`` occurrences and
    collects each ID the first time it appears.

    Args:
        html: Raw page markup to scan.
        limit: Maximum number of IDs to return. Zero or a negative value
            yields an empty list.

    Returns:
        Up to ``limit`` distinct video IDs, in order of first appearance.
    """
    # Guard first: the in-loop check only runs after an ID has been appended,
    # so without this a non-positive limit would still return one ID.
    if limit <= 0:
        return []

    video_ids: list[str] = []
    seen_ids: set[str] = set()
    for match in re.finditer(r'"videoId":"([a-zA-Z0-9_-]{11})"', html):
        video_id = match.group(1)
        if video_id in seen_ids:
            continue
        seen_ids.add(video_id)
        video_ids.append(video_id)
        if len(video_ids) >= limit:
            break
    return video_ids


def get_channel_videos(channel_id: str, feed_type: str = "all", limit: int = 10) -> list[dict]:
    """Fetch recent videos from one of a channel's tabs by scraping its HTML.

    Args:
        channel_id: YouTube channel ID used to build the
            ``/channel/<id>/<tab>`` URL.
        feed_type: One of ``"all"``, ``"videos"``, ``"shorts"`` or ``"live"``.
            ``"all"`` and any unrecognized value use the ``videos`` tab.
        limit: Maximum number of videos to return.

    Returns:
        A list of dicts with ``videoId``, a placeholder ``title`` and an
        empty ``published`` field; an empty list if fetching or parsing
        fails.
    """
    page_path_by_type = {
        "all": "videos",
        "videos": "videos",
        "shorts": "shorts",
        "live": "streams",
    }
    page_path = page_path_by_type.get(feed_type, "videos")

    try:
        # Only the selected tab is requested; the channel home page is not
        # needed to list videos.
        page_url = f"https://www.youtube.com/channel/{channel_id}/{page_path}"
        page_html = fetch_url(page_url)
        video_ids = _extract_video_ids_from_page(page_html, limit=limit)
        return [{'videoId': vid, 'title': f'Video {vid}', 'published': ''} for vid in video_ids]
    except Exception:
        # Deliberate best-effort: a failed scrape is reported as "no videos"
        # so callers can fall back instead of crashing.
        return []

Expand Down Expand Up @@ -289,7 +291,7 @@ def read_feed(feed_url: str, limit: int = 10) -> list[dict]:
xml = fetch_url(feed_url)
return parse_rss_entries(xml, limit=limit)

def get_rss_feed(url: str, include_api_endpoints: bool = False, base_url: str = "http://localhost:8080") -> tuple:
def get_rss_feed(url: str, include_api_endpoints: bool = False, base_url: str = "http://localhost:8080", feed_type: str = "all") -> tuple:
"""Get RSS feed data for a YouTube channel.

Returns: (youtube_rss, channel_id, channel_name, atom_feed, video_count, invidious_rss, api_endpoints)
Expand All @@ -299,8 +301,8 @@ def get_rss_feed(url: str, include_api_endpoints: bool = False, base_url: str =
# YouTube's native RSS URL (mostly broken but included for reference)
youtube_rss = YOUTUBE_RSS_TEMPLATE.format(channel_id=channel_id)

# Try to get videos from YouTube channel page
videos = get_channel_videos(channel_id)
# Try to get videos from the selected YouTube channel page
videos = get_channel_videos(channel_id, feed_type=feed_type)
video_count = len(videos)

# Generate Atom feed if we got videos
Expand All @@ -324,8 +326,11 @@ def get_rss_feed(url: str, include_api_endpoints: bool = False, base_url: str =
encoded_url = urllib.parse.quote(url, safe="")
api_endpoints = {
"json_api": f"{base_url.rstrip('/')}/api/feed",
"atom_feed_path": f"{base_url.rstrip('/')}/feed/{encoded_url}",
"atom_feed_query": f"{base_url.rstrip('/')}/feed/?channel_url={urllib.parse.quote(url)}",
"atom_feed_path": f"{base_url.rstrip('/')}/feed/all/{encoded_url}",
"atom_feed_query": f"{base_url.rstrip('/')}/feed/all/{urllib.parse.quote(url, safe='')}",
Comment on lines +329 to +330

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 atom_feed_path and atom_feed_query produce identical URLs

The PR changed atom_feed_query from a distinct query-parameter-based URL format (/feed/?channel_url=...) to the same path-based format as atom_feed_path. Since encoded_url = urllib.parse.quote(url, safe="") at rss_scanner.py:326 and the inline urllib.parse.quote(url, safe='') at line 330 produce the same result, both keys now generate the exact same URL. The CLI at rss_scanner.py:416-417 still prints both as "Atom Feed (path)" and "Atom Feed (query)" — showing identical URLs with different labels, which is confusing to users.

Prompt for agents
The atom_feed_path and atom_feed_query entries in the api_endpoints dictionary (rss_scanner.py:329-330) now produce identical URLs since both use the same path-based format. The old code had atom_feed_query using a query-parameter format (/feed/?channel_url=...) which was a distinct alternative.

Since the query-parameter route was removed in api/app.py, either:
1. Remove the atom_feed_query key entirely and update all references (rss_scanner.py:417 CLI print, and any UI references).
2. Or, if the intent is to show the currently-selected feed type's URL rather than always 'all', replace atom_feed_query with a URL that uses the selected feed_type parameter, e.g. f"{base_url}/feed/{feed_type}/{encoded_url}".

Also update rss_scanner.py:416-417 (the CLI output) to stop printing the duplicate entry.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

"videos_feed": f"{base_url.rstrip('/')}/feed/videos/{encoded_url}",
"shorts_feed": f"{base_url.rstrip('/')}/feed/shorts/{encoded_url}",
"live_feed": f"{base_url.rstrip('/')}/feed/live/{encoded_url}",
}

return youtube_rss, channel_id, channel_name, atom_feed, video_count, invidious_rss, api_endpoints
Expand Down
14 changes: 13 additions & 1 deletion templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,16 @@ <h1>YouTube RSS Scanner</h1>
<input type="text" id="channelUrl" placeholder="https://www.youtube.com/@channel or https://www.youtube.com/c/username">
</div>

<div class="input-group">
<label>Feed Type</label>
<select id="feedType">
<option value="all">All (combined)</option>
<option value="videos">Videos only</option>
<option value="shorts">Shorts only</option>
<option value="live">Live streams only</option>
</select>
</div>

<button id="getFeedBtn" onclick="getFeed()">Get RSS Feed</button>
<div class="input-group" style="margin-top: 12px;">
<label>
Expand Down Expand Up @@ -184,7 +194,8 @@ <h1>YouTube RSS Scanner</h1>
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
url: url,
include_api_endpoints: document.getElementById('includeApiEndpoints').checked
include_api_endpoints: document.getElementById('includeApiEndpoints').checked,
feed_type: document.getElementById('feedType').value
})
});

Expand All @@ -207,6 +218,7 @@ <h1>YouTube RSS Scanner</h1>
if (data.api_endpoints && Object.keys(data.api_endpoints).length > 0) {
html += '<div class="atom-output"><div class="feed-label">API Endpoints:</div>';
html += '<div class="feed-link">POST JSON: ' + data.api_endpoints.json_api + '</div>';
if (data.api_endpoints.atom_feed_path) { html += '<div class="feed-link">Selected Feed: ' + data.api_endpoints.atom_feed_path + '</div>'; }
html += '</div>';
}

Expand Down