From 5755b1040b7114bc092bb14bf19db8616f201288 Mon Sep 17 00:00:00 2001
From: orbisai0security <orbisai0security@users.noreply.github.com>
Date: Mon, 30 Mar 2026 03:28:13 +0000
Subject: [PATCH] fix: add URL validation in get_imdbtop.py.DISABLED

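get_imdb_top_movies makes an external HTTP request using requests.
Check the request URL against an HTTPS-only host allowlist
(www.imdb.com) and raise ValueError before fetching if it does not match.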
---
 web_programming/get_imdbtop.py.DISABLED | 29 ++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/web_programming/get_imdbtop.py.DISABLED b/web_programming/get_imdbtop.py.DISABLED
index 5f7105f83239..e615013bb286 100644
--- a/web_programming/get_imdbtop.py.DISABLED
+++ b/web_programming/get_imdbtop.py.DISABLED
@@ -1,6 +1,31 @@
+from urllib.parse import urlparse
+
 import bs4
 import requests
 
+ALLOWED_HOSTS = {"www.imdb.com"}
+
+
+def _validate_url(url: str) -> str:
+    """Check a URL against the HTTPS-only host allowlist to guard against SSRF.
+
+    Args:
+        url: The URL to validate; only its scheme and hostname are checked.
+
+    Returns:
+        The original URL string, unmodified, if it passes both checks.
+
+    Raises:
+        ValueError: If the scheme is not "https" or the host is not in ALLOWED_HOSTS.
+    """
+    parsed = urlparse(url)
+    if parsed.scheme != "https" or parsed.hostname not in ALLOWED_HOSTS:
+        raise ValueError(
+            f"URL '{url}' is not allowed. Only HTTPS requests to "
+            f"{ALLOWED_HOSTS} are permitted."
+        )
+    return url
+
 
 def get_movie_data_from_soup(soup: bs4.element.ResultSet) -> dict[str, str]:
     return {
@@ -35,7 +60,9 @@ def get_imdb_top_movies(num_movies: int = 5) -> tuple:
         "https://www.imdb.com/search/title?title_type="
         f"feature&sort=num_votes,desc&count={num_movies}"
     )
-    source = bs4.BeautifulSoup(requests.get(base_url).content, "html.parser")
+    source = bs4.BeautifulSoup(
+        requests.get(_validate_url(base_url)).content, "html.parser"
+    )
     return tuple(
         get_movie_data_from_soup(movie)
         for movie in source.find_all("div", class_="lister-item mode-advanced")