From 6a110d6b328d932723cc7587f3ddfdca1aed8983 Mon Sep 17 00:00:00 2001
From: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
Date: Sun, 14 Jun 2026 07:08:34 +0800
Subject: [PATCH] fix(fetch): accept socks proxy alias

httpx does not recognize the generic socks:// proxy scheme, so rewrite it
to socks5:// for an explicitly configured proxy URL and for the proxy
environment variables.

When only some of the applicable environment variables use the alias, the
highest-precedence variable that is set is passed to httpx explicitly, so
an aliased ALL_PROXY does not override a valid HTTPS_PROXY/HTTP_PROXY.
---
 src/fetch/pyproject.toml                 |  2 +-
 src/fetch/src/mcp_server_fetch/server.py | 50 +++++++++++++++++++++++-
 src/fetch/tests/test_server.py           | 42 ++++++++++++++++++++
 src/fetch/uv.lock                        | 18 ++++++++-
 4 files changed, 107 insertions(+), 5 deletions(-)

diff --git a/src/fetch/pyproject.toml b/src/fetch/pyproject.toml
index e2d0d38d0c..529a02b239 100644
--- a/src/fetch/pyproject.toml
+++ b/src/fetch/pyproject.toml
@@ -16,7 +16,7 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
 ]
 dependencies = [
-    "httpx>=0.27",
+    "httpx[socks]>=0.27",
     "markdownify>=0.13.1",
     "mcp>=1.1.3",
     "protego>=0.3.1",
diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py
index b42c7b1f6b..0b057f5bce 100644
--- a/src/fetch/src/mcp_server_fetch/server.py
+++ b/src/fetch/src/mcp_server_fetch/server.py
@@ -1,3 +1,4 @@
+import os
 from typing import Annotated, Tuple
 from urllib.parse import urlparse, urlunparse
 
@@ -63,6 +64,51 @@ def get_robots_txt_url(url: str) -> str:
     return robots_url
 
 
+def normalize_proxy_url(proxy_url: str | None) -> str | None:
+    """
+    Rewrite the generic `socks://` proxy scheme to `socks5://`.
+
+    httpx does not recognize `socks://`; any other value (including None) is returned unchanged.
+    """
+    if proxy_url is None:
+        return None
+
+    if proxy_url.lower().startswith("socks://"):
+        return f"socks5://{proxy_url[len('socks://'):]}"
+
+    return proxy_url
+
+
+def proxy_url_for_request(url: str, proxy_url: str | None = None) -> str | None:
+    """
+    Choose the proxy to hand to httpx for a request to `url`.
+
+    An explicit `proxy_url` always wins. Otherwise None is returned so that
+    httpx applies its own environment handling, unless one of the applicable
+    proxy variables uses the `socks://` alias. In that case the
+    highest-precedence variable that is set is returned (normalized), so an
+    aliased ALL_PROXY never overrides a scheme-specific proxy.
+    """
+    if proxy_url:
+        return normalize_proxy_url(proxy_url)
+
+    scheme = urlparse(url).scheme.lower()
+    proxy_keys = []
+    if scheme == "https":
+        proxy_keys.extend(("HTTPS_PROXY", "https_proxy"))
+    elif scheme == "http":
+        proxy_keys.extend(("HTTP_PROXY", "http_proxy"))
+    proxy_keys.extend(("ALL_PROXY", "all_proxy"))
+
+    configured = [value for key in proxy_keys if (value := os.environ.get(key))]
+    if all(normalize_proxy_url(value) == value for value in configured):
+        # Nothing needs rewriting: let httpx read the environment itself.
+        return None
+
+    # Pass the winning variable explicitly so httpx never parses the alias.
+    return normalize_proxy_url(configured[0])
+
+
 async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
     """
     Check if the URL can be fetched by the user agent according to the robots.txt file.
@@ -72,7 +118,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url:
 
     robot_txt_url = get_robots_txt_url(url)
 
-    async with AsyncClient(proxy=proxy_url) as client:
+    async with AsyncClient(proxy=proxy_url_for_request(robot_txt_url, proxy_url)) as client:
         try:
             response = await client.get(
                 robot_txt_url,
@@ -116,7 +162,7 @@ async def fetch_url(
     """
     from httpx import AsyncClient, HTTPError
 
-    async with AsyncClient(proxy=proxy_url) as client:
+    async with AsyncClient(proxy=proxy_url_for_request(url, proxy_url)) as client:
         try:
             response = await client.get(
                 url,
diff --git a/src/fetch/tests/test_server.py b/src/fetch/tests/test_server.py
index 96c1cb38c7..8f62a0c65d 100644
--- a/src/fetch/tests/test_server.py
+++ b/src/fetch/tests/test_server.py
@@ -10,6 +10,7 @@
     check_may_autonomously_fetch_url,
     fetch_url,
     DEFAULT_USER_AGENT_AUTONOMOUS,
+    proxy_url_for_request,
 )
 
 
@@ -324,3 +325,44 @@ async def test_fetch_with_proxy(self):
 
             # Verify AsyncClient was called with proxy
             mock_client_class.assert_called_once_with(proxy="http://proxy.example.com:8080")
+
+    @pytest.mark.asyncio
+    async def test_fetch_accepts_socks_proxy_alias(self):
+        """Test that socks:// proxy URLs are accepted as SOCKS5 proxies."""
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.text = '{"data": "test"}'
+        mock_response.headers = {"content-type": "application/json"}
+
+        with patch("httpx.AsyncClient") as mock_client_class:
+            mock_client = AsyncMock()
+            mock_client.get = AsyncMock(return_value=mock_response)
+            mock_client_class.return_value.__aenter__ = AsyncMock(return_value=mock_client)
+            mock_client_class.return_value.__aexit__ = AsyncMock(return_value=None)
+
+            await fetch_url(
+                "https://example.com/data",
+                DEFAULT_USER_AGENT_AUTONOMOUS,
+                proxy_url="socks://127.0.0.1:2080/"
+            )
+
+            mock_client_class.assert_called_once_with(proxy="socks5://127.0.0.1:2080/")
+
+    def test_fetch_accepts_socks_proxy_from_environment(self, monkeypatch):
+        """Test that invalid socks:// environment proxies are normalized."""
+        monkeypatch.delenv("HTTPS_PROXY", raising=False)
+        monkeypatch.delenv("https_proxy", raising=False)
+        monkeypatch.delenv("ALL_PROXY", raising=False)
+        monkeypatch.delenv("all_proxy", raising=False)
+        monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:2080/")
+
+        assert proxy_url_for_request("https://example.com/data") == "socks5://127.0.0.1:2080/"
+
+    def test_env_proxy_precedence_with_socks_alias(self, monkeypatch):
+        """Test that a scheme-specific proxy is not overridden by a socks:// ALL_PROXY."""
+        monkeypatch.delenv("https_proxy", raising=False)
+        monkeypatch.delenv("all_proxy", raising=False)
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example.com:8080")
+        monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:2080/")
+
+        assert proxy_url_for_request("https://example.com/data") == "http://proxy.example.com:8080"
diff --git a/src/fetch/uv.lock b/src/fetch/uv.lock
index cde68d0aa4..0f5fbedf7e 100644
--- a/src/fetch/uv.lock
+++ b/src/fetch/uv.lock
@@ -349,6 +349,11 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/56/95/9377bcb415797e44274b51d46e3249eba641711cf3348050f76ee7b15ffc/httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0", size = 76395, upload-time = "2024-08-27T12:53:59.653Z" },
 ]
 
+[package.optional-dependencies]
+socks = [
+    { name = "socksio" },
+]
+
 [[package]]
 name = "httpx-sse"
 version = "0.4.0"
@@ -564,7 +569,7 @@ name = "mcp-server-fetch"
 version = "0.6.3"
 source = { editable = "." }
 dependencies = [
-    { name = "httpx" },
+    { name = "httpx", extra = ["socks"] },
     { name = "markdownify" },
     { name = "mcp" },
     { name = "protego" },
@@ -583,7 +588,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "httpx", specifier = ">=0.27" },
+    { name = "httpx", extras = ["socks"], specifier = ">=0.27" },
     { name = "markdownify", specifier = ">=0.13.1" },
     { name = "mcp", specifier = ">=1.1.3" },
     { name = "protego", specifier = ">=0.3.1" },
@@ -1180,6 +1185,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
 ]
 
+[[package]]
+name = "socksio"
+version = "1.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f8/5c/48a7d9495be3d1c651198fd99dbb6ce190e2274d0f28b9051307bdec6b85/socksio-1.0.0.tar.gz", hash = "sha256:f88beb3da5b5c38b9890469de67d0cb0f9d494b78b106ca1845f96c10b91c4ac", size = 19055, upload-time = "2020-04-17T15:50:34.664Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/37/c3/6eeb6034408dac0fa653d126c9204ade96b819c936e136c5e8a6897eee9c/socksio-1.0.0-py3-none-any.whl", hash = "sha256:95dc1f15f9b34e8d7b16f06d74b8ccf48f609af32ab33c608d08761c5dcbb1f3", size = 12763, upload-time = "2020-04-17T15:50:31.878Z" },
+]
+
 [[package]]
 name = "soupsieve"
 version = "2.6"