Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions cmd_arg/arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,14 @@ def main(
rich_help_panel="Account Configuration",
),
] = config.COOKIES,
static_proxy: Annotated[
str,
typer.Option(
"--static_proxy",
help="Static HTTP proxy URL list (comma-separated, for example http://user:pass@host:port,http://host2:port). When set, overrides provider-based proxy rotation.",
rich_help_panel="Proxy Configuration",
),
] = "",
specified_id: Annotated[
str,
typer.Option(
Expand Down Expand Up @@ -296,7 +304,7 @@ def main(
str,
typer.Option(
"--ip_proxy_provider_name",
help="IP proxy provider name (kuaidaili | wandouhttp)",
help="IP proxy provider name (kuaidaili | wandouhttp | static)",
rich_help_panel="Proxy Configuration",
),
] = config.IP_PROXY_PROVIDER_NAME,
Expand Down Expand Up @@ -325,12 +333,13 @@ def main(
config.CDP_HEADLESS = enable_headless
config.SAVE_DATA_OPTION = save_data_option.value
config.COOKIES = cookies
config.STATIC_PROXY_URL = static_proxy.strip()
config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES = max_comments_count_singlenotes
config.MAX_CONCURRENCY_NUM = max_concurrency_num
config.SAVE_DATA_PATH = save_data_path
config.ENABLE_IP_PROXY = enable_ip_proxy_value
config.ENABLE_IP_PROXY = enable_ip_proxy_value or bool(config.STATIC_PROXY_URL)
config.IP_PROXY_POOL_COUNT = ip_proxy_pool_count
config.IP_PROXY_PROVIDER_NAME = ip_proxy_provider_name
config.IP_PROXY_PROVIDER_NAME = "static" if config.STATIC_PROXY_URL else ip_proxy_provider_name

# Set platform-specific ID lists for detail/creator mode
if specified_id_list:
Expand Down Expand Up @@ -369,6 +378,7 @@ def main(
save_data_option=config.SAVE_DATA_OPTION,
init_db=init_db_value,
cookies=config.COOKIES,
static_proxy=static_proxy.strip(),
specified_id=specified_id,
creator_id=creator_id,
)
Expand Down
2 changes: 1 addition & 1 deletion config/base_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
IP_PROXY_POOL_COUNT = 2

# Proxy IP provider name
IP_PROXY_PROVIDER_NAME = "kuaidaili" # kuaidaili | wandouhttp
IP_PROXY_PROVIDER_NAME = "kuaidaili" # kuaidaili | wandouhttp | static

# Setting to True will not open the browser (headless browser)
# Setting False will open a browser
Expand Down
1 change: 1 addition & 0 deletions proxy/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@
# @Desc :
from .jishu_http_proxy import new_jisu_http_proxy
from .kuaidl_proxy import new_kuai_daili_proxy
from .static_http_proxy import new_static_http_proxy
from .wandou_http_proxy import new_wandou_http_proxy
78 changes: 78 additions & 0 deletions proxy/providers/static_http_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/proxy/providers/static_http_proxy.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#

# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。


# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2026/3/26
# @Desc : Static HTTP proxy provider implementation
from typing import List
from urllib.parse import urlparse

import config

from proxy import ProxyProvider
from proxy.types import IpInfoModel, ProviderNameEnum


def parse_static_http_proxy(proxy_url: str) -> IpInfoModel:
    """Parse one static HTTP proxy URL into an ``IpInfoModel``.

    A value without a ``scheme://`` prefix is treated as ``http://<value>``,
    so a bare ``host:port`` (optionally ``user:pass@host:port``) is accepted.

    Args:
        proxy_url: A single proxy URL; surrounding whitespace is ignored.

    Returns:
        An ``IpInfoModel`` built from the URL's host, port and optional
        credentials (empty strings when absent), with ``protocol="http://"``
        and ``expired_time_ts=None``.

    Raises:
        ValueError: If the value is empty, uses a scheme other than ``http``,
            lacks a host or port, or carries a port that is not a valid
            port number.
    """
    normalized = proxy_url.strip()
    if not normalized:
        raise ValueError("--static_proxy is empty")
    if "://" not in normalized:
        normalized = f"http://{normalized}"

    parsed = urlparse(normalized)
    if parsed.scheme != "http":
        raise ValueError("--static_proxy must use the http scheme")
    if not parsed.hostname:
        raise ValueError("--static_proxy must include host and port")

    # ``parsed.port`` is evaluated lazily and raises a generic ValueError for a
    # non-numeric or out-of-range port. Re-raise it with a message that names
    # the option; the URL itself is deliberately left out of the message
    # because it may embed credentials.
    try:
        port = parsed.port
    except ValueError as err:
        raise ValueError("--static_proxy has an invalid port") from err
    if not port:
        raise ValueError("--static_proxy must include host and port")

    return IpInfoModel(
        ip=parsed.hostname,
        port=port,
        user=parsed.username or "",
        password=parsed.password or "",
        protocol="http://",
        expired_time_ts=None,
    )


def parse_static_http_proxies(proxy_urls: str) -> List[IpInfoModel]:
    """Parse a comma-separated list of static proxy URLs.

    Entries that are empty or whitespace-only are skipped; every other entry
    is handed to ``parse_static_http_proxy``.

    Args:
        proxy_urls: Comma-separated proxy URLs.

    Returns:
        The parsed proxies, in the order they appear in ``proxy_urls``.

    Raises:
        ValueError: If no non-blank entry is present, or if any entry is
            rejected by ``parse_static_http_proxy``.
    """
    parsed_entries: List[IpInfoModel] = []
    for candidate in proxy_urls.split(","):
        if not candidate.strip():
            # Tolerate stray commas such as "a,,b" or a trailing ",".
            continue
        parsed_entries.append(parse_static_http_proxy(candidate))

    if parsed_entries:
        return parsed_entries
    raise ValueError("--static_proxy is empty")


class StaticHttpProxy(ProxyProvider):
    """Proxy provider that serves the proxies listed in ``config.STATIC_PROXY_URL``."""

    def __init__(self):
        """Set the provider brand name to the static provider's enum value."""
        brand = ProviderNameEnum.STATIC_HTTP_PROVIDER
        self.proxy_brand_name = brand.value

    async def get_proxy(self, num: int) -> List[IpInfoModel]:
        """Return every proxy configured in ``config.STATIC_PROXY_URL``.

        Args:
            num: Requested proxy count. It is ignored: the whole configured
                list is returned regardless of this value.

        Returns:
            The proxies parsed from the configured comma-separated URL list.

        Raises:
            ValueError: If the setting is missing, empty, or contains an
                entry that fails to parse.
        """
        # The setting is re-read on every call; a missing attribute is treated
        # the same as an empty string.
        configured_urls = getattr(config, "STATIC_PROXY_URL", "")
        return parse_static_http_proxies(configured_urls)


def new_static_http_proxy() -> StaticHttpProxy:
    """Build and return a new ``StaticHttpProxy`` provider instance."""
    provider = StaticHttpProxy()
    return provider
2 changes: 2 additions & 0 deletions proxy/proxy_ip_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import config
from proxy.providers import (
new_kuai_daili_proxy,
new_static_http_proxy,
new_wandou_http_proxy,
)
from tools import utils
Expand Down Expand Up @@ -153,6 +154,7 @@ async def _reload_proxies(self):
IpProxyProvider: Dict[str, ProxyProvider] = {
ProviderNameEnum.KUAI_DAILI_PROVIDER.value: new_kuai_daili_proxy(),
ProviderNameEnum.WANDOU_HTTP_PROVIDER.value: new_wandou_http_proxy(),
ProviderNameEnum.STATIC_HTTP_PROVIDER.value: new_static_http_proxy(),
}


Expand Down
1 change: 1 addition & 0 deletions proxy/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
class ProviderNameEnum(Enum):
    """Identifiers for the supported IP proxy providers."""

    KUAI_DAILI_PROVIDER: str = "kuaidaili"
    WANDOU_HTTP_PROVIDER: str = "wandouhttp"
    # Provider backed by a fixed, user-supplied proxy list.
    STATIC_HTTP_PROVIDER: str = "static"


class IpInfoModel(BaseModel):
Expand Down
60 changes: 60 additions & 0 deletions test/test_static_http_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/test/test_static_http_proxy.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#

from unittest import IsolatedAsyncioTestCase
from unittest.mock import patch

from proxy.providers.static_http_proxy import (
new_static_http_proxy,
parse_static_http_proxies,
parse_static_http_proxy,
)


class TestStaticHttpProxy(IsolatedAsyncioTestCase):
    # Covers static proxy URL parsing and the config-backed provider.
    # Plain comments are used instead of docstrings so the descriptions
    # unittest reports for each test stay unchanged.

    def test_parse_static_http_proxy_with_auth(self):
        # A full URL with credentials exposes host, port, user and password.
        parsed = parse_static_http_proxy("http://user:pass@127.0.0.1:8899")

        self.assertEqual(
            (parsed.ip, parsed.port, parsed.user, parsed.password),
            ("127.0.0.1", 8899, "user", "pass"),
        )
        self.assertIsNone(parsed.expired_time_ts)

    def test_parse_static_http_proxy_accepts_bare_host_port(self):
        # Without a scheme or credentials, user and password are empty strings.
        parsed = parse_static_http_proxy("127.0.0.1:8899")

        self.assertEqual(
            (parsed.ip, parsed.port, parsed.user, parsed.password),
            ("127.0.0.1", 8899, "", ""),
        )

    def test_parse_static_http_proxies_accepts_comma_separated_list(self):
        # Mixed full-URL and bare host:port entries are both accepted.
        first, second = parse_static_http_proxies(
            "http://user:pass@127.0.0.1:8899,127.0.0.2:9900",
        )

        self.assertEqual((first.ip, first.port, first.user), ("127.0.0.1", 8899, "user"))
        self.assertEqual((second.ip, second.port), ("127.0.0.2", 9900))

    async def test_provider_reads_static_proxy_from_config(self):
        # The provider returns the whole configured list, whatever count is requested.
        static_provider = new_static_http_proxy()
        configured_urls = "http://127.0.0.1:8899,http://127.0.0.2:9900"

        with patch("config.STATIC_PROXY_URL", configured_urls, create=True):
            served = await static_provider.get_proxy(3)

        self.assertEqual(len(served), 2)
        self.assertEqual(
            [(entry.ip, entry.port) for entry in served],
            [("127.0.0.1", 8899), ("127.0.0.2", 9900)],
        )