diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 255e735..8ca1a1e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,7 +9,7 @@ repos: - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.12.8 + rev: v0.15.5 hooks: # Run the linter. - id: ruff-check diff --git a/httpxthrottlecache/filecache/transport.py b/httpxthrottlecache/filecache/transport.py index c852d0a..65239c6 100644 --- a/httpxthrottlecache/filecache/transport.py +++ b/httpxthrottlecache/filecache/transport.py @@ -3,11 +3,11 @@ """ -import calendar import json import logging import os import time +from email.utils import formatdate, parsedate_to_datetime from pathlib import Path from typing import Callable, Iterator, Optional, Tuple, Union from urllib.parse import quote, unquote @@ -125,12 +125,12 @@ def __init__(self, resp: httpx.Response, path: Path, locking: bool, last_modifie self.lock = FileLock(str(path) + ".lock") if locking else None self.fh = None if last_modified: - self.mtime = calendar.timegm(time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S GMT")) + self.mtime = int(parsedate_to_datetime(last_modified).timestamp()) else: self.mtime = None if access_date: - self.atime = calendar.timegm(time.strptime(access_date, "%a, %d %b %Y %H:%M:%S GMT")) + self.atime = int(parsedate_to_datetime(access_date).timestamp()) else: self.atime = None # pragma: no cover @@ -193,12 +193,12 @@ def __init__(self, resp: httpx.Response, path: Path, locking: bool, last_modifie self.tmp = path.with_name(path.name + ".tmp") self.lock = AsyncFileLock(str(path) + ".lock") if locking else None if last_modified: - self.mtime = calendar.timegm(time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S GMT")) + self.mtime = int(parsedate_to_datetime(last_modified).timestamp()) else: self.mtime = None if access_date: - self.atime = calendar.timegm(time.strptime(access_date, "%a, %d %b %Y %H:%M:%S GMT")) + self.atime = int(parsedate_to_datetime(access_date).timestamp()) else: self.atime = None # pragma: no cover @@ -252,8 +252,8 @@ def _cache_hit_response(self, req: httpx.Request, path: Path, status_code: int = Large files are streamed async, so the only blocking events here are for reading small(ish) files """ meta = json.loads(path.with_suffix(path.suffix + ".meta").read_text()) - date = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(meta["fetched"])) - last_modified = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(meta["origin_lm"])) + date = formatdate(timeval=meta["fetched"], usegmt=True) + last_modified = formatdate(timeval=meta["origin_lm"], usegmt=True) ct = meta.get("headers", {}).get("content-type", "application/octet-stream") ce = meta.get("headers", {}).get("content-encoding") @@ -320,7 +320,7 @@ def return_if_fresh(self, request: httpx.Request) -> Tuple[Optional[httpx.Respon else: lm = json.loads(path.with_suffix(path.suffix + ".meta").read_text()).get("origin_lm") if lm: - request.headers["If-Modified-Since"] = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(lm)) + request.headers["If-Modified-Since"] = formatdate(timeval=lm, usegmt=True) return None, path else: return None, None diff --git a/tests/test_misc2.py b/tests/test_misc2.py index cd3dfa5..1c3d97e 100644 --- a/tests/test_misc2.py +++ b/tests/test_misc2.py @@ -109,6 +109,58 @@ def handler(req): assert (end-start) < 3 +def test_locale_independent_date_parsing(manager_cache): + """Regression test for #34 / edgartools#457: date parsing must not depend on system locale. + + On non-English systems (e.g. Chinese, German), time.strptime would crash + parsing English day/month names like 'Fri' or 'Oct'. The fix uses + email.utils.parsedate_to_datetime which is locale-independent. + """ + htc = HttpxThrottleCache( + cache_mode=manager_cache.cache_mode, + cache_dir=manager_cache.cache_dir, + cache_rules={".*": {".*": True}}, + user_agent_factory=lambda: "test", + rate_limiter_enabled=False, + ) + url = "https://example.com/locale-test" + + date_headers = [ + ("Fri, 10 Oct 2025 11:57:10 GMT", "Mon, 06 Oct 2025 08:00:00 GMT"), + ("Sun, 01 Jan 2023 00:00:00 GMT", "Sat, 31 Dec 2022 23:59:59 GMT"), + ("Thu, 15 Feb 2024 12:30:45 GMT", "Wed, 14 Feb 2024 10:00:00 GMT"), + ] + + for date_str, lm_str in date_headers: + chunks = [b"hello world"] + total = len(b"hello world") + + def handler(req): + return Response( + 200, + headers={ + "Content-Length": str(total), + "Last-Modified": lm_str, + "Date": date_str, + }, + stream=httpx.ByteStream(b"hello world"), + request=req, + ) + + with htc.http_client() as client: + next_transport = httpx.MockTransport(handler) + if isinstance(client._transport, httpxthrottlecache.filecache.transport.CachingTransport): + client._transport.transport = next_transport + else: + raise AssertionError(f"Unexpected transport type: {type(client._transport)}") + + r = client.get(url) + assert r.status_code == 200 + + r2 = client.get(url) + assert r2.headers.get("x-cache") == "HIT" + + def test_post_not_cached(manager_cache, monkeypatch): calls = 0 url = "https://example.com/post" diff --git a/uv.lock b/uv.lock index 46c207c..fec197a 100644 --- a/uv.lock +++ b/uv.lock @@ -366,7 +366,7 @@ dependencies = [ [package.dev-dependencies] dev = [ - { name = "edgartools" }, + { name = "edgartools", marker = "platform_python_implementation == 'CPython'" }, { name = "h2" }, { name = "pre-commit-uv" }, { name = "pylint" }, @@ -390,7 +390,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ - { name = "edgartools", specifier = ">=4.34.1" }, + { name = "edgartools", marker = "platform_python_implementation == 'CPython'", specifier = ">=4.34.1" }, { name = "h2", specifier = ">=4.3.0" }, { name = "pre-commit-uv", specifier = ">=4.1.4" }, { name = "pylint", specifier = ">=3.3.8" },