Skip to content

Commit 0150995

Browse files
committed
Handle compressed responses and cache
1 parent ecfbb4c commit 0150995

File tree

5 files changed

+655
-37
lines changed

5 files changed

+655
-37
lines changed

.github/workflows/license-check-python.yml

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,30 +10,8 @@ jobs:
1010
steps:
1111
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
1212

13-
- name: Create GitHub App Token able to read all CVector repos
14-
id: app
15-
uses: actions/create-github-app-token@v2
13+
- name: Check Python licenses
14+
uses: CVector-Energy/pyproject-license-check@main
1615
with:
1716
app-id: ${{ vars.APP_ID }}
18-
private-key: ${{ secrets.APP_PRIVATE_KEY }}
19-
owner: ${{ github.repository_owner }}
20-
permission-contents: read
21-
22-
- name: Set up uv
23-
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
24-
25-
- name: Configure git to use app token for private repos
26-
run: |
27-
git config --global url."https://x-access-token:${{ steps.app.outputs.token }}@github.com/".insteadOf "https://github.com/"
28-
29-
- name: Run license check
30-
run: |
31-
SKIP_DEPS=(
32-
cvec-commons # Internal library, not published to PyPI
33-
wrapt # BSD-2-Clause license
34-
)
35-
find . \( -name 'pyproject.toml' -o -name 'requirements.txt' \) -print0 | \
36-
xargs -0 -r uvx licensecheck@2025.1.0 --zero \
37-
--skip-dependencies "${SKIP_DEPS[@]}" \
38-
--ignore-licenses MPL \
39-
--requirements-paths
17+
app-private-key: ${{ secrets.APP_PRIVATE_KEY }}

src/cvec/cvec.py

Lines changed: 69 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1+
import gzip
12
import json
23
import logging
34
import os
5+
import time
6+
import zlib
47
from datetime import datetime
58
from typing import Any, Dict, List, Optional
69
from urllib.error import HTTPError, URLError
710
from urllib.parse import urlencode, urljoin
811
from urllib.request import Request, urlopen
912

13+
from cvec.http_cache import CacheEntry, parse_max_age
14+
1015
from cvec.models.agent_post import AgentPost, AgentPostRecommendation, AgentPostTag
1116
from cvec.models.eav_column import EAVColumn
1217
from cvec.models.eav_filter import EAVFilter
@@ -53,6 +58,9 @@ def __init__(
5358
self._publishable_key = None
5459
self._api_key = api_key or os.environ.get("CVEC_API_KEY")
5560

61+
# HTTP cache for GET requests
62+
self._cache: Dict[str, CacheEntry] = {}
63+
5664
if not self.host:
5765
raise ValueError(
5866
"CVEC_HOST must be set either as an argument or environment variable"
@@ -105,8 +113,51 @@ def _get_headers(self) -> Dict[str, str]:
105113
"Authorization": f"Bearer {self._access_token}",
106114
"Content-Type": "application/json",
107115
"Accept": "application/json",
116+
"Accept-Encoding": "gzip, deflate",
108117
}
109118

119+
@staticmethod
def _read_response(response: Any) -> tuple[bytes, str]:
    """Read the response body and undo any Content-Encoding compression.

    Args:
        response: An open HTTP response exposing ``read()`` and a
            ``headers`` mapping with ``get()``.

    Returns:
        Tuple of (decompressed data, content type). The content type is
        the raw ``content-type`` header value, or ``""`` when absent.
    """
    raw = response.read()
    # Content-coding names are case-insensitive, so normalise before comparing.
    encoding = (response.headers.get("Content-Encoding") or "").strip().lower()
    if encoding in ("gzip", "x-gzip"):
        raw = gzip.decompress(raw)
    elif encoding == "deflate":
        try:
            raw = zlib.decompress(raw)
        except zlib.error:
            # Some servers send a raw DEFLATE stream without the zlib
            # wrapper; a negative wbits value tells zlib to expect that.
            raw = zlib.decompress(raw, -zlib.MAX_WBITS)
    content_type: str = response.headers.get("content-type", "")
    return raw, content_type
134+
135+
@staticmethod
def _parse_response_body(response_data: bytes, content_type: str) -> Any:
    """Parse a response body based on its content type.

    Args:
        response_data: The (already decompressed) response body.
        content_type: The Content-Type header value; may be empty.

    Returns:
        The raw bytes for Apache Arrow stream responses, otherwise the
        body decoded as UTF-8 and parsed as JSON.

    Raises:
        ValueError: If a non-Arrow body is not valid UTF-8 JSON
            (``UnicodeDecodeError`` and ``json.JSONDecodeError`` are both
            subclasses of ``ValueError``).
    """
    # Media types are case-insensitive and may carry ";param=value"
    # suffixes, so compare only the normalised type/subtype.
    media_type = content_type.split(";", 1)[0].strip().lower()
    if media_type == "application/vnd.apache.arrow.stream":
        return response_data
    return json.loads(response_data.decode("utf-8"))
141+
142+
def _process_response(self, response: Any, url: str, method: str) -> Any:
    """Read, decompress, parse, and optionally cache a response.

    Args:
        response: The open HTTP response to consume.
        url: The full request URL (including query string); used as the
            cache key.
        method: The HTTP method of the request; only "GET" responses are
            considered for caching.

    Returns:
        The parsed response body.
    """
    response_data, content_type = self._read_response(response)
    parsed = self._parse_response_body(response_data, content_type)

    if method != "GET":
        return parsed

    cache_control = response.headers.get("Cache-Control", "")
    # Directive names are case-insensitive; drop any "=value" argument.
    directives = {
        part.partition("=")[0].strip().lower()
        for part in cache_control.split(",")
    }
    # "no-store" forbids keeping the response, even if max-age is also sent.
    if "no-store" in directives:
        return parsed

    max_age = parse_max_age(cache_control)
    if max_age is not None:
        # An empty ETag header is treated the same as a missing one.
        etag = response.headers.get("ETag", "") or None
        self._cache[url] = CacheEntry(
            data=parsed,
            etag=etag,
            max_age=max_age,
            stored_at=time.monotonic(),
        )

    return parsed
160+
110161
def _make_request(
111162
self,
112163
method: str,
@@ -124,6 +175,17 @@ def _make_request(
124175
if filtered_params:
125176
url = f"{url}?{urlencode(filtered_params)}"
126177

178+
# Check cache for GET requests
179+
if method == "GET" and url in self._cache:
180+
entry = self._cache[url]
181+
if time.monotonic() - entry.stored_at < entry.max_age:
182+
return entry.data
183+
# Stale entry with ETag: use conditional request
184+
if entry.etag:
185+
if headers is None:
186+
headers = {}
187+
headers["If-None-Match"] = entry.etag
188+
127189
request_headers = self._get_headers()
128190
if headers:
129191
request_headers.update(headers)
@@ -140,16 +202,16 @@ def make_http_request() -> Any:
140202
url, data=request_body, headers=request_headers, method=method
141203
)
142204
with urlopen(req) as response:
143-
response_data = response.read()
144-
content_type = response.headers.get("content-type", "")
145-
146-
if content_type == "application/vnd.apache.arrow.stream":
147-
return response_data
148-
return json.loads(response_data.decode("utf-8"))
205+
return self._process_response(response, url, method)
149206

150207
try:
151208
return make_http_request()
152209
except HTTPError as e:
210+
# Handle 304 Not Modified
211+
if e.code == 304 and method == "GET" and url in self._cache:
212+
entry = self._cache[url]
213+
entry.stored_at = time.monotonic()
214+
return entry.data
153215
# Handle 401 Unauthorized with token refresh
154216
if e.code == 401 and self._access_token and self._refresh_token:
155217
try:
@@ -164,12 +226,7 @@ def make_http_request() -> Any:
164226
url, data=request_body, headers=request_headers, method=method
165227
)
166228
with urlopen(req) as response:
167-
response_data = response.read()
168-
content_type = response.headers.get("content-type", "")
169-
170-
if content_type == "application/vnd.apache.arrow.stream":
171-
return response_data
172-
return json.loads(response_data.decode("utf-8"))
229+
return self._process_response(response, url, method)
173230
except (HTTPError, URLError, ValueError, KeyError) as refresh_error:
174231
logger.warning(
175232
"Token refresh failed, continuing with original request: %s",

src/cvec/http_cache.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""In-memory HTTP cache for GET requests with Cache-Control and ETag support."""
2+
3+
from dataclasses import dataclass
4+
from typing import Any, Optional
5+
6+
7+
@dataclass
class CacheEntry:
    """A cached HTTP response."""

    # Parsed response body as returned to the caller.
    data: Any
    # ETag validator from the response, or None when none was provided.
    etag: Optional[str]
    # Freshness lifetime in seconds, from the Cache-Control max-age directive.
    max_age: int
    # Clock reading taken when the entry was stored or last refreshed; the
    # entry is mutable so this can be updated in place.
    stored_at: float
15+
16+
17+
def parse_max_age(header: Optional[str]) -> Optional[int]:
    """Parse the max-age value from a Cache-Control header.

    Directive names are matched case-insensitively, and both the token
    form (``max-age=60``) and the quoted-string form (``max-age="60"``)
    are accepted.

    Args:
        header: The Cache-Control header value, or None if absent.

    Returns:
        The max-age value in seconds, or None if the directive is not
        present or its value is not an integer.
    """
    if not header:
        return None
    for directive in header.split(","):
        name, sep, value = directive.partition("=")
        if not sep or name.strip().lower() != "max-age":
            continue
        try:
            return int(value.strip().strip('"'))
        except ValueError:
            # A malformed max-age gives no usable freshness information.
            return None
    return None

0 commit comments

Comments
 (0)