-
Notifications
You must be signed in to change notification settings - Fork 0
feat: add proxy-aware clients, reports workflow, and tests #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
.PHONY: reports test

# Client to build reports for; override with `make reports CLIENT=<name>`.
# This must live at the top level: inside a recipe `?=` would be handed to the
# shell, which does not understand Make's conditional assignment.
CLIENT ?= partacademy

# Run each recipe in a single shell (GNU Make >= 3.82) so the heredoc below is
# passed to python as one script instead of one shell invocation per line.
.ONESHELL:

reports:
	@echo "Generating reports for $(CLIENT)"
	python - <<'PY'
	from app.analysis_sources import fetch_sources
	from app.analysis_pages import fetch_pages
	from app.analysis_goals import fetch_goals
	from app.analysis_gsc import fetch_gsc_sites
	from app.analysis_ym_webmaster import fetch_webmaster_hosts
	print("sources", fetch_sources())
	print("pages", fetch_pages())
	print("goals", fetch_goals())
	print("gsc", fetch_gsc_sites())
	print("ym webmaster", fetch_webmaster_hosts())
	PY

test:
	pytest
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from typing import Mapping | ||
|
|
||
| from app.metrika_client import list_counters | ||
|
|
||
|
|
||
def fetch_goals() -> Mapping[str, object]:
    """Return the data used for the goals report.

    Currently a thin pass-through: the result of ``list_counters()`` is
    returned unchanged.
    """
    counters = list_counters()
    return counters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from typing import Mapping | ||
|
|
||
| from app.gsc_client import get_sites | ||
|
|
||
|
|
||
def fetch_gsc_sites() -> Mapping[str, object]:
    """Return the Search Console site listing.

    Thin pass-through: the result of ``get_sites()`` is returned unchanged.
    """
    sites = get_sites()
    return sites
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from typing import Mapping | ||
|
|
||
| from app.metrika_client import list_counters | ||
|
|
||
|
|
||
def fetch_pages() -> Mapping[str, object]:
    """Return the data used for the pages report.

    Currently a thin pass-through: the result of ``list_counters()`` is
    returned unchanged.
    """
    counters = list_counters()
    return counters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from typing import Mapping | ||
|
|
||
| from app.metrika_client import list_counters | ||
|
|
||
|
|
||
def fetch_sources() -> Mapping[str, object]:
    """Return the data used for the traffic-sources report.

    Currently a thin pass-through: the result of ``list_counters()`` is
    returned unchanged.
    """
    counters = list_counters()
    return counters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from typing import Mapping | ||
|
|
||
| from app.ym_webmaster_client import user_hosts | ||
|
|
||
|
|
||
def fetch_webmaster_hosts() -> Mapping[str, object]:
    """Return the Yandex Webmaster host listing.

    Thin pass-through: the result of ``user_hosts()`` is returned unchanged.
    """
    hosts = user_hosts()
    return hosts
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import os | ||
| from typing import Mapping | ||
|
|
||
| import requests | ||
|
|
||
| from app.http_client import get_default_session, request_json | ||
|
|
||
|
|
||
| GSC_API = "https://searchconsole.googleapis.com/webmasters/v3" | ||
|
|
||
|
|
||
class GSCError(RuntimeError):
    """Raised when the Search Console client cannot proceed (e.g. missing credentials)."""
|
|
||
|
|
||
def _get_credentials() -> tuple[str, str]:
    """Read the Search Console credentials from the environment.

    Returns:
        A ``(client_id, refresh_token)`` pair taken from ``GSC_CLIENT_ID`` and
        ``GSC_REFRESH_TOKEN``.

    Raises:
        GSCError: if either variable is unset or empty.
    """
    client_id = os.getenv("GSC_CLIENT_ID")
    refresh_token = os.getenv("GSC_REFRESH_TOKEN")
    if client_id and refresh_token:
        return client_id, refresh_token
    raise GSCError("GSC_CLIENT_ID or GSC_REFRESH_TOKEN is not set")
|
|
||
|
|
||
def get_sites() -> Mapping[str, object]:
    """Fetch the ``/sites`` listing from the Search Console API.

    Placeholder implementation: the credentials are only checked for presence
    and are not attached to the request, so the call is sent without an
    Authorization header. A real implementation would first exchange the
    refresh token for an access token.

    Returns:
        The decoded JSON body of the response.

    Raises:
        GSCError: if ``GSC_CLIENT_ID`` or ``GSC_REFRESH_TOKEN`` is not set.
    """
    _get_credentials()
    url = f"{GSC_API}/sites"
    # The session is created for this single call; close it once the JSON body
    # has been read so its pooled connections are not leaked.
    with get_default_session() as session:
        return request_json(session, "GET", url)
|
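There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. GSC client validates credentials but doesn't use them: `get_sites()` calls `_get_credentials()` only to check that the variables are set and then sends the request without any authorization. |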
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| """HTTP helpers with proxy and timeout support. | ||
|
|
||
| This module centralizes session construction so all API clients can: | ||
| - respect HTTP(S)_PROXY while allowing per-host bypass via NO_PROXY | ||
| - share a retry-friendly requests.Session with sensible defaults | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import os | ||
| from dataclasses import dataclass | ||
| from typing import Iterable, Mapping, MutableMapping, Sequence | ||
|
|
||
| import requests | ||
|
|
||
|
|
||
| DEFAULT_TIMEOUT = 30 | ||
|
|
||
# Domains that frequently require direct access without the corporate proxy.
# The Webmaster API is listed under both .net and .ru: this project's Webmaster
# client talks to api.webmaster.yandex.net, so that host must be bypassable too.
DEFAULT_NO_PROXY_HOSTS: tuple[str, ...] = (
    "api-metrika.yandex.net",
    "api-metrika.yandex.ru",
    "api-metrika.yandex.com",
    "api.webmaster.yandex.net",
    "api.webmaster.yandex.ru",
    "api.searchconsole.googleapis.com",
    "searchconsole.googleapis.com",
)
|
|
||
|
|
||
@dataclass(frozen=True)
class HttpConfig:
    """Immutable settings used when building the shared HTTP session."""

    # Timeout stored on the session and passed to every request made through it.
    timeout: int = DEFAULT_TIMEOUT
    # Additional hosts to append to the NO_PROXY list, on top of the defaults.
    extra_no_proxy: Sequence[str] | None = None
|
|
||
|
|
||
| def _merge_no_proxy(env_value: str | None, extra_hosts: Iterable[str]) -> str: | ||
| hosts = [] if not env_value else [h.strip() for h in env_value.split(",") if h.strip()] | ||
| for host in extra_hosts: | ||
| if host and host not in hosts: | ||
| hosts.append(host) | ||
| return ",".join(hosts) | ||
|
|
||
|
|
||
def _build_proxies(config: HttpConfig) -> MutableMapping[str, str]:
    """Build the proxies mapping assigned to the session.

    ``http``/``https`` entries are taken from ``HTTP_PROXY``/``HTTPS_PROXY``
    (upper-case name first, then lower-case) and are omitted when unset. The
    ``no_proxy`` entry is always present: it combines the ``NO_PROXY``
    environment value, ``DEFAULT_NO_PROXY_HOSTS`` and ``config.extra_no_proxy``.

    NOTE(review): requests appears to honour a ``no_proxy`` entry only when it
    is supplied in per-request ``proxies``, not when it sits in
    ``session.proxies`` -- confirm that the hosts listed here really bypass the
    proxy.
    """

    def _env(name: str) -> str | None:
        # Upper-case spelling wins; fall back to the lower-case one.
        return os.getenv(name.upper()) or os.getenv(name.lower())

    proxies: MutableMapping[str, str] = {}
    for scheme in ("http", "https"):
        proxy_url = _env(f"{scheme}_proxy")
        if proxy_url:
            proxies[scheme] = proxy_url

    no_proxy = _merge_no_proxy(_env("no_proxy"), DEFAULT_NO_PROXY_HOSTS)
    if config.extra_no_proxy:
        no_proxy = _merge_no_proxy(no_proxy, config.extra_no_proxy)
    proxies["no_proxy"] = no_proxy
    return proxies
|
|
||
|
|
||
def get_default_session(config: HttpConfig | None = None) -> requests.Session:
    """Create a ``requests.Session`` preconfigured for the API clients.

    The session trusts the environment, carries the proxies mapping built from
    ``config``, sends a fixed User-Agent and stores the configured timeout in a
    ``timeout`` attribute that ``request_json`` reads back.

    Args:
        config: Optional settings; defaults to ``HttpConfig()``.
    """
    settings = config or HttpConfig()
    session = requests.Session()

    # Trust the environment: proxies, CA bundles, etc.
    session.trust_env = True
    session.proxies = _build_proxies(settings)
    session.headers.update({"User-Agent": "analyzer-machine/1.0"})

    # Pick up a user-supplied CA bundle (needed behind a MITM proxy).
    custom_ca = os.getenv("REQUESTS_CA_BUNDLE") or os.getenv("SSL_CERT_FILE")
    if custom_ca:
        session.verify = custom_ca

    session.timeout = settings.timeout  # type: ignore[attr-defined]
    return session
|
|
||
|
|
||
def request_json(session: requests.Session, method: str, url: str, **kwargs) -> Mapping[str, object]:
    """Send a request through ``session`` and return the decoded JSON body.

    Args:
        session: Session to send the request with.
        method: HTTP method name, e.g. ``"GET"``.
        url: Absolute URL to request.
        **kwargs: Forwarded to ``session.request``. ``timeout`` defaults to the
            session's ``timeout`` attribute (or ``DEFAULT_TIMEOUT``). If
            ``proxies`` is given it is used as-is.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: if the response status is 4xx/5xx.
    """
    timeout = kwargs.pop("timeout", getattr(session, "timeout", DEFAULT_TIMEOUT))

    if "proxies" not in kwargs:
        # requests does not consult a session-level "no_proxy" entry when it
        # picks a proxy, so hosts on the bypass list would still be tunnelled
        # through the session's http/https proxies. Enforce the bypass here by
        # blanking the proxies for this one request.
        no_proxy = (getattr(session, "proxies", None) or {}).get("no_proxy")
        if no_proxy and requests.utils.should_bypass_proxies(url, no_proxy=no_proxy):
            kwargs["proxies"] = {"http": None, "https": None, "no_proxy": no_proxy}

    response = session.request(method=method, url=url, timeout=timeout, **kwargs)
    response.raise_for_status()
    return response.json()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import os | ||
| from typing import Mapping | ||
|
|
||
| import requests | ||
|
|
||
| from app.http_client import get_default_session, request_json | ||
|
|
||
|
|
||
| METRIKA_API = "https://api-metrika.yandex.net/management/v1" | ||
|
|
||
|
|
||
class MetrikaError(RuntimeError):
    """Raised when the Yandex Metrika client cannot proceed (e.g. missing token)."""
|
|
||
|
|
||
def _get_token() -> str:
    """Return the Metrika OAuth token from ``YANDEX_METRIKA_TOKEN``.

    Raises:
        MetrikaError: if the variable is unset or empty.
    """
    token = os.getenv("YANDEX_METRIKA_TOKEN")
    if token:
        return token
    raise MetrikaError("YANDEX_METRIKA_TOKEN is not set")
|
|
||
|
|
||
def _session() -> requests.Session:
    """Return a new session built by ``get_default_session()`` with default settings."""
    session = get_default_session()
    return session
|
|
||
|
|
||
def list_counters() -> Mapping[str, object]:
    """Fetch the ``/counters`` listing from the Metrika management API.

    Returns:
        The decoded JSON body of the response.

    Raises:
        MetrikaError: if ``YANDEX_METRIKA_TOKEN`` is not set.
    """
    # Resolve the token first so a missing token fails before a session exists.
    headers = {"Authorization": f"OAuth {_get_token()}"}
    url = f"{METRIKA_API}/counters"
    # The session is created for this single call; close it once the JSON body
    # has been read so its pooled connections are not leaked.
    with _session() as session:
        return request_json(session, "GET", url, headers=headers)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import os | ||
| from typing import Mapping | ||
|
|
||
| import requests | ||
|
|
||
| from app.http_client import get_default_session, request_json | ||
|
|
||
|
|
||
| YM_WEB_API = "https://api.webmaster.yandex.net/v4" | ||
|
|
||
|
|
||
class YMWError(RuntimeError):
    """Raised when the Yandex Webmaster client cannot proceed (e.g. missing token)."""
|
|
||
|
|
||
def _get_token() -> str:
    """Return the Webmaster OAuth token from ``YM_WEBMASTER_TOKEN``.

    Raises:
        YMWError: if the variable is unset or empty.
    """
    token = os.getenv("YM_WEBMASTER_TOKEN")
    if token:
        return token
    raise YMWError("YM_WEBMASTER_TOKEN is not set")
|
|
||
|
|
||
def _session() -> requests.Session:
    """Return a new session built by ``get_default_session()`` with default settings."""
    session = get_default_session()
    return session
|
|
||
|
|
||
def user_hosts() -> Mapping[str, object]:
    """Fetch the ``/user/hosts`` listing from the Yandex Webmaster API.

    Returns:
        The decoded JSON body of the response.

    Raises:
        YMWError: if ``YM_WEBMASTER_TOKEN`` is not set.
    """
    # Resolve the token first so a missing token fails before a session exists.
    headers = {"Authorization": f"OAuth {_get_token()}"}
    url = f"{YM_WEB_API}/user/hosts"
    # The session is created for this single call; close it once the JSON body
    # has been read so its pooled connections are not leaked.
    with _session() as session:
        return request_json(session, "GET", url, headers=headers)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| import sys | ||
| from pathlib import Path | ||
|
|
||
# Directory containing this file; the search for the project root starts here.
ROOT = Path(__file__).resolve().parent

# Walk from ROOT towards the filesystem root and put the first directory that
# contains an "app" entry at the front of sys.path, so that `import app` works
# regardless of where pytest is launched from. The filesystem root itself
# (empty ``name``) is never checked.
for PARENT in (ROOT, *ROOT.parents):
    if not PARENT.name:
        break
    if (PARENT / "app").exists():
        sys.path.insert(0, str(PARENT))
        break
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| Cursor tasking guide | ||
| ==================== | ||
|
|
||
| Goal: align this repo with the “full” Analyzer Machine you had in Cursor (API clients, analysis scripts, data cache, reports) and make it runnable in this environment with the provided credentials. | ||
|
|
||
| What exists now | ||
| --------------- | ||
| - Minimal CLI only: `app/cli.py` (Typer commands `clients`, `show`, `validate`) and config loader `app/config.py`. | ||
| - Client configs in `clients/*` plus sample report assets in `docs/reports/partacademy/`. | ||
| - No API clients or analysis jobs are present in the repo yet. | ||
|
|
||
| Tasks for Cursor to implement | ||
| ----------------------------- | ||
| 1) Restore data collectors (paths under `app/`) | ||
| - Recreate the missing modules: `analysis_*.py`, `metrika_client.py`, `gsc_client.py`, `ym_webmaster_client.py` (names from the original Cursor workspace). Place them under `app/` to keep imports consistent. | ||
| - Each client should read tokens from environment variables that are already available here: `YANDEX_METRIKA_TOKEN`, `YM_WEBMASTER_TOKEN`, `GSC_CLIENT_ID`, `GSC_REFRESH_TOKEN`. | ||
| - Add lightweight wrappers that expose simple functions for fetching counters/metrics so the CLI can consume them later. | ||
|
|
||
| 2) Handle proxy restrictions | ||
| - Current environment forces traffic through `http://proxy:8080` and blocks `api-metrika.yandex.net` with `403 CONNECT`. | ||
| - Add proxy configuration hooks (e.g., respecting `HTTP_PROXY`, `HTTPS_PROXY`, `NO_PROXY`) and allow overriding them per request so domains like `api-metrika.yandex.net`/`api-metrika.yandex.ru` can be bypassed if permitted. | ||
| - Provide a retryable HTTP helper (requests.Session with timeouts) to centralize proxy/no-proxy handling for all API clients. | ||
|
|
||
| 3) Ship a runnable workflow | ||
| - Add a `make reports` (or similar) target to orchestrate loading configs, fetching data via the clients, and generating HTML/Markdown reports in `docs/reports/<client>/`. | ||
| - Include a `.env.example` listing required variables (tokens above + optional proxy tuning such as `NO_PROXY`). Do **not** commit real secrets. | ||
| - Extend README with usage examples for the new commands and the expected report outputs. | ||
|
|
||
| 4) Testing and CI hooks | ||
| - Add smoke tests for the HTTP helper and config parsing (Pytest preferred). Make sure they can run offline by mocking HTTP calls. | ||
| - If CI is added, include a job that runs `pytest` and `python -m app.cli validate <client>` for all clients. | ||
|
|
||
| Context for Cursor | ||
| - Known network symptom here: `Tunnel connection failed: 403 Forbidden` when calling Yandex Metrika via the forced proxy. The code should allow opting out of the proxy for specific hosts when possible. | ||
| - Sample client to validate against: `clients/partacademy/config.yaml`. The existing report scaffold lives in `docs/reports/partacademy/`. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,3 +3,5 @@ typer==0.12.5 | |
| rich==13.9.4 | ||
| PyYAML==6.0.2 | ||
| python-dotenv==1.0.1 | ||
| requests==2.32.3 | ||
| pytest==8.3.3 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from app.config import load_client_config | ||
|
|
||
|
|
||
def test_load_client_config_smoke():
    """Smoke test: the sample client's config loads and its path exists."""
    loaded = load_client_config("partacademy")
    config, config_path = loaded
    assert config.client_name == "partacademy"
    assert config_path.exists()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from app import http_client | ||
|
|
||
|
|
||
def test_merge_no_proxy_merges_and_deduplicates():
    """Hosts from both inputs are kept in order and the shared host appears once."""
    expected_hosts = ["a", "b", "c"]
    result = http_client._merge_no_proxy("a,b", ["b", "c"])
    assert result.split(",") == expected_hosts
|
|
||
|
|
||
def test_default_session_sets_proxies(monkeypatch):
    """The session picks up HTTP_PROXY and adds the default bypass hosts to no_proxy."""
    environment = {"HTTP_PROXY": "http://proxy:8080", "NO_PROXY": "localhost"}
    for name, value in environment.items():
        monkeypatch.setenv(name, value)

    proxies = http_client.get_default_session().proxies
    assert proxies["http"] == "http://proxy:8080"
    assert "api-metrika.yandex.net" in proxies["no_proxy"]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makefile uses Make syntax inside shell recipe
The `CLIENT?=partacademy` line is inside the recipe (indented with a tab), so it gets executed by the shell rather than by Make. The `?=` conditional assignment operator is Make syntax, not valid shell syntax. This causes `make reports` to fail with a shell error. The assignment needs to be at the Makefile's top level (unindented), or use shell syntax like `${CLIENT:-partacademy}` inside the recipe.