diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 6e487f4..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.github/workflows/deploy-microservice.yml b/.github/workflows/deploy-microservice.yml new file mode 100644 index 0000000..b39e8ac --- /dev/null +++ b/.github/workflows/deploy-microservice.yml @@ -0,0 +1,129 @@ +name: deploy-microservice + +on: + push: + branches: [feat/daily-microservice, main] + paths: &paths + - deploy/** + - crates/** + - scripts/sync_autocli_jobs.py + - scripts/job_priority_scorer.py + - scripts/job_priority_config.py + - scripts/sponsor_filter.py + - supabase/migrations/** + - rust-toolchain.toml + - .github/workflows/deploy-microservice.yml + pull_request: + paths: *paths + workflow_dispatch: + +env: + IS_MAIN: ${{ github.ref == 'refs/heads/main' }} + IS_PUSH: ${{ github.event_name == 'push' }} + +jobs: + build-autocli-binary: + runs-on: ubuntu-latest + # Build inside Debian Bookworm to match the daily runtime image's GLIBC + # (python:3.12-slim-bookworm = GLIBC 2.36). Building on ubuntu-latest host + # gave GLIBC 2.39 binaries that wouldn't load in the runtime image with + # "GLIBC_2.39 not found". + container: + image: rust:1.94-slim-bookworm + steps: + - uses: actions/checkout@v4 + - name: Install build deps + run: | + apt-get update -qq + apt-get install -y -qq --no-install-recommends pkg-config libssl-dev + - uses: Swatinem/rust-cache@v2 + - run: cargo build --release -p autocli + - name: Verify binary GLIBC requirement is bookworm-compatible + run: | + # objdump may not be present in slim; use readelf + apt-get install -y -qq --no-install-recommends binutils + REQ=$(readelf -V target/release/autocli 2>/dev/null | grep -oE 'GLIBC_[0-9.]+' | sort -V | tail -1) + echo "max GLIBC requirement: $REQ" + # Bookworm ships GLIBC 2.36. Reject if binary needs >2.36. 
+ MAJMIN=$(echo "$REQ" | sed 's/GLIBC_//') + if [ -z "$MAJMIN" ]; then # fail closed: readelf reported no GLIBC version symbol (binary missing or unreadable), so nothing was verified + echo "FAIL: could not determine the GLIBC requirement of target/release/autocli" >&2 + exit 1 + elif [ "$(printf '%s\n%s\n' "$MAJMIN" "2.36" | sort -V | tail -1)" != "2.36" ]; then # version-sorted max is not 2.36, i.e. MAJMIN > 2.36 + echo "FAIL: binary needs $REQ but runtime image is GLIBC 2.36" >&2 + exit 1 + fi + - uses: actions/upload-artifact@v4 + with: + name: autocli-bin + path: target/release/autocli + retention-days: 7 + + build-chrome-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v4 + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + # NOTE: slugifier — `type=ref,event=branch` runs metadata-action's + # slugifier, so `feat/daily-microservice` becomes + # `branch-feat-daily-microservice` (Docker-tag-safe). + - id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/ricksanchez88e/autocli-chrome + flavor: latest=false + tags: | + type=raw,value=main,enable=${{ env.IS_MAIN }} + type=ref,event=branch,prefix=branch-,enable=${{ env.IS_MAIN == 'false' }} + type=sha,prefix=sha-,format=short + - uses: docker/build-push-action@v6 + with: + context: . 
+ file: deploy/chrome/Dockerfile + platforms: linux/amd64 + tags: ${{ steps.meta.outputs.tags }} + push: ${{ env.IS_PUSH == 'true' }} + + build-daily-image: + runs-on: ubuntu-latest + needs: [build-autocli-binary] + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + name: autocli-bin + path: deploy/daily/bin + - run: chmod +x deploy/daily/bin/autocli + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/ricksanchez88e/autocli-daily + flavor: latest=false + tags: | + type=raw,value=main,enable=${{ env.IS_MAIN }} + type=ref,event=branch,prefix=branch-,enable=${{ env.IS_MAIN == 'false' }} + type=sha,prefix=sha-,format=short + - uses: docker/build-push-action@v6 + with: + context: . + file: deploy/daily/Dockerfile + platforms: linux/amd64 + tags: ${{ steps.meta.outputs.tags }} + push: ${{ env.IS_PUSH == 'true' }} diff --git a/.gitignore b/.gitignore index 61deb06..f0efbd0 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,20 @@ /opencli test-results*.md twitter-downloads/ + +# Local project files +CHANGELOG.md +/output/ +/test/ +.env + +# Python +__pycache__/ + +# macOS + local tooling noise +.DS_Store +.playwright-mcp/ +.serena/ + +# Phase 0 local build output (CI downloads as artifact; never commit the binary) +deploy/daily/bin/autocli diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..071e443 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,136 @@ +# AGENTS.md +# Core Rule + +Use Serena first for code intelligence on non-trivial coding tasks, and use bounded subagents for complex engineering work. + +Do not claim Serena or subagents were used unless they actually were. If a required tool is unavailable, say so and continue with the smallest safe fallback. 
+ +## Serena Workflow + +At the start of any non-trivial coding task (see definitions below), unfamiliar-code task, bug investigation, shared-symbol change, or cross-file change: + +Definitions: +- **Non-trivial**: Any change affecting ≥1 function with external dependencies, ≥3 files, or requiring architectural reasoning +- **Trivial**: Typo fixes, one-line config changes, single-file docs edits without code path impact + +Do not run the full Serena workflow for trivial tasks unless the code path is unfamiliar or risky. + +1. Check Serena availability. +2. Run `serena.get_current_config`. +3. If the active Serena project does not match the repository root, run `serena.activate_project`. +4. Run `serena.check_onboarding_performed`. +5. If onboarding is missing, run `serena.onboarding`. +6. Read only relevant Serena memories. + +If Serena is unavailable, say: + +> Serena MCP is unavailable; falling back to built-in search/read tools. + +Then continue with targeted `rg`, file reads, and normal verification. + +Do not run the full Serena workflow for typo fixes, simple docs edits, or one-line config changes unless the code path is unfamiliar or risky. + +## Serena Navigation + +Prefer Serena before broad file reads: + +1. `serena.get_symbols_overview` for unfamiliar files. +2. `serena.find_symbol` for functions, classes, handlers, schemas, adapters, providers, components, exported APIs, and config objects. +3. `serena.find_referencing_symbols` before changing shared/public symbols. +4. `serena.find_implementations` for interfaces, adapters, providers, and polymorphic dispatch. +5. `serena.get_diagnostics_for_file` after meaningful edits. + +Use raw `rg`, grep, or full-file reads only when: + +- the target is not code, +- the symbol name is unknown, +- Serena cannot resolve the result, +- Serena has already narrowed the search area, +- or the task is trivial enough that Serena overhead exceeds value. + +Do not read entire large files first. 
+ +## Editing Rules + +Before editing: + +- Map the real call path. +- Check references for shared/exported symbols. +- Pick the smallest safe patch. +- Avoid unrelated files. +- Prefer symbol-level edits for whole functions/classes/methods. +- Add or update tests when behavior changes. + +After editing: + +1. Run the smallest relevant verification first (see Verification Tiers below). +2. Then run broader checks if the change is cross-file or high-risk. +3. Summarize changed files, reason, and verification result. + +Verification Tiers: +- **Tier 1 (local)**: Single unit test or type check for the edited function/method +- **Tier 2 (module)**: All tests in the affected package/directory +- **Tier 3 (integration)**: Cross-module or end-to-end verification for cross-file/high-risk changes + +## Subagent Policy + +Use subagents for: + +- cross-file or cross-module changes, +- unknown root cause, +- refactors, +- security/auth changes, +- data-loss or migration risk, +- queue/worker/scraper/infra changes, +- PR or adversarial review, +- bugs where investigation, review, and fix can be separated. + +Do not use subagents for: + +- direct Q&A, +- typo fixes, +- one-file trivial edits, +- simple config changes, +- tasks where overhead exceeds value. + +If subagents are unavailable, say so and continue in the parent agent using the same sequence manually: explore read-only, review risks, patch only if needed, then verify. + +## Subagent Roles + +- `explorer`: read-only. Map execution paths, symbols, references, data flow, likely owners, and risky files. +- `reviewer`: read-only. Look for correctness bugs, regressions, race conditions, idempotency issues, auth/security problems, migration/data-loss risks, missing tests, and rollback gaps. +- `fixer`: may edit only after the code path is understood. Keep the patch small, avoid unrelated files, use Serena reference checks, and verify targeted changes. 
+ +Subagents may recommend actions, but must not broaden scope, introduce new architecture, or modify unrelated modules without parent approval. + +## Subagent Flow + +For complex tasks: + +1. Spawn `explorer` first. +2. Spawn `reviewer` in parallel only when risk review helps. +3. Wait for read-only findings. +4. Summarize the evidence. +5. Spawn `fixer` only if a patch is needed. +6. Run verification. +7. For high-risk changes, run one final reviewer pass. + +Default limit: + +- `explorer`: at most 1 before editing +- `reviewer`: at most 1 in parallel with explorer or after +- `fixer`: at most 1, only after read-only findings are complete +- Do not create more subagents unless the user explicitly asks or a P0/P1 risk remains unresolved. +- Maximum total: 3 subagents per task (2 read-only + 1 fixer) + +Subagents must return: + +- scope inspected, +- Serena tools used, +- key symbols/files, +- findings, +- risks, +- recommended next action, +- confidence level. + +Parent Codex owns the final decision. diff --git a/CHANGELOG_pipeline.md b/CHANGELOG_pipeline.md new file mode 100644 index 0000000..50cf7d3 --- /dev/null +++ b/CHANGELOG_pipeline.md @@ -0,0 +1,21 @@ +# JD Structured Extraction Pipeline — Changelog + +## [0.1.0] — 2026-05-03 + +### Added + +- **Pipeline orchestrator** (`jd_pipeline.py`): CLI tool (`--input`, `--dry-run`, `--limit`) that reads `output/final.json`, preprocesses JDs, extracts structured JSON via local LLM, and upserts results into Supabase. +- **LLM client** (`jd_pipeline_llm.py`): Async batch client for llama.cpp `/chat/completions` with grammar-constrained generation (`json_schema`), 3-attempt retry (standard → repair with validation feedback → minimal), dynamic timeout, semaphore-limited concurrency, and latency tracking. +- **Database client** (`jd_pipeline_db.py`): Atomic `claim_job` / `upsert_job_structured` / `mark_dead_letter` / `reap_stale_processing` RPCs, extraction_runs bookkeeping, `.env` auto-loading. 
+- **Config** (`jd_pipeline_config.py`): Version constants, schema definitions (`JD_SCHEMA` + `MINIMAL_SCHEMA`), LLM/Supabase connection params, token limits, context-size tiers. +- **Preprocessor** (`jd_pipeline_preprocess.py`): LinkedIn boilerplate removal, NFKC normalization, control-char strip, SHA-256 hashing. +- **Supabase migrations** (6 files + RPC grants): `jobs` columns for structured extraction, `extraction_runs` table, `dead_letter_records` with stage/error tracking, atomic RPC functions with run-id guards. +- **Per-run reporting**: Console summary with failed-jobs detail (URL, stage, error class, message) + JSON report file in `output/`. + +### Fixed + +- `dead_letter_records.reason` and `source_schema`/`source_job_id` made nullable — prevents write failures when fields are absent. +- `skills.maxItems` raised from 30 → 50 to accommodate verbose model output. +- System prompt improved: explicit rules for skills (technical only, max 25), summary (1–3 sentences), experience_level, employment_type. +- Stale processing reaper threshold adjustable; 172 stuck jobs reaped successfully. +- Duplicate counter increments removed — `PipelineStats.record_*()` methods now single source of truth. 
\ No newline at end of file diff --git a/adapters/linkedin/recommended.yaml b/adapters/linkedin/recommended.yaml new file mode 100644 index 0000000..78bc2c8 --- /dev/null +++ b/adapters/linkedin/recommended.yaml @@ -0,0 +1,199 @@ +site: linkedin +name: recommended +description: "全量爬取 LinkedIn 推荐职位列表,自动翻页获取所有推荐岗位" +meta_title: "Top job picks for you | LinkedIn" +meta_description: "" +meta_keywords: "" +tags: [linkedin, jobs, recommended, career, recruitment] +domain: www.linkedin.com +strategy: header +browser: true +timeoutSeconds: 1200 + +args: + limit: + type: int + required: false + default: 200 + description: "返回结果数量上限 (0=无限制,爬取全部)" + start: + type: int + required: false + default: 0 + description: "分页偏移量" + with_jd: + type: bool + required: false + default: false + description: "是否输出职位详情字段 jd (false=仅列表字段,true=抓取职位描述)" + +columns: [rank, title, company, location, workplace_type, salary, posted_time, applicant_count, easy_apply, url, external_url, jd] + +pipeline: + - navigate: + url: "https://www.linkedin.com/jobs/collections/recommended/" + settleMs: 5000 + + - evaluate: | + (async () => { + const allUrls = performance.getEntriesByType('resource').map(e => e.name); + let apiMatch = allUrls.find(u => u.includes('/voyager/api/graphql') && u.includes('jobCollectionSlug') && u.includes('recommended')); + if (!apiMatch) { + apiMatch = allUrls.find(u => u.includes('/voyager/api/graphql') && u.includes('jobCards')); + } + if (!apiMatch) return []; + + const jsession = document.cookie.split(';').map(p => p.trim()) + .find(p => p.startsWith('JSESSIONID='))?.slice('JSESSIONID='.length); + if (!jsession) throw new Error('LinkedIn JSESSIONID cookie not found. Please sign in.'); + const csrf = jsession.replace(/^"|"$/g, ''); + + const parsed = new URL(apiMatch); + const queryId = parsed.searchParams.get('queryId') || ''; + + const limit = Number(args.limit ?? 
200); + const withJd = args.with_jd === true || args.with_jd === 'true'; + let start = args.start || 0; + const BATCH = 24; + const allItems = []; + const cleanText = (text) => String(text || '').replace(/\s+/g, ' ').trim(); + + while (true) { + const remaining = limit > 0 ? limit - allItems.length : BATCH; + const count = Math.min(BATCH, remaining); + if (count <= 0) break; + + const vars = `(count:${count},jobCollectionSlug:recommended,query:(origin:GENERIC_JOB_COLLECTIONS_LANDING),start:${start})`; + const fetchUrl = `/voyager/api/graphql?variables=${encodeURIComponent(vars).replace(/%3A/gi, ':').replace(/%2C/gi, ',').replace(/%28/gi, '(').replace(/%29/gi, ')')}&queryId=${queryId}`; + + const resp = await fetch(fetchUrl, { + credentials: 'include', + headers: { + 'csrf-token': csrf, + 'x-restli-protocol-version': '2.0.0', + }, + }); + + if (!resp.ok) break; + + const json = await resp.json(); + const elements = json?.data?.jobsDashJobCardsByJobCollections?.elements || []; + + if (elements.length === 0) break; + + for (const element of elements) { + const card = element?.jobCard?.jobPostingCard; + if (!card) continue; + + const urn = card.preDashNormalizedJobPostingUrn || card.entityUrn || ''; + const jobId = urn.match(/(\d+)/)?.[1] || ''; + + const listedItem = (card.footerItems || []).find(i => i?.type === 'LISTED_DATE' && i?.timeAt); + const postedTime = listedItem?.timeAt ? new Date(listedItem.timeAt).toISOString().slice(0, 10) : ''; + + const easyApply = (card.footerItems || []).some(i => i.type === 'EASY_APPLY_TEXT') ? 'true' : 'false'; + + // Extract workplace type from location string (e.g. "London (On-site)") + const locText = card.secondaryDescription?.text || ''; + const workplaceMatch = locText.match(/\((Remote|Hybrid|On-site)\)/i); + const workplaceType = workplaceMatch ? 
workplaceMatch[1] : ''; + + // Clean location by removing workplace type suffix + const location = locText.replace(/\s*\((Remote|Hybrid|On-site)\)\s*/i, '').trim(); + + // Check for salary in tertiaryDescription + const salary = card.tertiaryDescription?.text || ''; + const url = jobId ? 'https://www.linkedin.com/jobs/view/' + jobId : ''; + + allItems.push({ + title: card.title?.text || card.jobPostingTitle || '', + company: card.primaryDescription?.text || '', + location: location, + workplace_type: workplaceType, + salary: salary, + posted_time: postedTime, + applicant_count: '', + easy_apply: easyApply, + url: url, + external_url: '', + job_id: jobId, + jd: '', + }); + } + + if (elements.length < count) break; + start += elements.length; + + if (limit > 0 && allItems.length >= limit) break; + } + + const extractExternalApplyUrl = (json) => { + const offsiteApply = json?.applyMethod?.['com.linkedin.voyager.jobs.OffsiteApply']; + return offsiteApply?.companyApplyUrl || ''; + }; + + const fetchJobDetails = async (jobId) => { + if (!jobId) return { jd: '', external_url: '' }; + const url = `/voyager/api/jobs/jobPostings/${jobId}`; + const headers = { 'csrf-token': csrf, 'x-restli-protocol-version': '2.0.0' }; + for (let attempt = 0; attempt < 4; attempt++) { + try { + const resp = await fetch(url, { credentials: 'include', headers }); + if (resp.ok) { + const json = await resp.json(); + return { + jd: cleanText(json?.description?.text || json?.description || ''), + external_url: extractExternalApplyUrl(json), + }; + } + + // Retry on transient / throttling responses. + if ([429, 500, 502, 503, 504].includes(resp.status)) { + await sleep(250 * Math.pow(2, attempt)); + continue; + } + + // Non-retryable. 
+ return { jd: '', external_url: '' }; + } catch (_) { + await sleep(250 * Math.pow(2, attempt)); + } + } + return { jd: '', external_url: '' }; + }; + + const detailItems = allItems.filter(item => withJd || item.easy_apply === 'false'); + // LinkedIn will sometimes throttle detail calls when scraping in bulk (e.g. --limit 0). + // Lower concurrency and retry on transient failures to avoid dropping external_url/jd. + const detailConcurrency = 8; + const sleep = (ms) => new Promise(r => setTimeout(r, ms)); + for (let i = 0; i < detailItems.length; i += detailConcurrency) { + const batch = detailItems.slice(i, i + detailConcurrency); + const details = await Promise.all(batch.map(item => fetchJobDetails(item.job_id))); + details.forEach((detail, index) => { + batch[index].external_url = detail.external_url; + if (withJd) { + batch[index].jd = detail.jd; + } + }); + } + + return allItems.slice(0, limit > 0 ? limit : undefined).map((item, i) => ({ + rank: i + 1, + ...item, + })); + })() + + - map: + rank: ${{ item.rank }} + title: ${{ item.title | default("N/A") }} + company: ${{ item.company | default("N/A") }} + location: ${{ item.location | default("N/A") }} + workplace_type: ${{ item.workplace_type | default("N/A") }} + salary: ${{ item.salary | default("N/A") }} + posted_time: ${{ item.posted_time | default("N/A") }} + applicant_count: ${{ item.applicant_count | default("N/A") }} + easy_apply: ${{ item.easy_apply | default("false") }} + url: ${{ item.url }} + external_url: ${{ item.external_url | default("") }} + jd: ${{ item.jd | default("") }} diff --git a/crates/autocli-browser/src/bridge.rs b/crates/autocli-browser/src/bridge.rs index dc40af4..2647726 100644 --- a/crates/autocli-browser/src/bridge.rs +++ b/crates/autocli-browser/src/bridge.rs @@ -31,6 +31,14 @@ impl BrowserBridge { /// Connect to the daemon, starting it if necessary, and return a trait-object page. 
pub async fn connect(&mut self) -> Result, CliError> { + // CDP-direct path: bypass daemon + extension when AUTOCLI_CDP_ENDPOINT is set. + // Used by the autocli-daily microservice (deploy/SPEC.md §5.1). + if let Ok(endpoint) = std::env::var("AUTOCLI_CDP_ENDPOINT") { + if !endpoint.is_empty() { + let page = crate::CdpPage::connect(&endpoint).await?; + return Ok(Arc::new(page)); + } + } Ok(self.connect_daemon_page().await?) } @@ -61,7 +69,10 @@ impl BrowserBridge { } // Step 3: Wait up to 5s for extension to connect - if self.poll_extension(&client, EXTENSION_INITIAL_WAIT, false).await { + if self + .poll_extension(&client, EXTENSION_INITIAL_WAIT, false) + .await + { return Ok(Arc::new(DaemonPage::new(client, "default"))); } @@ -71,7 +82,10 @@ impl BrowserBridge { wake_chrome(); // Step 5: Wait remaining 25s with progress - if self.poll_extension(&client, EXTENSION_REMAINING_WAIT, true).await { + if self + .poll_extension(&client, EXTENSION_REMAINING_WAIT, true) + .await + { return Ok(Arc::new(DaemonPage::new(client, "default"))); } @@ -243,4 +257,46 @@ mod tests { let bridge = BrowserBridge::default_port(); assert_eq!(bridge.port, DEFAULT_PORT); } + + #[tokio::test] + async fn test_connect_uses_cdp_endpoint_when_env_var_set() { + use autocli_core::CliError; + use std::env; + + struct EnvGuard(&'static str); + impl Drop for EnvGuard { + fn drop(&mut self) { + env::remove_var(self.0); + } + } + + // Set AUTOCLI_CDP_ENDPOINT to an unreachable endpoint. Use a RAII guard so + // the var is removed even on panic, preventing cross-test leakage. 
+ let _guard = EnvGuard("AUTOCLI_CDP_ENDPOINT"); + env::set_var("AUTOCLI_CDP_ENDPOINT", "ws://127.0.0.1:1/devtools/page/never"); + + let mut bridge = BrowserBridge::default_port(); + let result = bridge.connect().await; + let err = match result { + Err(e) => e, + Ok(_) => panic!("connect() should fail against an unreachable CDP endpoint"), + }; + + // Assert the CDP path was taken: the error MUST be BrowserConnect with a CDP-flavored + // message. (Asserting on the variant + message-contains is robust to wording changes + // in the daemon path, since that path produces a different message format.) + match &err { + CliError::BrowserConnect { message, .. } => { + assert!( + message.contains("CDP") || message.contains("Failed to connect"), + "BrowserConnect was raised but not from the CDP path. message: {message}" + ); + assert!( + !message.contains("Chrome is not running"), + "BrowserConnect came from the daemon path — CDP env-var branch was not taken. message: {message}" + ); + } + other => panic!("expected CliError::BrowserConnect, got: {other:?}"), + } + } } diff --git a/crates/autocli-browser/src/cdp.rs b/crates/autocli-browser/src/cdp.rs index 3ede670..dfaf29c 100644 --- a/crates/autocli-browser/src/cdp.rs +++ b/crates/autocli-browser/src/cdp.rs @@ -1,9 +1,9 @@ use async_trait::async_trait; -use futures::{SinkExt, StreamExt}; use autocli_core::{ AutoScrollOptions, CliError, Cookie, CookieOptions, GotoOptions, IPage, InterceptedRequest, NetworkRequest, ScreenshotOptions, SnapshotOptions, TabInfo, WaitOptions, }; +use futures::{SinkExt, StreamExt}; use serde_json::{json, Value}; use std::collections::HashMap; use std::sync::atomic::{AtomicU64, Ordering}; @@ -15,8 +15,10 @@ use tracing::{debug, error}; use crate::dom_helpers; -type WsSink = - futures::stream::SplitSink>, Message>; +type WsSink = futures::stream::SplitSink< + tokio_tungstenite::WebSocketStream>, + Message, +>; /// Direct Chrome DevTools Protocol page client via WebSocket. 
/// @@ -46,9 +48,7 @@ impl CdpPage { Ok(Message::Text(text)) => { if let Ok(json) = serde_json::from_str::(&text) { if let Some(id) = json.get("id").and_then(|v| v.as_u64()) { - if let Some(tx) = - reader_pending.write().await.remove(&id) - { + if let Some(tx) = reader_pending.write().await.remove(&id) { let _ = tx.send(json); } } else { @@ -215,9 +215,7 @@ impl IPage for CdpPage { } async fn cookies(&self, _options: Option) -> Result, CliError> { - let result = self - .send_cdp("Network.getCookies", json!({})) - .await?; + let result = self.send_cdp("Network.getCookies", json!({})).await?; let cookies_val = result.get("cookies").cloned().unwrap_or(json!([])); let cookies: Vec = serde_json::from_value(cookies_val).unwrap_or_default(); Ok(cookies) @@ -267,10 +265,7 @@ impl IPage for CdpPage { async fn tabs(&self) -> Result, CliError> { let result = self.send_cdp("Target.getTargets", json!({})).await?; - let targets = result - .get("targetInfos") - .cloned() - .unwrap_or(json!([])); + let targets = result.get("targetInfos").cloned().unwrap_or(json!([])); let mut tabs = Vec::new(); if let Some(arr) = targets.as_array() { for t in arr { @@ -304,7 +299,12 @@ impl IPage for CdpPage { } async fn close(&self) -> Result<(), CliError> { - self.send_cdp("Browser.close", json!({})).await.ok(); + // Intentional no-op: in CDP-direct mode (AUTOCLI_CDP_ENDPOINT) we attach + // to a SHARED Chrome. Sending Browser.close would kill the whole browser + // and every other CDP consumer (e.g. the sibling autocli-chrome + // container that owns Chrome's lifecycle). If a particular page target + // needs cleanup, callers should send Target.closeTarget with a specific + // targetId instead. 
Ok(()) } diff --git a/crates/autocli-browser/src/daemon.rs b/crates/autocli-browser/src/daemon.rs index de32b6e..e4e0659 100644 --- a/crates/autocli-browser/src/daemon.rs +++ b/crates/autocli-browser/src/daemon.rs @@ -1,3 +1,4 @@ +use autocli_core::CliError; use axum::{ extract::{ ws::{Message, WebSocket}, @@ -9,7 +10,6 @@ use axum::{ Json, Router, }; use futures::{SinkExt, StreamExt}; -use autocli_core::CliError; use serde_json::json; use std::{ collections::HashMap, @@ -136,9 +136,7 @@ async fn health_handler() -> impl IntoResponse { /// POST /ai-generate — proxy AI request to autocli.ai with local token. /// Reads token from ~/.autocli/config.json, streams response back to caller. -async fn ai_generate_proxy_handler( - body: axum::body::Bytes, -) -> impl IntoResponse { +async fn ai_generate_proxy_handler(body: axum::body::Bytes) -> impl IntoResponse { use axum::body::Body; use axum::http::Response; @@ -146,14 +144,18 @@ async fn ai_generate_proxy_handler( let home = std::env::var("HOME") .or_else(|_| std::env::var("USERPROFILE")) .unwrap_or_else(|_| ".".to_string()); - let config_path = std::path::PathBuf::from(&home).join(".autocli").join("config.json"); + let config_path = std::path::PathBuf::from(&home) + .join(".autocli") + .join("config.json"); let token = match std::fs::read_to_string(&config_path) { - Ok(content) => { - serde_json::from_str::(&content) - .ok() - .and_then(|v| v.get("autocli-token").and_then(|t| t.as_str()).map(String::from)) - .unwrap_or_default() - } + Ok(content) => serde_json::from_str::(&content) + .ok() + .and_then(|v| { + v.get("autocli-token") + .and_then(|t| t.as_str()) + .map(String::from) + }) + .unwrap_or_default(), Err(_) => String::new(), }; @@ -161,14 +163,19 @@ async fn ai_generate_proxy_handler( return Response::builder() .status(StatusCode::UNAUTHORIZED) .header("Content-Type", "application/json") - .body(Body::from(r#"{"error":"No token configured. 
Run: autocli auth"}"#)) + .body(Body::from( + r#"{"error":"No token configured. Run: autocli auth"}"#, + )) .unwrap(); } // Determine API base - let api_base = std::env::var("AUTOCLI_API_BASE") - .unwrap_or_else(|_| "https://www.autocli.ai".to_string()); - let url = format!("{}/api/ai/extension-generate", api_base.trim_end_matches('/')); + let api_base = + std::env::var("AUTOCLI_API_BASE").unwrap_or_else(|_| "https://www.autocli.ai".to_string()); + let url = format!( + "{}/api/ai/extension-generate", + api_base.trim_end_matches('/') + ); // Forward request to remote API let client = match reqwest::Client::builder() @@ -203,7 +210,9 @@ async fn ai_generate_proxy_handler( // Stream the response back while buffering for save+upload let status = resp.status(); - let content_type = resp.headers().get("content-type") + let content_type = resp + .headers() + .get("content-type") .and_then(|v| v.to_str().ok()) .unwrap_or("application/json") .to_string(); @@ -236,7 +245,12 @@ async fn ai_generate_proxy_handler( let _ = tx.send(Ok(bytes)).await; } Err(e) => { - let _ = tx.send(Err(std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))).await; + let _ = tx + .send(Err(std::io::Error::new( + std::io::ErrorKind::Other, + e.to_string(), + ))) + .await; break; } } @@ -259,7 +273,9 @@ async fn ai_generate_proxy_handler( } // Upload to server - if let Err(e) = upload_adapter_to_server(&api_base_for_upload, &token_for_upload, &yaml_content).await { + if let Err(e) = + upload_adapter_to_server(&api_base_for_upload, &token_for_upload, &yaml_content).await + { tracing::warn!(error = %e, "Failed to upload adapter to server"); } }); @@ -279,9 +295,12 @@ fn extract_yaml_from_response(text: &str) -> String { // Try SSE format: data: {"choices":[{"delta":{"content":"..."}}]} for line in text.lines() { if let Some(data) = line.strip_prefix("data: ") { - if data.trim() == "[DONE]" { continue; } + if data.trim() == "[DONE]" { + continue; + } if let Ok(parsed) = 
serde_json::from_str::(data) { - if let Some(delta) = parsed.get("choices") + if let Some(delta) = parsed + .get("choices") .and_then(|c| c.get(0)) .and_then(|c| c.get("delta")) .and_then(|d| d.get("content")) @@ -296,7 +315,8 @@ fn extract_yaml_from_response(text: &str) -> String { // If no SSE content, try JSON response format if content.is_empty() { if let Ok(parsed) = serde_json::from_str::(text) { - if let Some(msg) = parsed.get("choices") + if let Some(msg) = parsed + .get("choices") .and_then(|c| c.get(0)) .and_then(|c| c.get("message")) .and_then(|m| m.get("content")) @@ -312,33 +332,47 @@ fn extract_yaml_from_response(text: &str) -> String { while let Some(start) = cleaned.find("") { if let Some(end) = cleaned.find("") { cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 8..]); - } else { cleaned = cleaned[..start].to_string(); break; } + } else { + cleaned = cleaned[..start].to_string(); + break; + } } while let Some(start) = cleaned.find("") { if let Some(end) = cleaned.find("") { cleaned = format!("{}{}", &cleaned[..start], &cleaned[end + 11..]); - } else { cleaned = cleaned[..start].to_string(); break; } + } else { + cleaned = cleaned[..start].to_string(); + break; + } } let trimmed = cleaned.trim(); - let trimmed = trimmed.strip_prefix("```yaml").or_else(|| trimmed.strip_prefix("```")).unwrap_or(trimmed); + let trimmed = trimmed + .strip_prefix("```yaml") + .or_else(|| trimmed.strip_prefix("```")) + .unwrap_or(trimmed); let trimmed = trimmed.strip_suffix("```").unwrap_or(trimmed); trimmed.trim().to_string() } /// Save adapter YAML to ~/.autocli/adapters/{site}/{name}.yaml fn save_adapter_locally(home: &str, yaml: &str) -> Result<(), String> { - let site = yaml.lines() + let site = yaml + .lines() .find(|l| l.starts_with("site:")) .and_then(|l| l.strip_prefix("site:")) .map(|s| s.trim().trim_matches('"').to_string()) .unwrap_or_else(|| "unknown".to_string()); - let name = yaml.lines() + let name = yaml + .lines() .find(|l| 
l.starts_with("name:")) .and_then(|l| l.strip_prefix("name:")) .map(|s| s.trim().trim_matches('"').to_string()) .unwrap_or_else(|| "default".to_string()); - let dir = std::path::PathBuf::from(home).join(".autocli").join("adapters").join(&site); + let dir = std::path::PathBuf::from(home) + .join(".autocli") + .join("adapters") + .join(&site); std::fs::create_dir_all(&dir).map_err(|e| format!("mkdir: {}", e))?; let path = dir.join(format!("{}.yaml", name)); std::fs::write(&path, yaml).map_err(|e| format!("write: {}", e))?; @@ -415,7 +449,11 @@ async fn command_handler( // Create a oneshot channel for the result let (tx, rx) = oneshot::channel::(); - state.pending_commands.write().await.insert(cmd_id.clone(), tx); + state + .pending_commands + .write() + .await + .insert(cmd_id.clone(), tx); // Forward command to extension via WebSocket { @@ -446,7 +484,10 @@ async fn command_handler( } else { StatusCode::UNPROCESSABLE_ENTITY }; - (status, Json(serde_json::to_value(result).unwrap_or(json!({})))) + ( + status, + Json(serde_json::to_value(result).unwrap_or(json!({}))), + ) } Ok(Err(_)) => ( StatusCode::INTERNAL_SERVER_ERROR, @@ -510,7 +551,11 @@ async fn handle_extension_ws(state: Arc, socket: WebSocket) { continue; } if msg_type == "ai-generate" { - let stream_id = parsed.get("streamId").and_then(|s| s.as_str()).unwrap_or("").to_string(); + let stream_id = parsed + .get("streamId") + .and_then(|s| s.as_str()) + .unwrap_or("") + .to_string(); let state_clone = state.clone(); let body_json = parsed.clone(); tokio::spawn(async move { @@ -566,7 +611,11 @@ async fn handle_extension_ws(state: Arc, socket: WebSocket) { } // ─── AI Stream via existing extension WS ──────────────────────── -async fn handle_ai_stream_via_ws(state: Arc, stream_id: String, body: serde_json::Value) { +async fn handle_ai_stream_via_ws( + state: Arc, + stream_id: String, + body: serde_json::Value, +) { // Helper to send message back through extension WS async fn send_ws(state: &Arc, msg: 
serde_json::Value) { let mut tx = state.extension_tx.lock().await; @@ -579,33 +628,57 @@ async fn handle_ai_stream_via_ws(state: Arc, stream_id: String, bod let home = std::env::var("HOME") .or_else(|_| std::env::var("USERPROFILE")) .unwrap_or_else(|_| ".".to_string()); - let config_path = std::path::PathBuf::from(&home).join(".autocli").join("config.json"); + let config_path = std::path::PathBuf::from(&home) + .join(".autocli") + .join("config.json"); let token = std::fs::read_to_string(&config_path) .ok() .and_then(|c| serde_json::from_str::(&c).ok()) - .and_then(|v| v.get("autocli-token").and_then(|t| t.as_str()).map(String::from)) + .and_then(|v| { + v.get("autocli-token") + .and_then(|t| t.as_str()) + .map(String::from) + }) .unwrap_or_default(); if token.is_empty() { - send_ws(&state, json!({ "type": "ai-stream-error", "streamId": stream_id, "error": "No token" })).await; + send_ws( + &state, + json!({ "type": "ai-stream-error", "streamId": stream_id, "error": "No token" }), + ) + .await; return; } - let api_base = std::env::var("AUTOCLI_API_BASE") - .unwrap_or_else(|_| "https://www.autocli.ai".to_string()); - let url = format!("{}/api/ai/extension-generate", api_base.trim_end_matches('/')); + let api_base = + std::env::var("AUTOCLI_API_BASE").unwrap_or_else(|_| "https://www.autocli.ai".to_string()); + let url = format!( + "{}/api/ai/extension-generate", + api_base.trim_end_matches('/') + ); // Build request body from the message - let is_private = body.get("body").and_then(|b| b.get("private")).and_then(|v| v.as_bool()).unwrap_or(false); + let is_private = body + .get("body") + .and_then(|b| b.get("private")) + .and_then(|v| v.as_bool()) + .unwrap_or(false); let request_body = json!({ "captured_data": body.get("body").and_then(|b| b.get("captured_data")).cloned().unwrap_or(json!(null)), "stream": true, }); - let client = match reqwest::Client::builder().timeout(std::time::Duration::from_secs(300)).build() { + let client = match reqwest::Client::builder() + 
.timeout(std::time::Duration::from_secs(300)) + .build() + { Ok(c) => c, Err(e) => { - send_ws(&state, json!({ "type": "ai-stream-error", "streamId": stream_id, "error": e.to_string() })).await; + send_ws( + &state, + json!({ "type": "ai-stream-error", "streamId": stream_id, "error": e.to_string() }), + ) + .await; return; } }; @@ -620,7 +693,11 @@ async fn handle_ai_stream_via_ws(state: Arc, stream_id: String, bod { Ok(r) => r, Err(e) => { - send_ws(&state, json!({ "type": "ai-stream-error", "streamId": stream_id, "error": e.to_string() })).await; + send_ws( + &state, + json!({ "type": "ai-stream-error", "streamId": stream_id, "error": e.to_string() }), + ) + .await; return; } }; @@ -637,11 +714,19 @@ async fn handle_ai_stream_via_ws(state: Arc, stream_id: String, bod while let Some(chunk) = resp.chunk().await.unwrap_or(None) { all_bytes.extend_from_slice(&chunk); if let Ok(text) = std::str::from_utf8(&chunk) { - send_ws(&state, json!({ "type": "ai-stream-chunk", "streamId": stream_id, "data": text })).await; + send_ws( + &state, + json!({ "type": "ai-stream-chunk", "streamId": stream_id, "data": text }), + ) + .await; } } - send_ws(&state, json!({ "type": "ai-stream-done", "streamId": stream_id })).await; + send_ws( + &state, + json!({ "type": "ai-stream-done", "streamId": stream_id }), + ) + .await; // Post-processing: save + upload let full_text = String::from_utf8_lossy(&all_bytes).to_string(); @@ -665,33 +750,55 @@ async fn handle_ai_stream_socket(mut socket: WebSocket) { // Wait for client message with request body let request_body = match socket.recv().await { Some(Ok(Message::Text(text))) => text, - _ => { let _ = socket.close().await; return; } + _ => { + let _ = socket.close().await; + return; + } }; // Read token let home = std::env::var("HOME") .or_else(|_| std::env::var("USERPROFILE")) .unwrap_or_else(|_| ".".to_string()); - let config_path = std::path::PathBuf::from(&home).join(".autocli").join("config.json"); + let config_path = 
std::path::PathBuf::from(&home) + .join(".autocli") + .join("config.json"); let token = std::fs::read_to_string(&config_path) .ok() .and_then(|c| serde_json::from_str::(&c).ok()) - .and_then(|v| v.get("autocli-token").and_then(|t| t.as_str()).map(String::from)) + .and_then(|v| { + v.get("autocli-token") + .and_then(|t| t.as_str()) + .map(String::from) + }) .unwrap_or_default(); if token.is_empty() { - let _ = socket.send(Message::Text("data: {\"error\":\"No token configured\"}\n\n".into())).await; + let _ = socket + .send(Message::Text( + "data: {\"error\":\"No token configured\"}\n\n".into(), + )) + .await; let _ = socket.close().await; return; } - let api_base = std::env::var("AUTOCLI_API_BASE") - .unwrap_or_else(|_| "https://www.autocli.ai".to_string()); - let url = format!("{}/api/ai/extension-generate", api_base.trim_end_matches('/')); + let api_base = + std::env::var("AUTOCLI_API_BASE").unwrap_or_else(|_| "https://www.autocli.ai".to_string()); + let url = format!( + "{}/api/ai/extension-generate", + api_base.trim_end_matches('/') + ); - let client = match reqwest::Client::builder().timeout(std::time::Duration::from_secs(300)).build() { + let client = match reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(300)) + .build() + { Ok(c) => c, - Err(_) => { let _ = socket.close().await; return; } + Err(_) => { + let _ = socket.close().await; + return; + } }; let mut resp = match client @@ -704,7 +811,11 @@ async fn handle_ai_stream_socket(mut socket: WebSocket) { { Ok(r) => r, Err(e) => { - let _ = socket.send(Message::Text(format!("data: {{\"error\":\"{}\"}}\n\n", e).into())).await; + let _ = socket + .send(Message::Text( + format!("data: {{\"error\":\"{}\"}}\n\n", e).into(), + )) + .await; let _ = socket.close().await; return; } @@ -713,8 +824,20 @@ async fn handle_ai_stream_socket(mut socket: WebSocket) { if !resp.status().is_success() { let status = resp.status().as_u16(); let body = resp.text().await.unwrap_or_default(); - let err_body = 
body.replace('"', "\\\"").chars().take(200).collect::(); - let _ = socket.send(Message::Text(format!("data: {{\"error\":\"{}: {}\"}}\n\nstatus: {}", status, err_body, status).into())).await; + let err_body = body + .replace('"', "\\\"") + .chars() + .take(200) + .collect::(); + let _ = socket + .send(Message::Text( + format!( + "data: {{\"error\":\"{}: {}\"}}\n\nstatus: {}", + status, err_body, status + ) + .into(), + )) + .await; let _ = socket.close().await; // Don't save/upload on error @@ -729,7 +852,12 @@ async fn handle_ai_stream_socket(mut socket: WebSocket) { let mut chunk_count = 0u32; while let Some(chunk) = resp.chunk().await.unwrap_or(None) { chunk_count += 1; - tracing::debug!(chunk_count, size = chunk.len(), elapsed_ms = stream_start.elapsed().as_millis() as u64, "AI stream chunk received"); + tracing::debug!( + chunk_count, + size = chunk.len(), + elapsed_ms = stream_start.elapsed().as_millis() as u64, + "AI stream chunk received" + ); all_bytes.extend_from_slice(&chunk); if let Ok(text) = std::str::from_utf8(&chunk) { line_buffer.push_str(text); @@ -740,7 +868,12 @@ async fn handle_ai_stream_socket(mut socket: WebSocket) { } } } - tracing::debug!(total_chunks = chunk_count, total_bytes = all_bytes.len(), total_ms = stream_start.elapsed().as_millis() as u64, "AI stream complete"); + tracing::debug!( + total_chunks = chunk_count, + total_bytes = all_bytes.len(), + total_ms = stream_start.elapsed().as_millis() as u64, + "AI stream complete" + ); if !line_buffer.is_empty() { let _ = socket.send(Message::Text(line_buffer.into())).await; } @@ -758,31 +891,34 @@ async fn handle_ai_stream_socket(mut socket: WebSocket) { /// Compare semver: returns true if `latest` is newer than `current`. 
fn is_newer_version(latest: &str, current: &str) -> bool { - let parse = |v: &str| -> Vec { - v.split('.').filter_map(|s| s.parse().ok()).collect() - }; + let parse = |v: &str| -> Vec { v.split('.').filter_map(|s| s.parse().ok()).collect() }; let l = parse(latest); let c = parse(current); for i in 0..3 { let lv = l.get(i).copied().unwrap_or(0); let cv = c.get(i).copied().unwrap_or(0); - if lv > cv { return true; } - if lv < cv { return false; } + if lv > cv { + return true; + } + if lv < cv { + return false; + } } false } // ─── Update check ─────────────────────────────────────────────── -static CACHED_UPDATE: std::sync::OnceLock>> = std::sync::OnceLock::new(); +static CACHED_UPDATE: std::sync::OnceLock>> = + std::sync::OnceLock::new(); fn update_cache() -> &'static tokio::sync::RwLock> { CACHED_UPDATE.get_or_init(|| tokio::sync::RwLock::new(None)) } async fn check_and_cache_update() { - let api_base = std::env::var("AUTOCLI_API_BASE") - .unwrap_or_else(|_| "https://www.autocli.ai".to_string()); + let api_base = + std::env::var("AUTOCLI_API_BASE").unwrap_or_else(|_| "https://www.autocli.ai".to_string()); let url = format!("{}/api/version/latest", api_base.trim_end_matches('/')); let client = match reqwest::Client::builder() @@ -823,11 +959,14 @@ async fn check_update_handler() -> impl IntoResponse { let cache = update_cache().read().await; match &*cache { Some(data) => (StatusCode::OK, Json(data.clone())), - None => (StatusCode::OK, Json(json!({ - "current_version": env!("CARGO_PKG_VERSION"), - "latest_version": "", - "update_available": false, - }))), + None => ( + StatusCode::OK, + Json(json!({ + "current_version": env!("CARGO_PKG_VERSION"), + "latest_version": "", + "update_available": false, + })), + ), } } diff --git a/crates/autocli-browser/src/daemon_client.rs b/crates/autocli-browser/src/daemon_client.rs index be82789..92117fc 100644 --- a/crates/autocli-browser/src/daemon_client.rs +++ b/crates/autocli-browser/src/daemon_client.rs @@ -17,8 +17,10 @@ const 
RETRY_DELAYS_MS: [u64; 4] = [200, 500, 1000, 2000]; impl DaemonClient { /// Create a new client pointing at the given port on localhost. pub fn new(port: u16) -> Self { + // 5 minute timeout — linkedin --limit 0 --with_jd can take several minutes + // due to scrolling the full job list and fetching descriptions for each. let client = reqwest::Client::builder() - .timeout(Duration::from_secs(30)) + .timeout(Duration::from_secs(300)) .build() .expect("failed to build reqwest client"); Self { diff --git a/crates/autocli-browser/src/lib.rs b/crates/autocli-browser/src/lib.rs index 004ac3b..d8ab2e5 100644 --- a/crates/autocli-browser/src/lib.rs +++ b/crates/autocli-browser/src/lib.rs @@ -1,18 +1,18 @@ // Architecture and protocol design derived from OpenCLI // (https://github.com/jackwener/opencli) by jackwener, Apache-2.0 -pub mod types; +pub mod bridge; +pub mod cdp; +pub mod daemon; pub mod daemon_client; -pub mod page; pub mod dom_helpers; +pub mod page; pub mod stealth; -pub mod daemon; -pub mod bridge; -pub mod cdp; +pub mod types; pub use bridge::BrowserBridge; -pub use page::DaemonPage; pub use cdp::CdpPage; pub use daemon::Daemon; pub use daemon_client::DaemonClient; +pub use page::DaemonPage; pub use types::{DaemonCommand, DaemonResult, ReadArticle}; diff --git a/crates/autocli-cli/src/main.rs b/crates/autocli-cli/src/main.rs index ddcbb2b..4f49e5f 100644 --- a/crates/autocli-cli/src/main.rs +++ b/crates/autocli-cli/src/main.rs @@ -539,6 +539,21 @@ async fn main() { tracing::debug!(port, version = current_version, "Spawned daemon in background"); } } + + // Wait for newly-spawned daemon to become ready before proceeding. + // Without this, BrowserBridge may find the daemon listening but not fully + // initialized, causing the first /command POST to fail with a transient + // "Request error" and triggering WARN retries. 
+ let health_url = format!("http://127.0.0.1:{}/health", port); + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5); + while std::time::Instant::now() < deadline { + if let Some(c) = &client { + if let Ok(resp) = c.get(&health_url).send().await { + if resp.status().is_success() { break; } + } + } + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + } } } diff --git a/crates/autocli-discovery/src/yaml_parser.rs b/crates/autocli-discovery/src/yaml_parser.rs index c71eacb..44ab4b3 100644 --- a/crates/autocli-discovery/src/yaml_parser.rs +++ b/crates/autocli-discovery/src/yaml_parser.rs @@ -185,4 +185,32 @@ domain: www.bilibili.com let yaml = "name: test\n"; assert!(parse_yaml_adapter(yaml).is_err()); } + + #[test] + fn test_linkedin_recommended_declares_jd_toggle() { + let yaml = include_str!("../../../adapters/linkedin/recommended.yaml"); + let cmd = parse_yaml_adapter(yaml).unwrap(); + + assert_eq!(cmd.site, "linkedin"); + assert_eq!(cmd.name, "recommended"); + assert!(cmd.func.is_none(), "linkedin recommended must use YAML pipeline path"); + assert!(cmd.columns.iter().any(|col| col == "jd")); + assert!(cmd.columns.iter().any(|col| col == "external_url")); + assert!(yaml.contains("companyApplyUrl")); + + let with_jd = cmd + .args + .iter() + .find(|arg| arg.name == "with_jd") + .expect("with_jd arg should be declared by the YAML adapter"); + assert_eq!(with_jd.arg_type, ArgType::Bool); + assert_eq!(with_jd.default, Some(Value::Bool(false))); + + let pipeline = serde_json::to_string(&cmd.pipeline).unwrap(); + assert!(pipeline.contains("withJd")); + assert!(pipeline.contains("/voyager/api/jobs/jobPostings/")); + assert!(pipeline.contains("description")); + assert!(pipeline.contains("jd:")); + assert!(pipeline.contains("limit > 0")); + } } diff --git a/deploy/.env.example b/deploy/.env.example new file mode 100644 index 0000000..8e1b295 --- /dev/null +++ b/deploy/.env.example @@ -0,0 +1,22 @@ +# Cloudflare Tunnel (token mode — 
credentials NOT used) +CLOUDFLARE_TUNNEL_TOKEN= + +# Supabase +SUPABASE_URL= +SUPABASE_SERVICE_ROLE_KEY= +SUPABASE_ANON_KEY= + +# API auth (defense-in-depth on top of Cloudflare Access). +# Generate: openssl rand -hex 32 +API_RUN_TOKEN= + +# VNC password (generate: openssl rand -base64 18). NEVER use the dev value 'stagehand' in prod. +VNC_PASSWORD= + +# Grafana admin (generate: openssl rand -hex 16) +GF_SECURITY_ADMIN_PASSWORD= + +# Scheduling +TZ=Europe/London +# Cron schedule lives in deploy/daily/crontab (supercronic reads the file +# verbatim, no env substitution). To change the time, edit that file. diff --git a/deploy/PLAN.md b/deploy/PLAN.md new file mode 100644 index 0000000..834399d --- /dev/null +++ b/deploy/PLAN.md @@ -0,0 +1,2501 @@ +# AutoCLI Daily Microservice — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use `superpowers:subagent-driven-development` (recommended) or `superpowers:executing-plans` to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Convert the manual daily `autocli linkedin recommended … | uv run scripts/sync_autocli_jobs.py` flow into an auto-scheduled, externally accessible microservice (5 containers, daily cron, HTTP API, Cloudflare Tunnel, Prometheus+Grafana), deployed to `100.108.80.9`. + +**Architecture:** 5-container docker-compose stack on a dedicated Docker host. `autocli-chrome` (Stagehand-style VNC Chromium with persistent profile + CDP 9222). `autocli-daily` (Python+uv+supercronic+FastAPI; pre-built `autocli` linux/amd64 binary copied in). `cloudflared` (Tunnel token mode, ingress managed in Cloudflare dashboard). `prometheus` + `grafana` (monitoring). Pull-based deploy via GHCR + existing Watchtower. Cloudflare Access enforces auth on all 4 subdomains (`vnc/cdp/api/grafana`.autocli.). Reference: [SPEC.md](./SPEC.md). 
+ +**Tech Stack:** Rust 1.94 (autocli binary), Debian Bookworm (base), Chromium + Xvfb + x11vnc + noVNC (chrome image), Python 3.12 + uv + FastAPI + supercronic (daily image), `cloudflare/cloudflared:2025.4.0`, `prom/prometheus:v3.5.0`, `grafana/grafana:11.6.0`, GitHub Actions (CI), `docker/metadata-action@v5`, `docker/build-push-action@v6`. + +**Worktree:** `/Users/sanchezrick/Documents/Github/AutoCLI-daily/` on branch `feat/daily-microservice` (already created from `origin/main`). + +--- + +## File map + +### Created in this PR + +| Path | Purpose | +|---|---| +| `rust-toolchain.toml` | Workspace toolchain pin (1.94) — keeps local / CI / Phase 0 builder in sync | +| `crates/autocli-browser/src/bridge.rs` | **MODIFY**: add `AUTOCLI_CDP_ENDPOINT` branch returning `CdpPage` | +| `crates/autocli-browser/src/bridge.rs` tests | **MODIFY**: add unit test for the env-var branch | +| `deploy/chrome/Dockerfile` | Stagehand-style Chromium + Xvfb + noVNC + socat (copy of my-stagehand-app/Dockerfile.chrome) | +| `deploy/chrome/entrypoint-vnc.sh` | Xvfb → x11vnc → noVNC → socat → Chromium (copy of my-stagehand-app/scripts/entrypoint-vnc.sh) | +| `deploy/daily/Dockerfile` | Python 3.12 + uv + supercronic + pre-built `autocli` binary | +| `deploy/daily/entrypoint.sh` | Boot-time `cdp-discover.sh` → supercronic + uvicorn under tini | +| `deploy/daily/cdp-discover.sh` | `GET /json/list` → if empty `PUT /json/new?about:blank` → rewrite host → write `/run/cdp-endpoint.env` | +| `deploy/daily/run-daily.sh` | `flock` + re-run `cdp-discover.sh` + `source env` + autocli + sync + retry policy | +| `deploy/daily/crontab` | `0 3 * * * /app/run-daily.sh` (TZ=Europe/London) + 04:00 retention sweep | +| `deploy/daily/api/pyproject.toml` | uv project: fastapi, uvicorn, supabase, prometheus-client, httpx | +| `deploy/daily/api/main.py` | FastAPI: `/api/{status,run,logs,metrics,health}` + `/jobs` | +| `deploy/daily/api/trigger.py` | Shared subprocess executor (cron + `/api/run` call into same 
code) | +| `deploy/daily/api/tests/test_main.py` | FastAPI auth/route tests with httpx TestClient | +| `deploy/prometheus/prometheus.yml` | Single scrape job for `autocli-daily:8080` with `metrics_path: /api/metrics` | +| `deploy/grafana/provisioning/datasources/prometheus.yml` | Pre-provisioned Prometheus datasource | +| `deploy/grafana/provisioning/dashboards/dashboards.yml` | Dashboard provider config | +| `deploy/grafana/provisioning/dashboards/autocli.json` | The 6-panel dashboard JSON | +| `deploy/docker-compose.yml` | Production stack (5 services + named volumes + watchtower labels) | +| `deploy/docker-compose.local.yml` | Local override: bind localhost ports, disable cloudflared | +| `deploy/.env.example` | Empty template — every required var listed | +| `deploy/README.md` | Deploy runbook + secret transfer + Cloudflare dashboard checklist | +| `.github/workflows/deploy-microservice.yml` | CI: rust build → 2 docker images → GHCR push with conditional tags | + +### NOT modified + +`crates/autocli-pipeline`, `autocli-discovery`, `autocli-core`, `autocli-cli`, every YAML adapter, every script under `scripts/` — these stay untouched because the IPage trait is the only contract the Rust patch changes. 
+ +--- + +## Phase A — Repo hygiene + Rust prerequisite patch + +### Task 1: Pin Rust toolchain workspace-wide + +**Files:** +- Create: `rust-toolchain.toml` + +- [ ] **Step 1: Create the toolchain file** + +Write `rust-toolchain.toml` at repo root: +```toml +[toolchain] +channel = "1.94" +components = ["rustfmt", "clippy"] +profile = "minimal" +``` + +- [ ] **Step 2: Verify cargo picks it up** + +Run from worktree root: +```bash +cargo --version +# Expected: cargo 1.94.x (anything) +rustup show active-toolchain +# Expected: 1.94- (from 'rust-toolchain.toml') +``` + +- [ ] **Step 3: Commit** + +```bash +git add rust-toolchain.toml +git commit -m "chore: pin workspace Rust toolchain to 1.94 + +Aligns local dev (operator was on rustc 1.94.1), CI (was using +ubuntu-latest default), and the Phase 0 Docker builder +(deploy/SPEC.md). Single source of truth; future bumps touch only +this file." +``` + +--- + +### Task 2: BrowserBridge CDP-wiring patch — write failing test + +**Files:** +- Modify: `crates/autocli-browser/src/bridge.rs` (test module at bottom) + +- [ ] **Step 1: Append a new failing test to the existing `#[cfg(test)] mod tests` block** + +Open `crates/autocli-browser/src/bridge.rs`. Find the `#[cfg(test)] mod tests` at the bottom. Add this test below `test_bridge_default_port`: + +```rust + #[tokio::test] + async fn test_connect_uses_cdp_endpoint_when_env_var_set() { + use std::env; + + // Set AUTOCLI_CDP_ENDPOINT to an unreachable address. + // We expect a BrowserConnect error (not a "Chrome not running" error from the daemon path). + // NOTE: env vars are process-global and `cargo test` runs tests on parallel threads by default, + // so no other test in this binary may touch AUTOCLI_CDP_ENDPOINT; otherwise use `serial_test`.
+ env::set_var("AUTOCLI_CDP_ENDPOINT", "ws://127.0.0.1:1/devtools/page/never"); + let mut bridge = BrowserBridge::default_port(); + let result = bridge.connect().await; + env::remove_var("AUTOCLI_CDP_ENDPOINT"); + + // The CDP path took over (not the daemon path) — error must come from CdpPage::connect. + // CdpPage::connect on unreachable target produces CliError::BrowserConnect with + // message starting "Failed to connect to CDP endpoint". + let err = result.expect_err("connect() should fail against an unreachable CDP endpoint"); + let msg = format!("{err}"); + assert!( + msg.contains("Failed to connect to CDP endpoint") || msg.contains("CDP"), + "expected CDP-path error, got: {msg}" + ); + assert!( + !msg.contains("Chrome is not running"), + "got daemon-path error — CDP env-var branch was not taken: {msg}" + ); + } +``` + +- [ ] **Step 2: Run the test and confirm it fails** + +```bash +cargo test -p autocli-browser bridge::tests::test_connect_uses_cdp_endpoint_when_env_var_set -- --nocapture +``` + +Expected: test FAILS — either because the daemon path took over and reports "Chrome is not running" (the bug we are about to fix), or because `connect()` doesn't yet check the env var at all. + +--- + +### Task 3: BrowserBridge CDP-wiring patch — implement + +**Files:** +- Modify: `crates/autocli-browser/src/bridge.rs:33-35` + +- [ ] **Step 1: Inspect current `connect()`** + +```bash +grep -n "pub async fn connect" crates/autocli-browser/src/bridge.rs +``` + +Expected: line 33 starts `pub async fn connect`. + +- [ ] **Step 2: Replace `connect()` body with the env-var branch** + +In `bridge.rs`, find: +```rust + pub async fn connect(&mut self) -> Result, CliError> { + Ok(self.connect_daemon_page().await?) + } +``` + +Replace with: +```rust + pub async fn connect(&mut self) -> Result, CliError> { + // CDP-direct path: bypass daemon + extension when AUTOCLI_CDP_ENDPOINT is set. + // Used by the autocli-daily microservice (deploy/SPEC.md §5.1). 
+ if let Ok(endpoint) = std::env::var("AUTOCLI_CDP_ENDPOINT") { + if !endpoint.is_empty() { + let page = crate::CdpPage::connect(&endpoint).await?; + return Ok(Arc::new(page)); + } + } + Ok(self.connect_daemon_page().await?) + } +``` + +- [ ] **Step 3: Run the test and confirm it passes** + +```bash +cargo test -p autocli-browser bridge::tests::test_connect_uses_cdp_endpoint_when_env_var_set -- --nocapture +``` + +Expected: PASS. + +- [ ] **Step 4: Run the whole crate's tests to confirm no regression** + +```bash +cargo test -p autocli-browser +``` + +Expected: all tests pass. The two existing `test_bridge_construction` / `test_bridge_default_port` still pass. + +- [ ] **Step 5: Commit** + +```bash +git add crates/autocli-browser/src/bridge.rs +git commit -m "feat(browser): wire CdpPage into BrowserBridge::connect + +Add AUTOCLI_CDP_ENDPOINT env-var branch at the top of +BrowserBridge::connect. When set, skip daemon spawn + extension +polling and return Arc directly. The IPage trait contract +is unchanged so pipeline executors and YAML adapters consume either +implementation transparently. + +Required prerequisite for the autocli-daily microservice +(deploy/SPEC.md §1.A) which runs autocli in a container with no +Chrome extension or daemon, connecting to a sibling Chrome container +via CDP." +``` + +--- + +### Task 4: Manual smoke test of the Rust patch against local Stagehand Chrome + +**Files:** none modified + +- [ ] **Step 1: Build release binary** + +```bash +cargo build --release -p autocli +``` + +Expected: builds; binary at `target/release/autocli`. 
+ +- [ ] **Step 2: Confirm local Stagehand Chrome is up + logged into LinkedIn** + +```bash +docker ps --filter "name=stagehand-chrome" --format "{{.Status}} {{.Ports}}" +# Expected: a "Up …" line with 9222 and 6080 ports +curl -s http://localhost:9222/json/version | jq -r '.Browser' +# Expected: non-empty Chrome version string +``` + +If Chrome isn't running locally, start it from `~/Documents/Github/my-stagehand-app/` (the operator's existing setup). + +- [ ] **Step 3: Extract page WS URL** + +```bash +WS_URL=$(curl -s http://localhost:9222/json/list \ + | jq -r '[.[] | select(.type == "page")][0].webSocketDebuggerUrl') +echo "WS_URL=${WS_URL}" +# Expected: ws://localhost:9223/devtools/page/ or ws://127.0.0.1:9223/... +``` + +If the list is empty: +```bash +WS_URL=$(curl -s -X PUT "http://localhost:9222/json/new?about:blank" | jq -r '.webSocketDebuggerUrl') +echo "WS_URL=${WS_URL}" +``` + +- [ ] **Step 4: Run autocli LinkedIn recommended through CDP** + +```bash +AUTOCLI_CDP_ENDPOINT="${WS_URL}" \ + ./target/release/autocli linkedin recommended --limit 5 --with_jd false -f json \ + > /tmp/cdp-smoketest.json +jq 'length' /tmp/cdp-smoketest.json +# Expected: an integer ≥ 1 +jq '.[0] | keys' /tmp/cdp-smoketest.json +# Expected: array including "title", "company", "url" etc. +``` + +- [ ] **Step 5: Record success** + +If the JSON has real job rows, the patch is verified. If empty or error, halt the plan and debug — the rest depends on this working. + +No commit needed (no source changes). 
+ +--- + +## Phase B — `deploy/` scaffold + +### Task 5: deploy/chrome — Dockerfile + +**Files:** +- Create: `deploy/chrome/Dockerfile` + +- [ ] **Step 1: Create the file** + +Copy verbatim from `~/Documents/Github/my-stagehand-app/Dockerfile.chrome`: +```dockerfile +FROM debian:bookworm-slim + +# Install Chromium and dependencies +RUN apt-get update && apt-get install -y \ + chromium \ + curl \ + wget \ + ca-certificates \ + fonts-liberation \ + fonts-noto-cjk \ + fonts-noto-color-emoji \ + libnss3 libgtk-3-0 libdrm2 libgbm1 libasound2 \ + pulseaudio \ + xdg-utils \ + xvfb \ + x11-utils \ + x11-xserver-utils \ + xterm \ + x11vnc \ + novnc \ + websockify \ + autocutsel \ + xclip \ + x11-apps \ + supervisor \ + socat \ + tini \ + --no-install-recommends && \ + rm -rf /var/lib/apt/lists/* + +# Create user data directory +RUN mkdir -p /root/.config/chromium && \ + chmod -R 755 /root/.config/chromium && \ + mkdir -p /tmp/vnc + +COPY deploy/chrome/entrypoint-vnc.sh /usr/local/bin/entrypoint-vnc.sh +RUN chmod +x /usr/local/bin/entrypoint-vnc.sh + +EXPOSE 9222 5900 6080 + +ENTRYPOINT ["tini", "--", "/usr/local/bin/entrypoint-vnc.sh"] +``` + +Note the **single change** from my-stagehand-app: `COPY deploy/chrome/entrypoint-vnc.sh ...` (was `COPY scripts/entrypoint-vnc.sh ...`), because Phase 0 / CI both use repo-root context per SPEC §4.1. + +- [ ] **Step 2: Commit** + +```bash +git add deploy/chrome/Dockerfile +git commit -m "feat(deploy): chrome image Dockerfile + +Copy of my-stagehand-app/Dockerfile.chrome with the COPY path +rewritten for repo-root build context (deploy/SPEC.md §4.1)." 
+``` + +--- + +### Task 6: deploy/chrome — entrypoint-vnc.sh + +**Files:** +- Create: `deploy/chrome/entrypoint-vnc.sh` + +- [ ] **Step 1: Copy the file** + +```bash +cp ~/Documents/Github/my-stagehand-app/scripts/entrypoint-vnc.sh deploy/chrome/entrypoint-vnc.sh +chmod +x deploy/chrome/entrypoint-vnc.sh +``` + +- [ ] **Step 2: Verify content** + +```bash +head -5 deploy/chrome/entrypoint-vnc.sh +# Expected: starts with "#!/bin/bash" and "# Docker Chrome (VNC 可视化模式) 启动脚本" +grep -c "exec chromium" deploy/chrome/entrypoint-vnc.sh +# Expected: 1 +``` + +- [ ] **Step 3: Commit** + +```bash +git add deploy/chrome/entrypoint-vnc.sh +git commit -m "feat(deploy): chrome image entrypoint-vnc.sh + +Verbatim from my-stagehand-app/scripts/entrypoint-vnc.sh: +Xvfb -> x11vnc -> noVNC -> socat 9222->9223 -> Chromium with +--remote-debugging-port=9223 --user-data-dir=/root/.config/chromium. +Extension loading via /opt/extensions/*/manifest.json is preserved +even though this design ships with no extensions." 
+``` + +--- + +### Task 7: deploy/daily — Dockerfile + +**Files:** +- Create: `deploy/daily/Dockerfile` + +- [ ] **Step 1: Create the file** + +```dockerfile +# syntax=docker/dockerfile:1.7 +FROM python:3.12-slim-bookworm + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + UV_LINK_MODE=copy \ + UV_PROJECT_ENVIRONMENT=/opt/venv \ + PATH=/opt/venv/bin:/usr/local/bin:/usr/bin:/bin + +# OS deps: tini for PID-1, jq for cdp-discover, curl for healthcheck, util-linux for flock +RUN apt-get update && apt-get install -y --no-install-recommends \ + tini curl jq ca-certificates util-linux tzdata \ + && rm -rf /var/lib/apt/lists/* + +# supercronic: container-friendly cron +ARG SUPERCRONIC_VERSION=v0.2.30 +ARG SUPERCRONIC_SHA1SUM=9aeb41e00cc7b71d30d33c57a2333f2c2581a201 +RUN curl -fsSLO "https://github.com/aptible/supercronic/releases/download/${SUPERCRONIC_VERSION}/supercronic-linux-amd64" \ + && echo "${SUPERCRONIC_SHA1SUM} supercronic-linux-amd64" | sha1sum -c - \ + && mv supercronic-linux-amd64 /usr/local/bin/supercronic \ + && chmod +x /usr/local/bin/supercronic + +# uv (Astral) — single static binary +RUN curl -LsSf https://astral.sh/uv/install.sh | env INSTALLER_NO_MODIFY_PATH=1 sh \ + && mv /root/.local/bin/uv /usr/local/bin/uv + +WORKDIR /app + +# Python deps first (cache-friendly) +COPY deploy/daily/api/pyproject.toml deploy/daily/api/uv.lock* /app/api/ +RUN cd /app/api && uv sync --frozen --no-dev || uv sync --no-dev + +# Shipped sync script & priority scorer +COPY scripts/sync_autocli_jobs.py scripts/job_priority_scorer.py scripts/job_priority_config.py /app/scripts/ + +# FastAPI app +COPY deploy/daily/api /app/api + +# Shell glue +COPY deploy/daily/cdp-discover.sh deploy/daily/run-daily.sh deploy/daily/entrypoint.sh /app/ +RUN chmod +x /app/cdp-discover.sh /app/run-daily.sh /app/entrypoint.sh + +COPY deploy/daily/crontab /etc/cron.d/autocli + +# Pre-built autocli binary (produced by Phase 0 docker-rust step OR CI build-autocli-binary job) +COPY 
deploy/daily/bin/autocli /app/bin/autocli +RUN chmod +x /app/bin/autocli + +# Writable runtime dirs +RUN mkdir -p /data/output /data/logs /run && \ + install -m 0644 /dev/null /data/logs/.keep && \ + install -m 0644 /dev/null /data/output/.keep + +ENV TZ=Europe/London \ + CRON_SCHEDULE="0 3 * * *" \ + OUTPUT_RETENTION_DAYS=30 + +EXPOSE 8080 + +ENTRYPOINT ["tini", "--", "/app/entrypoint.sh"] +``` + +- [ ] **Step 2: Commit** + +```bash +git add deploy/daily/Dockerfile +git commit -m "feat(deploy): daily image Dockerfile + +Single-stage linux/amd64 image: +- python:3.12-slim-bookworm base +- tini (PID 1), util-linux (flock), jq (CDP discovery), curl (probes) +- supercronic (container cron) pinned to v0.2.30 with sha1 verify +- uv (Astral) for Python deps +- Pre-built autocli binary copied from deploy/daily/bin/ +- FastAPI app + scripts/sync_autocli_jobs.py + scorer modules +- Boot via tini -> entrypoint.sh +- TZ=Europe/London, CRON_SCHEDULE default 03:00." +``` + +--- + +### Task 8: deploy/daily — cdp-discover.sh + +**Files:** +- Create: `deploy/daily/cdp-discover.sh` + +- [ ] **Step 1: Create the file** + +```bash +#!/usr/bin/env bash +# Discover the CDP page-target WebSocket URL on autocli-chrome and +# write it to /run/cdp-endpoint.env as AUTOCLI_CDP_ENDPOINT=... +# Runs once at boot (gating supercronic + uvicorn) AND once at the +# start of every run-daily.sh (page id can change between cron ticks). +# See deploy/SPEC.md §5.2 "Discovery cadence" + "CDP page target".
+ +set -euo pipefail + +CHROME_HOST="${CHROME_HOST:-autocli-chrome}" +CHROME_PORT="${CHROME_PORT:-9222}" +EXT_HOST_PORT="${CHROME_HOST}:${CHROME_PORT}" +DEADLINE=$(( $(date +%s) + 60 )) # 60 s budget +INTERVAL=2 + +while (( $(date +%s) < DEADLINE )); do + if list_json=$(curl -fsS --max-time 3 "http://${EXT_HOST_PORT}/json/list" 2>/dev/null); then + ws=$(jq -r '[.[] | select(.type=="page")][0].webSocketDebuggerUrl // empty' <<<"${list_json}") + if [[ -z "${ws}" || "${ws}" == "null" ]]; then + # No page target yet — create one. PUT, not POST/GET (Chrome >= M86). + new_json=$(curl -fsS --max-time 3 -X PUT "http://${EXT_HOST_PORT}/json/new?about:blank" 2>/dev/null || true) + ws=$(jq -r '.webSocketDebuggerUrl // empty' <<<"${new_json}") + fi + if [[ -n "${ws}" && "${ws}" != "null" ]]; then + # Chrome reports its internal host (localhost:9223). Rewrite to the docker service name. + rewritten=$(sed -E "s|ws://[^/]+|ws://${EXT_HOST_PORT}|" <<<"${ws}") + echo "AUTOCLI_CDP_ENDPOINT=${rewritten}" > /run/cdp-endpoint.env + chmod 0644 /run/cdp-endpoint.env + echo "[cdp-discover] ${rewritten}" + exit 0 + fi + fi + sleep "${INTERVAL}" +done + +echo "[cdp-discover] FATAL: chrome unreachable after 60s" >&2 +exit 1 +``` + +- [ ] **Step 2: Verify shell parses** + +```bash +bash -n deploy/daily/cdp-discover.sh +# Expected: no output (syntax OK) +``` + +- [ ] **Step 3: Commit** + +```bash +chmod +x deploy/daily/cdp-discover.sh +git add deploy/daily/cdp-discover.sh +git commit -m "feat(deploy): cdp-discover.sh + +Find or create a CDP page target on autocli-chrome:9222. 
#!/usr/bin/env bash
# Daily orchestrator. Invoked by:
#   * supercronic (cron tick)
#   * POST /api/run (FastAPI shells out via trigger.py)
# Retry policy: the first attempt runs immediately; every failed attempt is
# followed by a 15s / 60s / 240s sleep and one more attempt.
# NOTE(review): that is up to 4 attempts in total, while the policy is quoted
# as "3 attempts at 15s/60s/240s" (SPEC §5.2) — confirm which one is intended.
# Uses flock so cron + /api/run can't collide.
#
# Exit status: 0 on success, 200 when another run holds the lock, otherwise the
# code of the step that failed on the last attempt (1 = cdp-discover,
# 2 = autocli, 3 = sync_autocli_jobs.py).

set -euo pipefail

LOCK=/var/lock/autocli-daily.lock
LAST_RUN_JSON=/data/output/last_run.json
LOG_DIR=/data/logs
OUTPUT_DIR=/data/output
DATE_STAMP=$(date +%Y%m%d)
LOG_FILE="${LOG_DIR}/run-${DATE_STAMP}.log"

mkdir -p "${LOG_DIR}" "${OUTPUT_DIR}"

# Single-instance gate. -n = non-blocking; -E 200 = exit 200 if already locked.
exec 9>"${LOCK}"
if ! flock -n -E 200 9; then
  echo "[run-daily] another run is in progress; exit 200" >&2
  exit 200
fi

# Write stdin to last_run.json via temp file + rename, so the API never reads
# a truncated or half-written document.
write_last_run() {
  local tmp="${LAST_RUN_JSON}.tmp.$$"
  cat > "${tmp}" && mv -f "${tmp}" "${LAST_RUN_JSON}"
}

# Print numeric field $2 of the JSON object in $1; prints 0 when the field is
# missing, not a number, or the text is not valid JSON (keeps --argjson safe).
summary_count() {
  local value
  value=$(jq -r --arg k "$2" '.[$k] // 0 | tonumber' <<<"$1" 2>/dev/null) || value=0
  echo "${value:-0}"
}

# Run one full attempt: discover CDP endpoint, scrape, sync, record success.
# $1 = attempt number (used for logging only).
# NOTE: this function is called from an `if` condition, where `set -e` is
# suspended, so every step that matters is checked explicitly.
run_once() {
  local attempt="$1"
  local started_at
  started_at=$(date +%s)
  echo "[run-daily] attempt ${attempt} starting at $(date -Iseconds)" | tee -a "${LOG_FILE}"

  # Refresh CDP endpoint every attempt — Chrome may have restarted.
  if ! /app/cdp-discover.sh >>"${LOG_FILE}" 2>&1; then
    echo "[run-daily] cdp-discover failed" >>"${LOG_FILE}"
    return 1
  fi
  # The file holds a plain AUTOCLI_CDP_ENDPOINT=... assignment. Auto-export it:
  # a sourced assignment alone is only a shell variable and would not reach
  # the autocli child process (presumably autocli reads it from its
  # environment — verify against the autocli CLI).
  set -a
  # shellcheck disable=SC1091
  source /run/cdp-endpoint.env
  set +a

  local out="${OUTPUT_DIR}/${DATE_STAMP}.json"
  if ! /app/bin/autocli linkedin recommended --limit 0 --with_jd true -f json > "${out}" 2>>"${LOG_FILE}"; then
    echo "[run-daily] autocli failed" >>"${LOG_FILE}"
    return 2
  fi

  # Sync to Supabase
  if ! uv --project /app/api run --no-project -- python /app/scripts/sync_autocli_jobs.py --input "${out}" >>"${LOG_FILE}" 2>&1; then
    echo "[run-daily] sync_autocli_jobs.py failed" >>"${LOG_FILE}"
    return 3
  fi

  local ended_at
  ended_at=$(date +%s)
  local duration=$(( ended_at - started_at ))

  # Parse counts from the last JSON line printed by sync_autocli_jobs.py
  local summary
  summary=$(grep -E '^\{' "${LOG_FILE}" | tail -1 || echo "{}")
  local upserted scraped skipped
  upserted=$(summary_count "${summary}" upserted)
  scraped=$(summary_count "${summary}" input_rows)
  skipped=$(summary_count "${summary}" skipped)

  jq -n \
    --argjson last_run_unixts "${started_at}" \
    --argjson last_duration_seconds "${duration}" \
    --argjson last_exit_code 0 \
    --argjson rows_scraped "${scraped}" \
    --argjson rows_upserted "${upserted}" \
    --argjson rows_skipped "${skipped}" \
    --arg last_log "$(basename "${LOG_FILE}")" \
    '{last_run_unixts:$last_run_unixts,last_duration_seconds:$last_duration_seconds,last_exit_code:$last_exit_code,rows_scraped:$rows_scraped,rows_upserted:$rows_upserted,rows_skipped:$rows_skipped,last_log:$last_log,errors:[]}' \
    | write_last_run
  echo "[run-daily] attempt ${attempt} succeeded in ${duration}s" | tee -a "${LOG_FILE}"
  return 0
}

backoffs=(15 60 240)
attempt=1
final_rc=0
for sleep_for in "${backoffs[@]}" final; do
  # Capture the failure code in the `else` branch: after `fi`, `$?` is the
  # status of the whole `if` statement, which is 0 when no branch ran, so
  # reading it there would report every failed run as a success.
  if run_once "${attempt}"; then
    final_rc=0
    break
  else
    final_rc=$?
  fi
  if [[ "${sleep_for}" == "final" ]]; then
    break
  fi
  echo "[run-daily] sleeping ${sleep_for}s before retry" | tee -a "${LOG_FILE}"
  sleep "${sleep_for}"
  attempt=$(( attempt + 1 ))
done

# All attempts failed: record the failure so /api/status and the metrics see it.
if (( final_rc != 0 )); then
  jq -n \
    --argjson last_run_unixts "$(date +%s)" \
    --argjson last_exit_code "${final_rc}" \
    --arg last_log "$(basename "${LOG_FILE}")" \
    '{last_run_unixts:$last_run_unixts,last_exit_code:$last_exit_code,rows_scraped:0,rows_upserted:0,rows_skipped:0,last_log:$last_log,errors:["see log"]}' \
    | write_last_run
fi
exit "${final_rc}"
# supercronic crontab — runs in container TZ=Europe/London.
# NOTE(review): CRON_SCHEDULE is set in the image/compose environment, but this
# static file does not read it; the 03:00 schedule below is what actually runs.

# Daily LinkedIn pull
0 3 * * * /app/run-daily.sh

# Output retention: delete scrape outputs older than ${OUTPUT_RETENTION_DAYS:-30} days.
# The variable is expanded by the shell that runs the job, from the container
# environment. last_run.json is excluded because /api/status and /api/metrics
# read it and it must survive a long pause between runs.
0 4 * * * find /data/output -name "*.json" ! -name "last_run.json" -type f -mtime "+${OUTPUT_RETENTION_DAYS:-30}" -delete
+``` + +--- + +### Task 12: deploy/daily/api — pyproject.toml + +**Files:** +- Create: `deploy/daily/api/pyproject.toml` + +- [ ] **Step 1: Create the file** + +```toml +[project] +name = "autocli-daily-api" +version = "0.1.0" +description = "FastAPI control plane for the autocli-daily microservice" +requires-python = ">=3.12" +dependencies = [ + "fastapi>=0.115,<0.116", + "uvicorn[standard]>=0.32,<0.33", + "httpx>=0.28,<0.29", + "supabase>=2.8,<3.0", + "prometheus-client>=0.21,<0.22", + "python-multipart>=0.0.12", +] + +[dependency-groups] +dev = [ + "pytest>=8.3,<9", + "pytest-asyncio>=0.24,<1", + "respx>=0.21,<1", +] + +[tool.uv] +package = false +``` + +- [ ] **Step 2: Resolve lockfile** + +```bash +cd deploy/daily/api && uv lock && cd - +ls deploy/daily/api/uv.lock +# Expected: file exists +``` + +- [ ] **Step 3: Commit** + +```bash +git add deploy/daily/api/pyproject.toml deploy/daily/api/uv.lock +git commit -m "feat(deploy): FastAPI project metadata + lockfile + +uv-managed; pins fastapi/uvicorn/supabase/prometheus-client/httpx +to compatible ranges. Lockfile checked in so the Dockerfile's +'uv sync --frozen' is reproducible." +``` + +--- + +### Task 13: deploy/daily/api — trigger.py + +**Files:** +- Create: `deploy/daily/api/trigger.py` + +- [ ] **Step 1: Create the file** + +```python +"""Subprocess wrapper for /app/run-daily.sh. + +Used by both supercronic (via crontab) and FastAPI /api/run. +Provides a synchronous "is it running?" check via flock probe +and a fire-and-forget spawn for the API path. 
+""" +from __future__ import annotations + +import asyncio +import fcntl +import os +import subprocess +from pathlib import Path + +LOCK_PATH = Path("/var/lock/autocli-daily.lock") +RUN_DAILY = "/app/run-daily.sh" + + +def is_running() -> bool: + """Non-destructive flock probe: returns True if another process holds the lock.""" + if not LOCK_PATH.exists(): + return False + fd = os.open(LOCK_PATH, os.O_RDWR | os.O_CREAT, 0o644) + try: + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + fcntl.flock(fd, fcntl.LOCK_UN) + return False + except BlockingIOError: + return True + finally: + os.close(fd) + + +async def spawn_run_daily() -> int: + """Spawn run-daily.sh in the background. Returns PID. Does NOT wait.""" + proc = await asyncio.create_subprocess_exec( + RUN_DAILY, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + start_new_session=True, + ) + return proc.pid +``` + +- [ ] **Step 2: Commit** + +```bash +git add deploy/daily/api/trigger.py +git commit -m "feat(deploy): trigger.py — shared run-daily executor + +Used by POST /api/run to spawn run-daily.sh non-blockingly. +is_running() is a non-destructive flock probe so /api/status can +report in_progress without affecting the actual run." +``` + +--- + +### Task 14: deploy/daily/api — main.py (FastAPI) + +**Files:** +- Create: `deploy/daily/api/main.py` + +- [ ] **Step 1: Create the file** + +```python +"""FastAPI control plane for autocli-daily. 
+ +Routes (mounted per SPEC §5.1): + GET /api/status [Bearer] last_run.json + POST /api/run [Bearer] spawn run-daily.sh (flock-protected) + GET /api/logs [Bearer] tail -n 200 latest log + GET /api/metrics [open] Prometheus exposition + GET /api/health [open] chrome reachability + cdp endpoint sanity + GET /jobs [Bearer] Supabase read proxy +""" +from __future__ import annotations + +import json +import os +import time +from pathlib import Path +from typing import Annotated + +import httpx +from fastapi import Depends, FastAPI, HTTPException, Query, Response, status +from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, generate_latest + +from . import trigger + +# ── config ─────────────────────────────────────────────────────────── +API_RUN_TOKEN = os.environ["API_RUN_TOKEN"] +SUPABASE_URL = os.environ["SUPABASE_URL"] +SUPABASE_ANON_KEY = os.environ["SUPABASE_ANON_KEY"] +CHROME_HOST = os.environ.get("CHROME_HOST", "autocli-chrome") +CHROME_PORT = int(os.environ.get("CHROME_PORT", "9222")) +LAST_RUN_PATH = Path("/data/output/last_run.json") +LOGS_DIR = Path("/data/logs") +CDP_ENDPOINT_FILE = Path("/run/cdp-endpoint.env") + +# ── metrics ────────────────────────────────────────────────────────── +M_RUNS_TOTAL = Counter( + "autocli_daily_runs_total", + "Run outcomes", + labelnames=("result",), +) +M_LAST_RUN_UNIXTS = Gauge("autocli_daily_last_run_unixts", "Unix ts of last run start") +M_LAST_DURATION = Gauge("autocli_daily_last_duration_seconds", "Duration of last run") +M_LAST_EXIT_CODE = Gauge("autocli_daily_last_exit_code", "Exit code of last run") +M_RUN_IN_PROGRESS = Gauge("autocli_daily_run_in_progress", "1 if a run is in flight") +M_ROWS_SCRAPED = Counter("autocli_daily_rows_scraped_total", "Cumulative scraped rows") +M_ROWS_UPSERTED = Counter("autocli_daily_rows_upserted_total", "Cumulative upserted rows") +M_ROWS_SKIPPED = Counter("autocli_daily_rows_skipped_total", 
"Cumulative skipped rows") +M_CDP_UP = Gauge("autocli_chrome_cdp_up", "1 if chrome:9222 reachable") + +# Counter de-dupe key (do not double-count between scrapes) +_last_seen_counters = {"upserted": 0, "scraped": 0, "skipped": 0} + + +# ── auth ───────────────────────────────────────────────────────────── +bearer = HTTPBearer(auto_error=False) + + +def require_bearer(creds: Annotated[HTTPAuthorizationCredentials | None, Depends(bearer)]): + if creds is None or creds.scheme.lower() != "bearer" or creds.credentials != API_RUN_TOKEN: + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="missing or invalid bearer") + return True + + +# ── app ────────────────────────────────────────────────────────────── +app = FastAPI(title="autocli-daily") + + +def _read_last_run() -> dict: + if not LAST_RUN_PATH.exists(): + return {"last_run_unixts": 0, "last_exit_code": None, "rows_scraped": 0, "rows_upserted": 0, "rows_skipped": 0, "errors": []} + return json.loads(LAST_RUN_PATH.read_text()) + + +def _refresh_metrics(): + """Reflect last_run.json + chrome reachability into Prometheus gauges.""" + lr = _read_last_run() + if lr.get("last_run_unixts"): + M_LAST_RUN_UNIXTS.set(lr["last_run_unixts"]) + if lr.get("last_duration_seconds") is not None: + M_LAST_DURATION.set(lr["last_duration_seconds"]) + if lr.get("last_exit_code") is not None: + M_LAST_EXIT_CODE.set(lr["last_exit_code"]) + M_RUN_IN_PROGRESS.set(1 if trigger.is_running() else 0) + + # Counter delta — only emit increase, never decrease + for field, counter in (("rows_upserted", M_ROWS_UPSERTED), + ("rows_scraped", M_ROWS_SCRAPED), + ("rows_skipped", M_ROWS_SKIPPED)): + cur = lr.get(field, 0) + delta = cur - _last_seen_counters[field.split("_", 1)[1]] + if delta > 0: + counter.inc(delta) + _last_seen_counters[field.split("_", 1)[1]] = cur + + +@app.get("/api/health") +def health(): + try: + r = httpx.get(f"http://{CHROME_HOST}:{CHROME_PORT}/json/version", timeout=2.0) + chrome_ok = r.status_code == 200 + 
except Exception: + chrome_ok = False + M_CDP_UP.set(1 if chrome_ok else 0) + cdp_file_ok = CDP_ENDPOINT_FILE.exists() + body = {"chrome": chrome_ok, "cdp_endpoint_file": cdp_file_ok} + code = 200 if chrome_ok and cdp_file_ok else 503 + return Response(content=json.dumps(body), status_code=code, media_type="application/json") + + +@app.get("/api/metrics") +def metrics(): + _refresh_metrics() + return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST) + + +@app.get("/api/status") +def api_status(_: Annotated[bool, Depends(require_bearer)]): + body = _read_last_run() + body["run_in_progress"] = trigger.is_running() + return body + + +@app.post("/api/run", status_code=202) +async def api_run(_: Annotated[bool, Depends(require_bearer)]): + if trigger.is_running(): + raise HTTPException(status_code=409, detail="run already in progress") + pid = await trigger.spawn_run_daily() + return {"started_at": int(time.time()), "pid": pid} + + +@app.get("/api/logs") +def api_logs(_: Annotated[bool, Depends(require_bearer)], lines: int = Query(200, ge=1, le=10000)): + files = sorted(LOGS_DIR.glob("run-*.log")) + if not files: + return Response(content="", media_type="text/plain") + latest = files[-1] + with latest.open("rb") as fh: + fh.seek(0, 2) + size = fh.tell() + # Read up to last ~256KB and then split lines + read = min(size, 256 * 1024) + fh.seek(size - read) + data = fh.read().decode("utf-8", errors="replace") + tail = "\n".join(data.splitlines()[-lines:]) + return Response(content=tail, media_type="text/plain") + + +@app.get("/jobs") +def jobs(_: Annotated[bool, Depends(require_bearer)], + since: str = Query(..., description="ISO date, e.g. 
2026-05-15")): + # Lazy import — supabase client takes ~100ms to construct + from supabase import create_client + client = create_client(SUPABASE_URL, SUPABASE_ANON_KEY) + res = ( + client.schema("jobs") + .table("jobs") + .select("id, job_title, company_name, location, salary, post_time, apply_url, priority_score") + .gte("post_time", since) + .order("post_time", desc=True) + .limit(500) + .execute() + ) + return {"count": len(res.data or []), "since": since, "rows": res.data or []} +``` + +- [ ] **Step 2: Verify Python imports** + +```bash +cd deploy/daily/api && uv run python -c "import main" +``` + +Expected: no import errors. (Will fail without env vars set — that's fine; we just want syntax/import-time errors to surface.) Actually the module-level `os.environ["API_RUN_TOKEN"]` will KeyError if unset: + +```bash +cd deploy/daily/api && API_RUN_TOKEN=t SUPABASE_URL=http://x SUPABASE_ANON_KEY=x \ + uv run python -c "import main; print('ok')" +``` + +Expected: `ok`. + +- [ ] **Step 3: Commit** + +```bash +git add deploy/daily/api/main.py +git commit -m "feat(deploy): FastAPI app — /api/* + /jobs + +Routes per SPEC §5.1: + GET /api/health [open] chrome reachability + cdp file probe + GET /api/metrics [open] Prometheus exposition (delta-aware counters) + GET /api/status [Bearer] last_run.json + in_progress + POST /api/run [Bearer] spawn run-daily.sh, 409 if already running + GET /api/logs [Bearer] tail of latest log (default 200 lines) + GET /jobs [Bearer] Supabase 'jobs.jobs' read proxy via + client.schema('jobs').table('jobs')." +``` + +--- + +### Task 15: deploy/daily/api — tests + +**Files:** +- Create: `deploy/daily/api/tests/__init__.py` (empty) +- Create: `deploy/daily/api/tests/test_main.py` + +- [ ] **Step 1: Empty __init__.py** + +```bash +mkdir -p deploy/daily/api/tests +: > deploy/daily/api/tests/__init__.py +``` + +- [ ] **Step 2: Write tests** + +```python +# deploy/daily/api/tests/test_main.py +"""Auth + route shape tests. 
Run via: + cd deploy/daily/api && uv run pytest -v +""" +from __future__ import annotations + +import json +import os +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + + +@pytest.fixture +def client(tmp_path, monkeypatch): + # Required env BEFORE main is imported + monkeypatch.setenv("API_RUN_TOKEN", "test-token-abc") + monkeypatch.setenv("SUPABASE_URL", "https://example.supabase.co") + monkeypatch.setenv("SUPABASE_ANON_KEY", "test-anon") + monkeypatch.setenv("CHROME_HOST", "example-chrome") + # Redirect runtime paths + data_dir = tmp_path / "data" / "output" + data_dir.mkdir(parents=True) + logs_dir = tmp_path / "data" / "logs" + logs_dir.mkdir(parents=True) + monkeypatch.setattr("pathlib.Path.exists", Path.exists) # noop placeholder + # Force module reload to pick up env + import importlib + import sys + sys.modules.pop("main", None) + import main as m + importlib.reload(m) + m.LAST_RUN_PATH = data_dir / "last_run.json" + m.LOGS_DIR = logs_dir + m.CDP_ENDPOINT_FILE = tmp_path / "run" / "cdp-endpoint.env" + return TestClient(m.app) + + +def test_status_requires_bearer(client): + r = client.get("/api/status") + assert r.status_code == 401 + + +def test_status_wrong_bearer(client): + r = client.get("/api/status", headers={"Authorization": "Bearer wrong"}) + assert r.status_code == 401 + + +def test_status_returns_default_when_no_last_run(client): + r = client.get("/api/status", headers={"Authorization": "Bearer test-token-abc"}) + assert r.status_code == 200 + body = r.json() + assert body["last_run_unixts"] == 0 + assert body["rows_scraped"] == 0 + assert body["run_in_progress"] is False + + +def test_status_reflects_last_run_file(client, tmp_path): + import main as m + m.LAST_RUN_PATH.write_text(json.dumps({ + "last_run_unixts": 1747958400, + "last_duration_seconds": 142.3, + "last_exit_code": 0, + "rows_scraped": 100, + "rows_upserted": 75, + "rows_skipped": 25, + "errors": [], + })) + r = client.get("/api/status", 
headers={"Authorization": "Bearer test-token-abc"}) + assert r.status_code == 200 + body = r.json() + assert body["last_run_unixts"] == 1747958400 + assert body["rows_upserted"] == 75 + + +def test_run_requires_bearer(client): + r = client.post("/api/run") + assert r.status_code == 401 + + +def test_logs_requires_bearer(client): + r = client.get("/api/logs") + assert r.status_code == 401 + + +def test_jobs_requires_bearer(client): + r = client.get("/jobs?since=2026-05-15") + assert r.status_code == 401 + + +def test_metrics_is_open(client): + r = client.get("/api/metrics") + assert r.status_code == 200 + assert "autocli_daily" in r.text + + +def test_health_unreachable_chrome_returns_503(client, monkeypatch): + import httpx + def bad_get(*args, **kwargs): + raise httpx.ConnectError("boom") + monkeypatch.setattr("httpx.get", bad_get) + r = client.get("/api/health") + assert r.status_code == 503 + body = r.json() + assert body["chrome"] is False +``` + +- [ ] **Step 3: Run tests** + +```bash +cd deploy/daily/api && uv run --group dev pytest -v +``` + +Expected: 9 passed. + +- [ ] **Step 4: Commit** + +```bash +git add deploy/daily/api/tests/__init__.py deploy/daily/api/tests/test_main.py +git commit -m "test(deploy): FastAPI auth + route shape tests + +9 tests covering: +- /api/status, /api/run, /api/logs, /jobs all return 401 without Bearer + and 401 with wrong Bearer +- /api/status default-shape + reflects last_run.json +- /api/metrics is open and contains the autocli_daily_ family +- /api/health returns 503 when chrome:9222 unreachable." 
+``` + +--- + +### Task 16: deploy/prometheus — scrape config + +**Files:** +- Create: `deploy/prometheus/prometheus.yml` + +- [ ] **Step 1: Create the file** + +```yaml +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: autocli-daily + metrics_path: /api/metrics + static_configs: + - targets: + - autocli-daily:8080 +``` + +- [ ] **Step 2: Commit** + +```bash +git add deploy/prometheus/prometheus.yml +git commit -m "feat(deploy): prometheus scrape config + +Single job scraping autocli-daily:8080/api/metrics every 15s. +metrics_path is required because FastAPI mounts under /api/*." +``` + +--- + +### Task 17: deploy/grafana — provisioning + +**Files:** +- Create: `deploy/grafana/provisioning/datasources/prometheus.yml` +- Create: `deploy/grafana/provisioning/dashboards/dashboards.yml` +- Create: `deploy/grafana/provisioning/dashboards/autocli.json` + +- [ ] **Step 1: Datasource provisioning** + +`deploy/grafana/provisioning/datasources/prometheus.yml`: +```yaml +apiVersion: 1 +datasources: + - name: Prometheus + type: prometheus + uid: prom-autocli + url: http://prometheus:9090 + access: proxy + isDefault: true + editable: false +``` + +- [ ] **Step 2: Dashboard provider config** + +`deploy/grafana/provisioning/dashboards/dashboards.yml`: +```yaml +apiVersion: 1 +providers: + - name: autocli + orgId: 1 + folder: AutoCLI + type: file + disableDeletion: true + updateIntervalSeconds: 30 + allowUiUpdates: false + options: + path: /etc/grafana/provisioning/dashboards +``` + +- [ ] **Step 3: Dashboard JSON** + +`deploy/grafana/provisioning/dashboards/autocli.json` — six panels per SPEC §5.5: + +```json +{ + "schemaVersion": 39, + "title": "AutoCLI Daily", + "uid": "autocli-daily", + "tags": ["autocli"], + "timezone": "Europe/London", + "refresh": "30s", + "time": {"from": "now-30d", "to": "now"}, + "panels": [ + { + "id": 1, "type": "stat", + "title": "Time since last run", + "gridPos": {"x": 0, "y": 0, "w": 6, "h": 4}, + 
"targets": [{"datasource": {"uid": "prom-autocli"}, "expr": "time() - autocli_daily_last_run_unixts"}], + "fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "red", "value": 90000}]}}} + }, + { + "id": 2, "type": "stat", + "title": "Last exit code", + "gridPos": {"x": 6, "y": 0, "w": 6, "h": 4}, + "targets": [{"datasource": {"uid": "prom-autocli"}, "expr": "autocli_daily_last_exit_code"}], + "fieldConfig": {"defaults": {"thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "red", "value": 1}]}}} + }, + { + "id": 3, "type": "stat", + "title": "Rows upserted today", + "gridPos": {"x": 12, "y": 0, "w": 6, "h": 4}, + "targets": [{"datasource": {"uid": "prom-autocli"}, "expr": "increase(autocli_daily_rows_upserted_total[24h])"}] + }, + { + "id": 4, "type": "stat", + "title": "Chrome CDP up (24h avg)", + "gridPos": {"x": 18, "y": 0, "w": 6, "h": 4}, + "targets": [{"datasource": {"uid": "prom-autocli"}, "expr": "avg_over_time(autocli_chrome_cdp_up[24h])"}], + "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "yellow", "value": 0.9}, {"color": "green", "value": 0.99}]}}} + }, + { + "id": 5, "type": "timeseries", + "title": "Daily rows (scraped / upserted / skipped)", + "gridPos": {"x": 0, "y": 4, "w": 24, "h": 8}, + "targets": [ + {"datasource": {"uid": "prom-autocli"}, "expr": "increase(autocli_daily_rows_scraped_total[1d])", "legendFormat": "scraped"}, + {"datasource": {"uid": "prom-autocli"}, "expr": "increase(autocli_daily_rows_upserted_total[1d])", "legendFormat": "upserted"}, + {"datasource": {"uid": "prom-autocli"}, "expr": "increase(autocli_daily_rows_skipped_total[1d])", "legendFormat": "skipped"} + ] + }, + { + "id": 6, "type": "timeseries", + "title": "Run duration", + "gridPos": {"x": 0, "y": 12, "w": 24, "h": 8}, + "targets": [{"datasource": {"uid": "prom-autocli"}, "expr": 
"autocli_daily_last_duration_seconds", "legendFormat": "duration (s)"}], + "fieldConfig": {"defaults": {"unit": "s"}} + } + ] +} +``` + +- [ ] **Step 4: Commit** + +```bash +git add deploy/grafana/ +git commit -m "feat(deploy): grafana provisioning + 6-panel dashboard + +- Datasource: Prometheus at prometheus:9090 (uid prom-autocli) +- Dashboard provider points at /etc/grafana/provisioning/dashboards +- autocli.json: time-since-last-run, last exit code, rows-upserted-today, + CDP-up %, daily scraped/upserted/skipped time series, duration +- No plugin dependencies (Infinity dropped per L313 review)." +``` + +--- + +### Task 18: deploy/docker-compose.yml + +**Files:** +- Create: `deploy/docker-compose.yml` + +- [ ] **Step 1: Create the file** + +```yaml +name: autocli-stack + +x-watchtower-label: &watchtower-enable + com.centurylinklabs.watchtower.enable: "true" + +services: + autocli-chrome: + image: ghcr.io/ricksanchez88e/autocli-chrome:main + container_name: autocli-chrome + restart: unless-stopped + shm_size: "2gb" + environment: + VNC_PASSWORD: ${VNC_PASSWORD} + TZ: ${TZ:-Europe/London} + ports: + - "6080:6080" # noVNC web (also proxied via Cloudflare vnc subdomain) + - "5900:5900" # native VNC (local-only convenience; not in Cloudflare ingress) + - "9222:9222" # CDP (also proxied via Cloudflare cdp subdomain — strict Access) + volumes: + - chrome-profile:/root/.config/chromium + - chrome-tmp:/tmp + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost:9222/json/version"] + interval: 10s + timeout: 3s + retries: 10 + start_period: 20s + networks: [autocli-net] + labels: *watchtower-enable + + autocli-daily: + image: ghcr.io/ricksanchez88e/autocli-daily:main + container_name: autocli-daily + restart: unless-stopped + depends_on: + autocli-chrome: + condition: service_healthy + environment: + TZ: ${TZ:-Europe/London} + CRON_SCHEDULE: ${CRON_SCHEDULE:-0 3 * * *} + CHROME_HOST: autocli-chrome + CHROME_PORT: "9222" + API_RUN_TOKEN: ${API_RUN_TOKEN} + 
SUPABASE_URL: ${SUPABASE_URL} + SUPABASE_SERVICE_ROLE_KEY: ${SUPABASE_SERVICE_ROLE_KEY} + SUPABASE_ANON_KEY: ${SUPABASE_ANON_KEY} + volumes: + - daily-data:/data + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost:8080/api/health"] + interval: 15s + timeout: 5s + retries: 6 + start_period: 60s + networks: [autocli-net] + labels: *watchtower-enable + + cloudflared: + image: cloudflare/cloudflared:2025.4.0 + container_name: autocli-cloudflared + restart: unless-stopped + command: tunnel --no-autoupdate run --token ${CLOUDFLARE_TUNNEL_TOKEN} + environment: + TUNNEL_TOKEN: ${CLOUDFLARE_TUNNEL_TOKEN} + depends_on: + autocli-daily: + condition: service_healthy + networks: [autocli-net] + + prometheus: + image: prom/prometheus:v3.5.0 + container_name: autocli-prometheus + restart: unless-stopped + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --storage.tsdb.retention.time=90d + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prom-data:/prometheus + networks: [autocli-net] + + grafana: + image: grafana/grafana:11.6.0 + container_name: autocli-grafana + restart: unless-stopped + environment: + GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD} + GF_USERS_ALLOW_SIGN_UP: "false" + GF_AUTH_ANONYMOUS_ENABLED: "false" + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - grafana-data:/var/lib/grafana + depends_on: + - prometheus + networks: [autocli-net] + +networks: + autocli-net: + driver: bridge + +volumes: + chrome-profile: + chrome-tmp: + daily-data: + prom-data: + grafana-data: +``` + +- [ ] **Step 2: Validate compose syntax** + +```bash +docker compose -f deploy/docker-compose.yml config > /dev/null +``` + +Expected: command exits 0; no warnings about missing env vars (those resolve via .env at runtime). 
+ +- [ ] **Step 3: Commit** + +```bash +git add deploy/docker-compose.yml +git commit -m "feat(deploy): production docker-compose.yml + +5 services on shared autocli-net bridge: +- autocli-chrome (Stagehand, watchtower-tracked, healthcheck on 9222) +- autocli-daily (cron+FastAPI, watchtower-tracked, depends_on chrome + healthy, env scoped to Supabase creds only) +- cloudflared (Tunnel token mode, depends_on daily healthy) +- prometheus (pinned, 90-day retention) +- grafana (pinned, anon disabled, signup disabled, admin from env) +Named volumes for profile / output / tsdb / grafana state." +``` + +--- + +### Task 19: deploy/docker-compose.local.yml + +**Files:** +- Create: `deploy/docker-compose.local.yml` + +- [ ] **Step 1: Create the file** + +```yaml +# Local override for Phase 1 testing. +# Run: +# docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.local.yml --env-file deploy/.env.local up -d + +name: autocli-stack-local + +services: + autocli-chrome: + container_name: autocli-chrome-local + image: test-chrome:latest # built locally in Phase 0 + ports: + - "6081:6080" + - "5902:5900" + - "9223:9222" + + autocli-daily: + container_name: autocli-daily-local + image: test-daily:latest # built locally in Phase 0 + ports: + - "8081:8080" + + # No Cloudflare in local mode + cloudflared: + profiles: ["disabled"] + + prometheus: + ports: + - "9091:9090" + + grafana: + ports: + - "3001:3000" +``` + +- [ ] **Step 2: Commit** + +```bash +git add deploy/docker-compose.local.yml +git commit -m "feat(deploy): local-only override + +Binds host ports under non-conflicting numbers (6081/5902/9223/8081/ +9091/3001) so the operator can keep their existing local Chrome and +Grafana running alongside. cloudflared moved to a 'disabled' profile." 
+``` + +--- + +### Task 20: deploy/.env.example + +**Files:** +- Create: `deploy/.env.example` + +- [ ] **Step 1: Create the file** + +``` +# Cloudflare Tunnel (token mode — credentials NOT used) +CLOUDFLARE_TUNNEL_TOKEN= + +# Supabase +SUPABASE_URL= +SUPABASE_SERVICE_ROLE_KEY= +SUPABASE_ANON_KEY= + +# API auth (defense-in-depth on top of Cloudflare Access). +# Generate: openssl rand -hex 32 +API_RUN_TOKEN= + +# VNC password (generate: openssl rand -base64 18). NEVER use the dev value 'stagehand' in prod. +VNC_PASSWORD= + +# Grafana admin (generate: openssl rand -hex 16) +GF_SECURITY_ADMIN_PASSWORD= + +# Scheduling +TZ=Europe/London +CRON_SCHEDULE=0 3 * * * +``` + +- [ ] **Step 2: Commit** + +```bash +git add deploy/.env.example +git commit -m "feat(deploy): .env.example template + +All required environment variables with empty values + inline +generator hints. Real .env never committed (.gitignore already +covers it under '.env')." +``` + +--- + +### Task 21: deploy/README.md + +**Files:** +- Create: `deploy/README.md` + +- [ ] **Step 1: Create the file** + +```markdown +# AutoCLI Daily Microservice — Deploy + +See [`SPEC.md`](./SPEC.md) for design, [`PLAN.md`](./PLAN.md) for the implementation walkthrough. +This file is the operator-facing runbook. + +## Quickstart on a fresh host + +```bash +ssh rick@100.108.80.9 +mkdir -p ~/autocli-stack && cd ~/autocli-stack + +# 1. Copy compose files + .env (scp from your laptop) +# (See SPEC §6.3 for the secret-transfer mechanism.) +cp deploy/docker-compose.yml . +cp deploy/.env.example .env +$EDITOR .env # fill every blank + +# 2. Bring up the stack +docker compose pull +docker compose up -d +docker compose ps # all 5 should be healthy + +# 3. One-time LinkedIn login via VNC +# Browse to https://autocli-vnc./vnc.html, password from .env +# Log into linkedin.com once, profile cookies persist in the +# `chrome-profile` named volume. + +# 4. 
Probe the surface (see SPEC §7 Phase 4a) +``` + +## Cloudflare dashboard checklist + +For each subdomain (`vnc`, `cdp`, `api`, `grafana`): +1. Tunnel → Public Hostnames → Add → set service URL to + `http://autocli-chrome:6080` / `http://autocli-chrome:9222` / + `http://autocli-daily:8080` / `http://grafana:3000`. +2. Access → Applications → Add Application → Self-Hosted → + `.autocli.` → policies per SPEC §5.3 table. +3. **Defer adding `autocli-cdp` until Phase 4a is green for the + other three subdomains** (SPEC §9 risk 1). + +## Forced run + +```bash +curl -X POST \ + -H "CF-Access-Client-Id: $CF_ID" \ + -H "CF-Access-Client-Secret: $CF_SECRET" \ + -H "Authorization: Bearer $API_RUN_TOKEN" \ + https://autocli-api./api/run +``` + +## Troubleshooting + +| Symptom | Where to look | +|---|---| +| `/api/health` 503 | `docker logs autocli-chrome` — usually profile lock or socat | +| LinkedIn login expired | VNC in, re-login. Cookies persist in `chrome-profile` volume | +| Tunnel 502 | `docker logs autocli-cloudflared`; check token | +| Watchtower didn't pull new image | Check it's running (`docker ps \| grep watchtower`); 5-min poll | +``` + +- [ ] **Step 2: Commit** + +```bash +git add deploy/README.md +git commit -m "docs(deploy): operator-facing README + runbook + +Quickstart, Cloudflare dashboard checklist, forced-run snippet, +common-failure table. Points back at SPEC + PLAN for the why." 
+``` + +--- + +## Phase C — CI workflow + +### Task 22: GitHub Actions workflow + +**Files:** +- Create: `.github/workflows/deploy-microservice.yml` + +- [ ] **Step 1: Create the file** + +```yaml +name: deploy-microservice + +on: + push: + branches: [feat/daily-microservice, main] + paths: + - deploy/** + - crates/** + - scripts/sync_autocli_jobs.py + - scripts/job_priority_scorer.py + - scripts/job_priority_config.py + - rust-toolchain.toml + - .github/workflows/deploy-microservice.yml + workflow_dispatch: + +env: + IS_MAIN: ${{ github.ref == 'refs/heads/main' }} + +jobs: + build-autocli-binary: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: Swatinem/rust-cache@v2 + - run: cargo build --release -p autocli + - uses: actions/upload-artifact@v4 + with: + name: autocli-bin + path: target/release/autocli + retention-days: 7 + + build-chrome-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v4 + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + # NOTE: slugifier — `type=ref,event=branch` runs metadata-action's + # slugifier, so `feat/daily-microservice` becomes + # `branch-feat-daily-microservice` (Docker-tag-safe). + - id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/ricksanchez88e/autocli-chrome + flavor: latest=false + tags: | + type=raw,value=main,enable=${{ env.IS_MAIN }} + type=ref,event=branch,prefix=branch-,enable=${{ env.IS_MAIN == 'false' }} + type=sha,prefix=sha-,format=short + - uses: docker/build-push-action@v6 + with: + context: . 
+ file: deploy/chrome/Dockerfile + platforms: linux/amd64 + tags: ${{ steps.meta.outputs.tags }} + push: true + + build-daily-image: + runs-on: ubuntu-latest + needs: [build-autocli-binary] + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + name: autocli-bin + path: deploy/daily/bin + - run: chmod +x deploy/daily/bin/autocli + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/ricksanchez88e/autocli-daily + flavor: latest=false + tags: | + type=raw,value=main,enable=${{ env.IS_MAIN }} + type=ref,event=branch,prefix=branch-,enable=${{ env.IS_MAIN == 'false' }} + type=sha,prefix=sha-,format=short + - uses: docker/build-push-action@v6 + with: + context: . + file: deploy/daily/Dockerfile + platforms: linux/amd64 + tags: ${{ steps.meta.outputs.tags }} + push: true +``` + +- [ ] **Step 2: Commit** + +```bash +git add .github/workflows/deploy-microservice.yml +git commit -m "feat(ci): GitHub Actions workflow for the daily microservice + +3 jobs: +1. build-autocli-binary: cargo build --release -p autocli on + ubuntu-latest (linux/amd64) with Swatinem cache; uploads artifact +2. build-chrome-image: builds deploy/chrome from repo-root context; + docker/metadata-action generates :main on main, :branch- on + feature branches, :sha- always +3. build-daily-image: downloads the autocli artifact, builds + deploy/daily from repo-root context, same tag policy + +Path filters include rust-toolchain.toml so a toolchain bump triggers +a rebuild." 
+``` + +--- + +## Phase D — Local Phase 0 + Phase 1 verification + +### Task 23: Phase 0 — build images locally + +**Files:** none modified + +- [ ] **Step 1: Build the autocli binary inside Docker rust 1.94** + +```bash +cd /Users/sanchezrick/Documents/Github/AutoCLI-daily +mkdir -p deploy/daily/bin +docker run --rm --platform linux/amd64 \ + -v "$PWD":/work -w /work \ + -v autocli-daily-cargo-cache:/usr/local/cargo/registry \ + -v autocli-daily-cargo-target:/work/target \ + rust:1.94-slim-bookworm \ + bash -c "apt-get update -qq && apt-get install -y -qq pkg-config libssl-dev && cargo build --release -p autocli && cp target/release/autocli deploy/daily/bin/autocli" +chmod +x deploy/daily/bin/autocli +``` + +- [ ] **Step 2: Verify binary architecture** + +```bash +file deploy/daily/bin/autocli +# Expected: ELF 64-bit LSB executable, x86-64 +``` + +If output mentions Mach-O, halt — the build ran on the host, not in the linux/amd64 container. + +- [ ] **Step 3: Build both Docker images** + +```bash +docker buildx build --platform linux/amd64 -f deploy/chrome/Dockerfile -t test-chrome . +docker buildx build --platform linux/amd64 -f deploy/daily/Dockerfile -t test-daily . +``` + +Both should succeed. + +- [ ] **Step 4: Smoke-test the binary inside the daily image** + +```bash +docker run --rm --platform linux/amd64 test-daily /app/bin/autocli --version +# Expected: a non-empty version string +``` + +- [ ] **Step 5: Commit (optional — keeps deploy/daily/bin/.gitkeep)** + +```bash +# deploy/daily/bin/autocli is large (~50MB), don't commit it. Add ignore: +echo "deploy/daily/bin/autocli" >> .gitignore +git add .gitignore +git commit -m "chore: gitignore the local Phase 0 autocli binary + +CI downloads the artifact at build time; locally Phase 0 produces +this via the docker-rust step." 
+``` + +--- + +### Task 24: Phase 1 — local e2e + LinkedIn login + +**Files:** +- Create: `deploy/.env.local` + +- [ ] **Step 1: Generate local secrets** + +```bash +cat > deploy/.env.local </dev/null || true +``` + +(You'll restart it later if you still want the original.) + +- [ ] **Step 3: Bring up the local stack** + +```bash +docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.local.yml --env-file deploy/.env.local up -d +docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.local.yml ps +# Expected: 4 services Up (cloudflared is disabled in local override) +``` + +- [ ] **Step 4: One-time LinkedIn login via VNC** + +Open `http://localhost:6081/vnc.html?password=stagehand` in a browser. In the VNC viewer: +1. Open `linkedin.com` +2. Sign in with your account +3. Close the tab when done + +Cookies persist in the `chrome-profile` named volume. + +- [ ] **Step 5: Force a daily run** + +```bash +LOCAL_TOKEN=$(grep ^API_RUN_TOKEN= deploy/.env.local | cut -d= -f2-) + +# health first +curl -s http://localhost:8081/api/health | jq + +# trigger +curl -X POST -H "Authorization: Bearer $LOCAL_TOKEN" http://localhost:8081/api/run + +# wait + poll +sleep 240 +curl -s -H "Authorization: Bearer $LOCAL_TOKEN" http://localhost:8081/api/status | jq +``` + +Expected: `last_exit_code: 0`, `rows_upserted > 0`. Supabase `jobs.jobs` should also show today's rows. + +- [ ] **Step 6: Inspect Grafana** + +Open `http://localhost:3001` (admin / value of `GF_SECURITY_ADMIN_PASSWORD` in `.env.local`). Dashboard "AutoCLI Daily" should already be provisioned and show today's run. + +- [ ] **Step 7: Tear down** + +```bash +docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.local.yml down +``` + +(Volumes are retained; subsequent runs resume from logged-in profile.) + +No commit (config only). 
+ +--- + +## Phase E — Push branch + CI green (Phase 2) + +### Task 25: Push and watch CI + +**Files:** none modified + +- [ ] **Step 1: Push branch** + +```bash +cd /Users/sanchezrick/Documents/Github/AutoCLI-daily +git push -u origin feat/daily-microservice +``` + +- [ ] **Step 2: Watch the workflow** + +```bash +gh run watch --repo RickSanchez88E/AutoCLI +# Or: gh run list --branch feat/daily-microservice --limit 1 +``` + +Expected: 3 jobs (`build-autocli-binary`, `build-chrome-image`, `build-daily-image`) all green. + +- [ ] **Step 3: Verify GHCR tags** + +```bash +gh api /users/RickSanchez88E/packages/container/autocli-chrome/versions --jq '.[].metadata.container.tags' | head +gh api /users/RickSanchez88E/packages/container/autocli-daily/versions --jq '.[].metadata.container.tags' | head +``` + +Expected to see: +- `branch-feat-daily-microservice` +- `sha-` +- **NO `main` tag** (will appear only after merge to main) + +If `main` accidentally appears on a feature branch push, halt — the workflow's `enable=${{ env.IS_MAIN }}` is misconfigured. + +No commit. + +--- + +## Phase F — Server bring-up (Phase 3) + +### Task 26: Pre-flight on 100.108.80.9 + +**Files:** none modified + +- [ ] **Step 1: SSH in** + +```bash +sshpass -p '1234' ssh -o StrictHostKeyChecking=no rick@100.108.80.9 +``` + +- [ ] **Step 2: Stop and remove Skyvern (SPEC §1 goal 5)** + +```bash +docker stop skyvern-skyvern-1 skyvern-skyvern-ui-1 skyvern-postgres-1 +docker rm skyvern-skyvern-1 skyvern-skyvern-ui-1 +# Keep the postgres volume in case the operator wants to bring Skyvern back; +# only kill its container so the postgres port doesn't conflict. 
+docker rm skyvern-postgres-1 +docker volume ls | grep skyvern # leave volumes; just removed containers +``` + +- [ ] **Step 3: Verify 6080/9222 are free** + +```bash +ss -tlnp | grep -E ':(6080|9222) ' || echo "ports free" +# Expected: "ports free" +``` + +- [ ] **Step 4: Create stack dir** + +```bash +mkdir -p ~/autocli-stack/{prometheus,grafana/provisioning/datasources,grafana/provisioning/dashboards} +``` + +- [ ] **Step 5: Verify GHCR pull works on this host** + +```bash +docker pull ghcr.io/ricksanchez88e/autocli-chrome:branch-feat-daily-microservice +docker pull ghcr.io/ricksanchez88e/autocli-daily:branch-feat-daily-microservice +``` + +Both should succeed. If 401/403 → run `echo $GHCR_PAT | docker login ghcr.io -u ricksanchez88e --password-stdin` first. + +No commit. + +--- + +### Task 27: scp compose + provisioning + .env to server + +**Files:** none modified (on the server side, files arrive via scp) + +- [ ] **Step 1: From the worktree, scp config** + +```bash +cd /Users/sanchezrick/Documents/Github/AutoCLI-daily +sshpass -p '1234' scp deploy/docker-compose.yml rick@100.108.80.9:~/autocli-stack/ +sshpass -p '1234' scp deploy/prometheus/prometheus.yml rick@100.108.80.9:~/autocli-stack/prometheus/ +sshpass -p '1234' scp deploy/grafana/provisioning/datasources/prometheus.yml \ + rick@100.108.80.9:~/autocli-stack/grafana/provisioning/datasources/ +sshpass -p '1234' scp deploy/grafana/provisioning/dashboards/dashboards.yml \ + deploy/grafana/provisioning/dashboards/autocli.json \ + rick@100.108.80.9:~/autocli-stack/grafana/provisioning/dashboards/ +``` + +- [ ] **Step 2: Prepare .env on server (token + secrets)** + +```bash +# Operator: ensure ~/.autocli-secrets.env on your laptop has: +# CLOUDFLARE_TUNNEL_TOKEN=eyJh... +# SUPABASE_URL=https://... +# SUPABASE_SERVICE_ROLE_KEY=... +# SUPABASE_ANON_KEY=... +cp ~/.autocli-secrets.env /tmp/autocli-secrets.$$.env + +# Append generated values +cat >> /tmp/autocli-secrets.$$.env <`. Log into linkedin.com once. 
Close the tab. + +After Cloudflare ingress (Phase 4) is up the operator will use `autocli-vnc./vnc.html` instead; Tailscale path is the bootstrap. + +No commit. + +--- + +## Phase G — Cloudflare Tunnel + Access (Phase 4) + +### Task 29: Phase 4a — 3 subdomains in Cloudflare dashboard + +**Files:** none modified + +This is **operator UI work** (Cloudflare Zero Trust dashboard). + +- [ ] **Step 1: Tunnel → Public Hostnames** + +In the existing Tunnel (the one whose token is in `.env`), add: +1. `autocli-vnc.` → `http://autocli-chrome:6080` +2. `autocli-api.` → `http://autocli-daily:8080` +3. `autocli-grafana.` → `http://grafana:3000` + +**Do NOT add `autocli-cdp` yet.** + +- [ ] **Step 2: Access → Applications** + +Create one Self-Hosted Application per subdomain. Policies per SPEC §5.3: +- `autocli-vnc`: Policy "operator email + WARP device posture" only. +- `autocli-api`: two policies, "Service Token" and "operator email"; either one grants access (policies within an Application are OR-ed, not AND-ed). +- `autocli-grafana`: Policy "operator email OTP" only. + +Create a **Service Token** under Access → Service Auth. Save the `Client ID` and `Client Secret` — these are the `CF_ID` / `CF_SECRET` used by Phase 4 probes. + +- [ ] **Step 3: Phase 4a probes (run from operator's laptop)** + +```bash +DOMAIN="" +CF_ID="" +CF_SECRET="" +TOKEN="$(sshpass -p '1234' ssh rick@100.108.80.9 'grep ^API_RUN_TOKEN= ~/autocli-stack/.env | cut -d= -f2-')" + +# 1. Unauthenticated → all three should 302 +for sub in vnc api grafana; do + code=$(curl -s -o /dev/null -w "%{http_code}" "https://autocli-${sub}.${DOMAIN}/") + echo "${sub} unauth: ${code}" +done +# Expected: each "302" + +# 2. 
Service Token on humans-only subdomains → still 302 +curl -sI -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + "https://autocli-vnc.${DOMAIN}/" | head -1 +curl -sI -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + "https://autocli-grafana.${DOMAIN}/" | head -1 +# Expected: both HTTP/2 302 + +# 3. api.autocli — Service Token grants access +curl -sI -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + "https://autocli-api.${DOMAIN}/api/health" | head -1 +# Expected: HTTP/2 200 + +# 4. Bearer enforcement +curl -sI -X POST -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + "https://autocli-api.${DOMAIN}/api/run" | head -1 +# Expected: HTTP/2 401 + +curl -sI -X POST -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + -H "Authorization: Bearer ${TOKEN}" \ + "https://autocli-api.${DOMAIN}/api/run" | head -1 +# Expected: HTTP/2 202 (or 409 if a run is already in flight — re-run after a couple of min) + +# 5. /jobs (Bearer required) +curl -s -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + -H "Authorization: Bearer ${TOKEN}" \ + "https://autocli-api.${DOMAIN}/jobs?since=2026-05-15" | jq '.count' +# Expected: ≥ 0 +``` + +All 6 probes must match. Halt the plan if any fails — Phase 4b is gated on this. + +No commit. + +--- + +### Task 30: Phase 4b — Add cdp.autocli ingress + Access Application + +**Files:** none modified + +- [ ] **Step 1: Verify Phase 4a was clean** + +If any probe in Task 29 failed, **stop** and fix before adding cdp ingress. + +- [ ] **Step 2: Generate a dedicated CDP Service Token** + +Cloudflare dashboard → Access → Service Auth → Create Service Token "autocli-cdp" (separate from the api.autocli one). Save `CF_ID_CDP` / `CF_SECRET_CDP`. 
+ +- [ ] **Step 3: Generate operator mTLS client cert** + +Cloudflare dashboard → Access → Service Auth → mTLS → Create CA, then issue a client cert. Download as PEM: +- `~/.cf-access/cdp-client.crt` +- `~/.cf-access/cdp-client.key` +Set permissions `chmod 600 ~/.cf-access/cdp-client.*`. + +- [ ] **Step 4: Create cdp.autocli Access Application** + +Application → Self-Hosted → hostname `autocli-cdp.`. Add: +- Policy A (machines): require Service Token = `autocli-cdp` **AND** mTLS client cert valid for the CA above. +- Policy B (humans): require operator email + **required** WARP device posture. + +- [ ] **Step 5: Add Tunnel ingress for cdp.autocli** + +Tunnel → Public Hostnames → Add `autocli-cdp.` → `http://autocli-chrome:9222`. + +No commit. + +--- + +### Task 31: Phase 4c — cdp.autocli probes + +**Files:** none modified + +Run from operator's laptop: + +- [ ] **Step 1: HTTP probes 4c-1 through 4c-3** + +```bash +DOMAIN="" +CF_ID="" +CF_SECRET="" +CF_ID_CDP="" +CF_SECRET_CDP="" + +# 4c-1. Unauthenticated → 302 +curl -s -o /dev/null -w "%{http_code}\n" "https://autocli-cdp.${DOMAIN}/json/list" +# Expected: 302 + +# 4c-2. api-scoped token (wrong scope, no mTLS) → 302 or 403 +curl -sI -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + "https://autocli-cdp.${DOMAIN}/json/list" | head -1 +# Expected: HTTP/2 302 or HTTP/2 403 + +# 4c-3. Correct cdp token + mTLS → 200 +curl -sI \ + -H "CF-Access-Client-Id: ${CF_ID_CDP}" -H "CF-Access-Client-Secret: ${CF_SECRET_CDP}" \ + --cert "$HOME/.cf-access/cdp-client.crt" --key "$HOME/.cf-access/cdp-client.key" \ + "https://autocli-cdp.${DOMAIN}/json/list" | head -1 +# Expected: HTTP/2 200 +``` + +- [ ] **Step 2: WebSocket probe 4c-4** + +Install websocat first (`brew install websocat` or `apt install websocat`). 
+ +```bash +WS_URL=$(curl -s \ + -H "CF-Access-Client-Id: ${CF_ID_CDP}" -H "CF-Access-Client-Secret: ${CF_SECRET_CDP}" \ + --cert "$HOME/.cf-access/cdp-client.crt" --key "$HOME/.cf-access/cdp-client.key" \ + "https://autocli-cdp.${DOMAIN}/json/list" \ + | jq -r '[.[] | select(.type == "page")][0].webSocketDebuggerUrl' \ + | sed -E "s|ws://[^/]+|wss://autocli-cdp.${DOMAIN}|") +echo "WS_URL=${WS_URL}" + +echo '{"id":1,"method":"Target.getTargets"}' \ + | websocat -1 -t \ + --header="CF-Access-Client-Id: ${CF_ID_CDP}" \ + --header="CF-Access-Client-Secret: ${CF_SECRET_CDP}" \ + --client-pkcs12-der "$HOME/.cf-access/cdp-client.p12" \ + "${WS_URL}" \ + | jq '.result.targetInfos | length' +# Expected: ≥ 1 +``` + +(The websocat call reads `~/.cf-access/cdp-client.p12`, which Task 30 Step 3 does not produce — it only saves the `.crt`/`.key` PEM pair — so bundle that pair into a PKCS#12 file at that path first, e.g. with `openssl pkcs12 -export`. If websocat isn't installable, use the curl --http1.1 fallback block in SPEC §7 Phase 4c-4 step 2.) + +All probes match → the CDP surface is live. No commit. + +--- + +## Phase H — Production forced run + monitoring (Phase 5) + +### Task 32: Forced run via API + verification + +**Files:** none modified + +- [ ] **Step 1: Trigger** + +```bash +curl -X POST \ + -H "CF-Access-Client-Id: $CF_ID" \ + -H "CF-Access-Client-Secret: $CF_SECRET" \ + -H "Authorization: Bearer $API_RUN_TOKEN" \ + https://autocli-api./api/run +# Expected: {"started_at": ..., "pid": ...} with HTTP 202 +``` + +- [ ] **Step 2: Poll status** + +```bash +sleep 300 # generous: gives all 3 retry attempts time to complete +curl -s \ + -H "CF-Access-Client-Id: $CF_ID" -H "CF-Access-Client-Secret: $CF_SECRET" \ + -H "Authorization: Bearer $API_RUN_TOKEN" \ + https://autocli-api./api/status | jq +``` + +Expected: `last_exit_code: 0`, `rows_upserted > 0`, `run_in_progress: false`. + +- [ ] **Step 3: Verify Supabase rows** + +In Supabase SQL editor: +```sql +SELECT count(*) FROM jobs.jobs WHERE created_at::date = current_date; +``` + +Expected: matches the `rows_upserted` from `/api/status`. 
+ +- [ ] **Step 4: Verify Grafana dashboard** + +Browser to `https://autocli-grafana.` → login → Dashboards → "AutoCLI Daily": +- "Time since last run" should be small (single-digit minutes) +- "Last exit code" = 0 (green) +- "Rows upserted today" = same number as Step 3 +- CDP-up gauge near 100% + +No commit. + +--- + +### Task 33: Phase 6 — schedule rollover observation + +**Files:** none modified + +- [ ] **Step 1: Wait until tomorrow 03:00 BST + 30 min** + +(Calendar event, not a step you can execute right now.) + +- [ ] **Step 2: Verify the scheduled run happened** + +```bash +curl -s \ + -H "CF-Access-Client-Id: $CF_ID" -H "CF-Access-Client-Secret: $CF_SECRET" \ + -H "Authorization: Bearer $API_RUN_TOKEN" \ + https://autocli-api./api/status | jq '.last_run_unixts | strftime("%Y-%m-%d %H:%M:%S")' +``` + +Expected: timestamp within 5 minutes of 03:00 (today's date). + +- [ ] **Step 3: Repeat on day 3** + +If two consecutive scheduled runs pass without intervention, declare Phase 6 done. + +No commit. + +--- + +### Task 34: Open PR + +**Files:** none modified + +- [ ] **Step 1: Push final state** + +```bash +cd /Users/sanchezrick/Documents/Github/AutoCLI-daily +git push +``` + +- [ ] **Step 2: Open PR** + +```bash +gh pr create \ + --base main \ + --head feat/daily-microservice \ + --title "feat: daily LinkedIn microservice + autocli CDP wiring" \ + --body "$(cat <<'EOF' +## Summary + +Implements the auto-scheduled daily LinkedIn-recommended pipeline as a +microservice on `100.108.80.9` (Tailscale). See `deploy/SPEC.md` for design +and `deploy/PLAN.md` for the build walkthrough. + +- **Prereq Rust patch** (crates/autocli-browser): wires `CdpPage` into + `BrowserBridge::connect` behind `AUTOCLI_CDP_ENDPOINT`. Required so the + daily container can drive a sibling Chrome container without the + extension+daemon path. +- **`rust-toolchain.toml`**: pins workspace to 1.94 so local / CI / Phase 0 + builder all agree. 
+- **`deploy/`**: chrome image (Stagehand-style VNC Chromium), daily image + (Python+supercronic+FastAPI+pre-built autocli), docker-compose for prod + and local, prometheus + grafana provisioning, README runbook. +- **`.github/workflows/deploy-microservice.yml`**: GHCR builds with + branch-safe slugified tags; only `:main` reaches Watchtower in prod. +- Verified live on `100.108.80.9` via Cloudflare Tunnel (`vnc/cdp/api/grafana`.autocli.), Phase 4a-4c probes all green, one forced run wrote + N rows to Supabase, Grafana dashboard provisions automatically. + +## Test plan + +- [x] Rust unit test: `bridge::tests::test_connect_uses_cdp_endpoint_when_env_var_set` +- [x] FastAPI tests: 9 cases covering auth + route shape +- [x] Phase 0: ELF/x86-64 verified; both images build +- [x] Phase 1 local e2e: forced run, Supabase rows landed +- [x] Phase 2 CI: green on feature branch, correct tag set +- [x] Phase 3: 5 containers healthy on `100.108.80.9` +- [x] Phase 4a/4b/4c: all probes match expected codes +- [x] Phase 5: production forced run, Grafana populated +- [ ] Phase 6: two consecutive scheduled runs (calendar-dependent; will + tick off after the second 03:00 BST tick post-merge) +EOF +)" +``` + +- [ ] **Step 3: Flip server to `:main` tag after merge** + +(After PR is merged.) +```bash +sshpass -p '1234' ssh rick@100.108.80.9 'cd ~/autocli-stack && \ + sed -i "s|:branch-feat-daily-microservice|:main|" docker-compose.yml && \ + docker compose pull && docker compose up -d' +``` + +Watchtower will keep `:main` fresh thereafter. + +No commit. 
+ +--- + +## Self-Review + +**Spec coverage check** (against `deploy/SPEC.md` sections): + +- Prerequisite Patch: ✅ Tasks 1–4 +- §2 Architecture (5 services + topology): ✅ Tasks 18 (compose) + 16 (prometheus) + 17 (grafana) +- §3 Repo Layout + Worktree: ✅ matches PLAN file-map; worktree already created +- §4 Image Build Pipeline: ✅ Tasks 22 (workflow) + 23 (local mirror) +- §5.1 Process tree (cdp-discover + supercronic + uvicorn): ✅ Tasks 8/9/10/11 +- §5.2 Invariants (discovery cadence, retry policy): ✅ Tasks 8/9 +- §5.3 Cloudflare Tunnel + Access: ✅ Tasks 29/30/31 +- §5.4 Prometheus metrics: ✅ Task 14 (FastAPI exposes them) + Task 16 (scrape config) +- §5.5 Grafana dashboard: ✅ Task 17 +- §6 Secrets: ✅ Task 27 (server-side transfer) +- §7 Phase 0–6 acceptance: ✅ Tasks 23/24/25/28/29/30/31/32/33 +- §9 Risks: ✅ Task 30 explicitly gated on Task 29 being clean (CDP-public-exposure mitigation) + +**Placeholder scan:** No "TBD" / "implement later" / "fill in details". Every step has the actual code or command. + +**Type / name consistency:** Path `deploy/daily/api/main.py` referenced as `main:app` in Dockerfile, entrypoint, and tests. `AUTOCLI_CDP_ENDPOINT` referenced identically in Rust patch, cdp-discover.sh, run-daily.sh. `API_RUN_TOKEN` referenced identically in env, FastAPI module, tests, and curl probes. + +**Scope check:** Single PR delivers a working, testable system. The Rust patch is a hard prereq embedded in this plan rather than a parallel PR (acceptable per SPEC §1.A wording — "before microservice work merges"; combining into one PR satisfies that). + +--- + +## Execution Handoff + +Plan complete and saved to `deploy/PLAN.md`. Two execution options: + +1. **Subagent-Driven (recommended)** — I dispatch a fresh subagent per task, review between tasks, fast iteration. Best for a 34-task plan because each subagent gets clean context. + +2. 
**Inline Execution** — Execute tasks in this session using `superpowers:executing-plans`, batch execution with checkpoints for review. + +**Which approach?** diff --git a/deploy/README.md b/deploy/README.md new file mode 100644 index 0000000..2a23bbd --- /dev/null +++ b/deploy/README.md @@ -0,0 +1,59 @@ +# AutoCLI Daily Microservice — Deploy + +See [`SPEC.md`](./SPEC.md) for design, [`PLAN.md`](./PLAN.md) for the implementation walkthrough. +This file is the operator-facing runbook. + +## Quickstart on a fresh host + +```bash +ssh rick@100.108.80.9 +mkdir -p ~/autocli-stack && cd ~/autocli-stack + +# 1. Copy compose files + .env (scp from your laptop) +# (See SPEC §6.3 for the secret-transfer mechanism.) +cp deploy/docker-compose.yml . +cp deploy/.env.example .env +$EDITOR .env # fill every blank + +# 2. Bring up the stack +docker compose pull +docker compose up -d +docker compose ps # all 5 should be healthy + +# 3. One-time LinkedIn login via VNC +# Browse to https://autocli-vnc./vnc.html, password from .env +# Log into linkedin.com once, profile cookies persist in the +# `chrome-profile` named volume. + +# 4. Probe the surface (see SPEC §7 Phase 4a) +``` + +## Cloudflare dashboard checklist + +For each subdomain (`vnc`, `cdp`, `api`, `grafana`): +1. Tunnel → Public Hostnames → Add → set service URL to + `http://autocli-chrome:6080` / `http://autocli-chrome:9222` / + `http://autocli-daily:8080` / `http://grafana:3000`. +2. Access → Applications → Add Application → Self-Hosted → + `.autocli.` → policies per SPEC §5.3 table. +3. **Defer adding `autocli-cdp` until Phase 4a is green for the + other three subdomains** (SPEC §9 risk 1). 
+ +## Forced run + +```bash +curl -X POST \ + -H "CF-Access-Client-Id: $CF_ID" \ + -H "CF-Access-Client-Secret: $CF_SECRET" \ + -H "Authorization: Bearer $API_RUN_TOKEN" \ + https://autocli-api./api/run +``` + +## Troubleshooting + +| Symptom | Where to look | +|---|---| +| `/api/health` 503 | `docker logs autocli-chrome` — usually profile lock or socat | +| LinkedIn login expired | VNC in, re-login. Cookies persist in `chrome-profile` volume | +| Tunnel 502 | `docker logs autocli-cloudflared`; check token | +| Watchtower didn't pull new image | Check it's running (`docker ps \| grep watchtower`); 5-min poll | diff --git a/deploy/SPEC.md b/deploy/SPEC.md new file mode 100644 index 0000000..c7a8716 --- /dev/null +++ b/deploy/SPEC.md @@ -0,0 +1,729 @@ +# AutoCLI Daily Microservice — Design + +| | | +|---|---| +| **Date** | 2026-05-16 | +| **Branch** | `feat/daily-microservice` (separate worktree, branched from `main`) | +| **Target host** | `100.108.80.9` (Tailscale, Ubuntu 24.04, Docker 29.4) | +| **Public endpoint** | 4 subdomains under `` — `autocli-vnc.`, `autocli-cdp.`, `autocli-api.` (carries `/api/*` AND `/jobs`), `autocli-grafana.` — via Cloudflare Tunnel `--token` mode | +| **Goal** | Convert the manual daily flow (`autocli linkedin recommended … | uv run scripts/sync_autocli_jobs.py`) into an auto-scheduled, externally accessible microservice with monitoring. | + +--- + +## 1. Problem & Motivation + +Every day the operator runs by hand: + +```bash +autocli linkedin recommended --limit 0 --with_jd true -f json > output/$(date +%Y%m%d).json +uv run scripts/sync_autocli_jobs.py --input output/$(date +%Y%m%d).json +``` + +This requires a logged-in Chrome on the operator's laptop. Goals of the redesign: + +1. **Detach from the laptop** — schedule on a server that's always on (`100.108.80.9`). +2. **Reuse the existing Stagehand-style Chrome setup** that already works locally (VNC + persistent profile + CDP 9222). +3. 
**Expose status/control over the public internet** with proper auth (operator wants on-the-go VNC re-login, manual run trigger, query proxy, and a Grafana dashboard). +4. **Use the existing pull-based deploy chain** (GHCR + Watchtower) — no new infra wheels. +5. **Stay decoupled**: separate images, no entanglement with the existing `skyvern-*`, `browserless`, `job-*` services on the host (Skyvern will be retired). + +--- + +## Prerequisite Patch — autocli CDP wiring (must merge first) + +**Problem.** `crates/autocli-browser/src/bridge.rs::BrowserBridge::connect()` currently has only one code path: spawn the daemon, wait for the Chrome extension to connect over WebSocket, return a `DaemonPage`. The `CdpPage` type in `crates/autocli-browser/src/cdp.rs` is defined but never instantiated from the command-execution flow, and `AUTOCLI_CDP_ENDPOINT` is read only by `commands/doctor.rs` for diagnostics. In a containerised deploy the daemon-and-extension path does not work (extension cannot live in the same container as the daemon; `is_chrome_running()` uses `pgrep` which cannot see Chrome in a sibling container). + +**Patch.** Add an `AUTOCLI_CDP_ENDPOINT` branch at the top of `BrowserBridge::connect()`: + +```rust +pub async fn connect(&mut self) -> Result<Arc<dyn IPage>, CliError> { + if let Ok(endpoint) = std::env::var("AUTOCLI_CDP_ENDPOINT") { + let page = CdpPage::connect(&endpoint).await?; + return Ok(Arc::new(page)); + } + Ok(self.connect_daemon_page().await?) +} +``` + +When the env var is set we skip `is_chrome_running()`, `spawn_daemon()`, and `poll_extension()` entirely. The `IPage` trait is the same, so `autocli-pipeline` consumes either page implementation transparently. A small unit test covers the env-var branch with a mock CDP endpoint. + +**Scope of the patch.** +- File touched: `crates/autocli-browser/src/bridge.rs` (≈10 LOC) + one test. +- No change to `IPage`, `autocli-pipeline`, or YAML adapter execution. 
+- Lands on `main` in its own PR **before** the microservice work merges; the daily-image CI build pins to that commit. + +**Verification of the patch (locally, before this design's Phase 0).** +```bash +AUTOCLI_CDP_ENDPOINT=ws://localhost:9222/devtools/page/ \ + cargo run --release -p autocli -- linkedin recommended --limit 5 -f json +``` +Run against the operator's local Stagehand Chrome. Expect a non-empty JSON array. Failure means the patch must be revised before the microservice work proceeds. + +--- + +## 2. Architecture + +### 2.1 Container topology (5 services on a dedicated docker-compose stack) + +``` +┌─ 100.108.80.9 : docker-compose stack "autocli-stack" ────────────────┐ +│ │ +│ autocli-chrome autocli-daily cloudflared │ +│ (Stagehand image) (cron + FastAPI) (Tunnel daemon) │ +│ :6080 :9222 :5900 :8080 (no exposed port) │ +│ ▲ ▲ │ │ +│ │CDP (9222) │HTTP │ │ +│ └────── docker bridge ──┘ │ │ +│ │ │ +│ prometheus :9090 ──▶ scrapes daily:8080/api/metrics│ │ +│ ▲ │ │ +│ │ │ │ +│ grafana :3000 ──▶ datasource = prometheus │ │ +│ ▲ │ │ +│ └───────────────────────────────────────────────┘ │ +│ │ │ +└────────────────────────────────────────────────────────┼─────────────┘ + ▼ + Cloudflare Edge (HTTPS + Access) + ▼ + autocli-vnc. → chrome:6080 + autocli-cdp. → chrome:9222 (strict Access) + autocli-api. → daily:8080 (serves /api/* AND /jobs) + autocli-grafana. 
→ grafana:3000 +``` + +### 2.2 Component contracts + +| Container | Responsibility | Owns | Depends on | +|---|---|---|---| +| `autocli-chrome` | Long-running Chromium with persistent profile and CDP exposure | `chrome-profile` volume | nothing | +| `autocli-daily` | Daily cron, manual `/run`, status & metrics API, Supabase proxy | `data/output/`, `data/logs/`, `run-daily.lock` | `autocli-chrome:9222` | +| `cloudflared` | Cloudflare Tunnel ingress | tunnel credentials env | Cloudflare edge | +| `prometheus` | Scrape `autocli-daily:8080/api/metrics` every 15 s | `prom-data` volume | `autocli-daily:8080` | +| `grafana` | Visualise metrics; pre-provisioned dashboard | `grafana-data` volume | `prometheus:9090` | + +Boundaries: +- `autocli-chrome` does not know it is being used by `autocli-daily`; it only speaks CDP. Replace it with any CDP-speaking Chrome and the rest still works. +- `autocli-daily` discovers Chrome via `curl http://autocli-chrome:9222/json/list` (creating a page with `PUT /json/new?about:blank` if the list is empty) at boot and at every `/api/run`, never hard-codes a page id. See §5.2 for the host-rewrite step. + +--- + +## 3. 
Repository Layout & Worktree + +### 3.1 New files inside the existing `AutoCLI` repo + +``` +AutoCLI/ +├── (existing content untouched) +└── deploy/ ← new top-level directory + ├── chrome/ + │ ├── Dockerfile ← copy of my-stagehand-app/Dockerfile.chrome + │ └── entrypoint-vnc.sh ← copy of entrypoint-vnc.sh + ├── daily/ + │ ├── Dockerfile ← multi-stage: python-slim + COPY autocli binary + │ ├── entrypoint.sh ← starts supercronic + uvicorn in parallel + │ ├── crontab ← "0 3 * * * /app/run-daily.sh" + │ ├── run-daily.sh ← orchestrator (flock + retry + log) + │ └── api/ + │ ├── pyproject.toml ← uv-managed (fastapi, supabase, prometheus-client) + │ ├── main.py ← FastAPI routes: /api/{status,run,logs,metrics,health} + /jobs + │ └── trigger.py ← shared run-daily executor used by cron + /run + ├── prometheus/ + │ └── prometheus.yml ← single scrape job + ├── grafana/ + │ └── provisioning/ + │ ├── datasources/prometheus.yml + │ └── dashboards/autocli.json ← pre-built dashboard JSON + ├── docker-compose.yml ← production stack (5 services) + ├── docker-compose.local.yml ← override for laptop e2e testing + ├── .env.example ← every required variable, with empty values + └── README.md ← deploy & runbook + +.github/workflows/ +└── deploy-microservice.yml ← CI: build binary + 2 images → push GHCR +``` + +### 3.2 Worktree strategy + +- **Branch**: `feat/daily-microservice` — branched from `main` (not from `feat/indeed-search-adapter`). +- **Worktree path**: `/Users/sanchezrick/Documents/Github/AutoCLI-daily/` +- **Reason**: keep this work isolated from the in-flight Indeed adapter PR; merge order independent. + +Created with: +```bash +cd /Users/sanchezrick/Documents/Github/AutoCLI +git worktree add ../AutoCLI-daily -b feat/daily-microservice main +``` + +### 3.3 Why one repo, not two + +- We need the `autocli` binary built from `crates/`, and we ship `scripts/sync_autocli_jobs.py` inside the daily image. Single repo = atomic PRs that change both the code and the deploy config. 
+- A single GitHub Actions workflow handles both images. + +--- + +## 4. Image Build Pipeline + +### 4.1 GitHub Actions (`deploy-microservice.yml`) + +```yaml +on: + push: + branches: [feat/daily-microservice, main] + paths: + - deploy/** + - crates/** + - scripts/sync_autocli_jobs.py + - .github/workflows/deploy-microservice.yml + workflow_dispatch: + +env: + # :main only on main; feature branches publish :branch- + :sha-. + # Watchtower in prod tracks :main → feature branches NEVER reach prod by accident. + IS_MAIN: ${{ github.ref == 'refs/heads/main' }} + +jobs: + build-autocli-binary: + runs-on: ubuntu-latest # x86_64 host = matches prod + steps: + - uses: actions/checkout@v4 + - uses: Swatinem/rust-cache@v2 + - run: cargo build --release -p autocli # crate name is `autocli` (per crates/autocli-cli/Cargo.toml) + - uses: actions/upload-artifact@v4 + with: { name: autocli-bin, path: target/release/autocli } + + build-chrome-image: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 + with: { registry: ghcr.io, username: ${{ github.actor }}, password: ${{ secrets.GITHUB_TOKEN }} } + # NOTE (slugifier): `type=ref,event=branch` runs metadata-action's slugifier — + # `feat/daily-microservice` → `branch-feat-daily-microservice` (Docker-tag-safe). + # Comments live outside `tags: |` because the YAML block-scalar would otherwise + # treat them as literal text and metadata-action would try to parse them as rules. + - id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/ricksanchez88e/autocli-chrome + flavor: latest=false + tags: | + type=raw,value=main,enable=${{ env.IS_MAIN }} + type=ref,event=branch,prefix=branch-,enable=${{ env.IS_MAIN == 'false' }} + type=sha,prefix=sha-,format=short + - uses: docker/build-push-action@v6 + with: + context: . 
# unified context = repo root for BOTH images + file: deploy/chrome/Dockerfile # COPY paths in Dockerfile are repo-relative + platforms: linux/amd64 + tags: ${{ steps.meta.outputs.tags }} + push: true + + build-daily-image: + runs-on: ubuntu-latest + needs: [build-autocli-binary] + steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: { name: autocli-bin, path: deploy/daily/bin } + - run: chmod +x deploy/daily/bin/autocli + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 + with: { registry: ghcr.io, username: ${{ github.actor }}, password: ${{ secrets.GITHUB_TOKEN }} } + - id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/ricksanchez88e/autocli-daily + flavor: latest=false + tags: | + type=raw,value=main,enable=${{ env.IS_MAIN }} + type=ref,event=branch,prefix=branch-,enable=${{ env.IS_MAIN == 'false' }} + type=sha,prefix=sha-,format=short + - uses: docker/build-push-action@v6 + with: + context: . # same context as chrome image + file: deploy/daily/Dockerfile + platforms: linux/amd64 + tags: ${{ steps.meta.outputs.tags }} + push: true +``` + +### 4.2 Image sizes & decisions + +| Image | Base | Approx size | Why this base | +|---|---|---|---| +| `autocli-chrome` | `debian:bookworm-slim` | ~600 MB | Matches local dev image byte-for-byte (Chromium + Xvfb + noVNC + supervisor) | +| `autocli-daily` | `python:3.12-slim-bookworm` | ~200 MB | Need uv + supabase-py + fastapi; autocli binary is a static-ish ELF copied in | +| `cloudflared` | `cloudflare/cloudflared:2025.4.0` | ~30 MB | Pinned to a specific release — reproducible deploys, no surprise upgrades | +| `prometheus` | `prom/prometheus:v3.5.0` | ~280 MB | Pinned semver | +| `grafana` | `grafana/grafana:11.6.0` | ~400 MB | Pinned semver | + +> Watchtower **must not** auto-upgrade these three — they don't carry the `com.centurylinklabs.watchtower.enable` label. 
+ +### 4.3 Watchtower integration + +Both Autocli images get: +```yaml +labels: + com.centurylinklabs.watchtower.enable: "true" +``` +The existing `job-watchtower` (5 min poll, `WATCHTOWER_LABEL_ENABLE=true`, `WATCHTOWER_CLEANUP=true`) picks them up. **Only the `:main` tag is tracked in prod** — feature branches publish `:branch-*` and `:sha-*` only, so unmerged code can never reach the server. + +Cloudflared, Prometheus, Grafana run pinned versions (see §4.2) and **do not** carry the Watchtower label — upgrades are deliberate. + +### 4.4 GHCR pull credentials (already configured) + +Recon on 2026-05-16 confirmed `100.108.80.9` has `ghcr.io` in `~/.docker/config.json` and `docker pull ghcr.io/ricksanchez88e/job-scraper-api:main` succeeds. No new login step is needed. Phase 3 verifies with: + +```bash +docker pull ghcr.io/ricksanchez88e/autocli-chrome:main && \ +docker pull ghcr.io/ricksanchez88e/autocli-daily:main +``` + +If either pull fails with 401/403 (e.g. PAT expired): `echo $GHCR_PAT | docker login ghcr.io -u ricksanchez88e --password-stdin`. + +--- + +## 5. 
Runtime Flow + +### 5.1 Process tree inside `autocli-daily` + +``` +PID 1 : tini + ├─ /app/cdp-discover.sh (runs once at boot, blocks until chrome ready) + │ GET http://autocli-chrome:9222/json/list + │ if no type:"page" target → PUT http://autocli-chrome:9222/json/new?about:blank + │ extract webSocketDebuggerUrl, rewrite host (localhost:9223 → autocli-chrome:9222) + │ write the resulting ws:// URL to /run/cdp-endpoint.env → AUTOCLI_CDP_ENDPOINT + │ (boot run gates supercronic + uvicorn startup; see §5.2) + │ + ├─ supercronic /etc/cron.d/autocli (TZ=Europe/London; starts only after the boot cdp-discover.sh exits 0) + │ └─ "0 3 * * * /app/run-daily.sh" + │ └─ /app/cdp-discover.sh # re-discover every run — Chrome may have restarted, page id may have changed + │ └─ source /run/cdp-endpoint.env # pick up the fresh AUTOCLI_CDP_ENDPOINT + │ └─ /app/bin/autocli linkedin recommended --limit 0 --with_jd true -f json + │ > /data/output/$(date +%Y%m%d).json + │ └─ uv run /app/scripts/sync_autocli_jobs.py --input /data/output/... + │ └─ update last_run.json + emit prometheus metrics file + │ + └─ uvicorn api.main:app --host 0.0.0.0 --port 8080 + FastAPI routes (all under /api/* — Prometheus scrape uses /api/metrics): + ├─ GET /api/status [Bearer] last_run.json {last_run_unixts, exit_code, rows_*, errors[]} + ├─ POST /api/run [Bearer] spawns run-daily.sh (flock-protected) + ├─ GET /api/logs [Bearer] tail -n 200 /data/logs/run-.log + ├─ GET /api/metrics [open] Prometheus exposition; app-layer open. In-cluster scrape hits this directly; + │ external callers via api.autocli are still gated by Cloudflare Access. + ├─ GET /api/health [open] 200 iff chrome:9222 reachable AND /run/cdp-endpoint.env exists. + │ Same dual-path: docker healthcheck internal, public access via api.autocli + Cloudflare Access. 
+ └─ GET /jobs?since=… [Bearer] + → client = create_client(SUPABASE_URL, SUPABASE_ANON_KEY) + → client.schema("jobs").table("jobs").select(...).gte("post_time", since).execute() + (matches scripts/backfill_priority_scores.py — schema-qualified table API) + + [Bearer] = Authorization: Bearer ${API_RUN_TOKEN} required at the FastAPI layer; missing/wrong → 401. + [open] = no Bearer at the app layer; external requests still hit Cloudflare Access first. + Inside the docker network (Prometheus scrape, docker healthcheck) requests bypass both. +``` + +### 5.2 Invariants + +- **CDP page target, not browser endpoint**: `/json/version` returns a browser-level WebSocket that does not accept page-scoped commands. `cdp-discover.sh` therefore hits `GET /json/list`, picks the first `type:"page"` target, and if none exists `PUT /json/new?about:blank` to create one. (Chrome ≥ M86 rejects `GET` and `POST` on `/json/new` with `405 Method Not Allowed`; `PUT` is the only supported verb.) Only `webSocketDebuggerUrl` from that page target is exported as `AUTOCLI_CDP_ENDPOINT`. +- **Host rewrite**: the Stagehand image binds Chromium to `127.0.0.1:9223` (socat exposes 9222 publicly), so `/json/list` returns URLs like `ws://localhost:9223/devtools/page/`. `cdp-discover.sh` rewrites the host:port portion to `autocli-chrome:9222` (the docker-service-name + the externally-mapped port) before exporting. Confirmed against `~/Documents/Github/my-stagehand-app/scripts/entrypoint-vnc.sh`. +- **Discovery cadence**: `cdp-discover.sh` runs at TWO points — (1) at container boot, gating supercronic and uvicorn startup; (2) at the start of every `run-daily.sh` invocation (cron-driven AND `POST /api/run`-driven), before `AUTOCLI_CDP_ENDPOINT` is sourced. The per-run discovery refreshes the page id in case Chrome restarted or the page was closed between cron ticks. Boot retry: every 2 s, give up at 60 s with exit ≠ 0; `restart: unless-stopped` then recreates the container until Chrome is reachable. 
Per-run discovery uses the same retry budget but counts as a transient failure under the unified retry policy below if it gives up. +- **Boot ordering**: `entrypoint.sh` runs the boot-time `cdp-discover.sh` synchronously first; only after it exits 0 does supercronic launch and uvicorn bind `:8080`. +- **Mutual exclusion**: `run-daily.sh` wraps the body in `flock -n /var/lock/autocli-daily.lock` — cron and `/api/run` cannot collide. +- **Retry policy (unified)**: a single backoff schedule applies to every transient failure (autocli exit ≠ 0, Supabase 429/5xx, CDP disconnect). After the initial attempt, up to three retries follow, with backoffs of **15 s → 60 s → 240 s** (four attempts in total). If the 4th attempt also fails: record `last_exit_code` in `last_run.json`, increment `autocli_daily_runs_total{result="failure"}`, release the lock, log to `/data/logs/run-<date>.log`. The next cron tick is the next retry opportunity. This single policy is referenced from the runbook, code, metrics, and the §7 failure-mode table — all kept in sync. +- **Output retention**: JSON files kept 30 days; a daily 04:00 cron entry runs `find /data/output -mtime +30 -delete`. +- **Timezone**: container `TZ=Europe/London`; cron expression `0 3 * * *` is 03:00 BST/GMT automatically. + +### 5.3 Cloudflare Tunnel — token mode + subdomain routing + +**Token mode, not credentials-file.** The operator already has a Tunnel token from the Cloudflare dashboard. cloudflared runs as: + +```yaml +# docker-compose.yml excerpt +cloudflared: + image: cloudflare/cloudflared:2025.4.0 + restart: unless-stopped + command: tunnel --no-autoupdate run --token ${CLOUDFLARE_TUNNEL_TOKEN} + environment: + TUNNEL_TOKEN: ${CLOUDFLARE_TUNNEL_TOKEN} + depends_on: [autocli-chrome, autocli-daily, grafana] + networks: [autocli-net] +``` + +In token mode **ingress rules live in the Cloudflare dashboard**, not in a local `config.yml`. There is no `${CLOUDFLARE_TUNNEL_ID}` interpolation problem (cloudflared does not parse a YAML at all) and no `credentials-file` to manage. The two modes are not mixed. 
+ +**Subdomains, not path routes.** Cloudflare Tunnel does not strip the matched URL prefix, so `/cdp/json/version` would arrive at the origin as `/cdp/json/version` and Chromium would return 404. Each surface gets its own subdomain — no path rewriting required: + +| Public hostname | Origin (docker service) | Notes | +|---|---|---| +| `autocli-vnc.<domain>` | `http://autocli-chrome:6080` | noVNC web client | +| `autocli-cdp.<domain>` | `http://autocli-chrome:9222` | CDP HTTP + WebSocket upgrade | +| `autocli-api.<domain>` | `http://autocli-daily:8080` | FastAPI: `/api/*` (status, run, logs, metrics, health) AND `/jobs` (Supabase read proxy) | +| `autocli-grafana.<domain>` | `http://grafana:3000` | Subdomain → no `serve_from_sub_path` needed | + +These four hostnames are configured in the dashboard under the same Tunnel. Implementation produces a screenshot/checklist for the operator to apply. + +**Cloudflare Access — one Application per subdomain, up to two policies inside each.** Within a single Application multiple policies are evaluated as **OR** — a request matching any one policy is admitted. 
This lets us serve both humans and scripts on the same surface: + +| Subdomain | Policy A (machines) | Policy B (humans) | +|---|---|---| +| `autocli-cdp` | **Dedicated** Service Token (separate from `autocli-api`'s), short-lived rotation, **plus** mTLS client-certificate validation on the same Access Application | Operator email OTP + **required** WARP device posture (Cloudflare WARP enrolled, healthy device) | +| `autocli-vnc` | — (humans only; scripts have no business here) | Operator email OTP + required WARP device posture | +| `autocli-api` | Service Token (used by `curl` / scripts for `/api/*` and `/jobs`) | Operator email OTP | +| `autocli-grafana` | — (humans only) | Operator email OTP | + +> **Why no IP allow-list on `autocli-cdp`.** Cloudflare Access sees the caller's public/WARP-egress IP at the Cloudflare edge — it never sees the Tailscale `100.x` CGNAT address (Tailscale only connects operator devices to the home server, not to Cloudflare). An IP allow-list scoped to "Tailscale range" would never match and would be misleading security theatre. We use **mTLS + dedicated short-lived Service Token + WARP posture** as the strong layers instead. + +`autocli-cdp` is the most sensitive surface — the CDP WebSocket is equivalent to a remote shell on the browser. Its Application therefore carries the extra mTLS requirement, a dedicated short-lived Service Token, and WARP-required human policy. (Network-selector / IP allow-lists are deliberately omitted; see callout above.) + +### 5.4 Prometheus metrics emitted by `autocli-daily` + +Exposed at `GET /api/metrics` (not `/metrics`). 
Prometheus scrape config must specify the path: + +```yaml +# prometheus.yml +scrape_configs: + - job_name: autocli-daily + metrics_path: /api/metrics + static_configs: + - targets: [autocli-daily:8080] +``` + +Sample exposition: + +``` +# HELP autocli_daily_last_run_unixts Unix timestamp of last run start +# TYPE autocli_daily_last_run_unixts gauge +autocli_daily_last_run_unixts 1747958400 + +autocli_daily_last_duration_seconds 142.3 +autocli_daily_last_exit_code 0 +autocli_daily_run_in_progress 0 +autocli_daily_runs_total{result="success"} 47 +autocli_daily_runs_total{result="failure"} 2 +autocli_daily_rows_scraped_total 12480 +autocli_daily_rows_upserted_total 9213 +autocli_daily_rows_skipped_total 3267 +autocli_chrome_cdp_up 1 +``` + +### 5.5 Grafana dashboard (`autocli.json`) + +Single dashboard, six panels (all backed by the bundled Prometheus datasource — no plugins to install): + +1. **Stat — Time since last run** (red if > 25 h) +2. **Stat — Last exit code** (green = 0) +3. **Stat — Rows scraped today** +4. **Time series — Daily scraped vs upserted vs skipped (30 d)** +5. **Time series — Run duration (30 d)** +6. **Stat — Chrome CDP up (24 h uptime %)** + +Logs are read out of band via `curl https://autocli-api./api/logs` or `docker logs autocli-daily`. A future PR may add Loki + a Grafana logs panel; that is out of scope for this design. + +Dashboard JSON and the datasource pointer are committed under `grafana/provisioning/`, so a fresh Grafana container reproduces the dashboard automatically. + +--- + +## 6. 
Secrets & Configuration + +### 6.1 Required environment variables + +| Variable | Consumer container | Source | Notes | +|---|---|---|---| +| `CLOUDFLARE_TUNNEL_TOKEN` | `cloudflared` | Operator (existing) | Long-lived tunnel JWT, passed via `--token` | +| `SUPABASE_URL` | `autocli-daily` | Operator's `.env` | Same name `scripts/sync_autocli_jobs.py` already reads | +| `SUPABASE_SERVICE_ROLE_KEY` | `autocli-daily` | Operator's `.env` | Matches the script's actual env-var name (or `SUPABASE_KEY` fallback). Never reaches chrome/cloudflared. | +| `SUPABASE_ANON_KEY` | `autocli-daily` | Operator's `.env` | Used by `/jobs` read-only path | +| `API_RUN_TOKEN` | `autocli-daily` | Generated at deploy (`openssl rand -hex 32`) | **Enforced** by FastAPI: `GET /api/status`, `POST /api/run`, `GET /api/logs`, AND `GET /jobs` all require `Authorization: Bearer ${API_RUN_TOKEN}`; missing/wrong → 401. (`/api/health` and `/api/metrics` are intentionally open — see §5.1 route table.) Defense-in-depth in case Cloudflare Access ever fails open. | +| `VNC_PASSWORD` | `autocli-chrome` | Generated at deploy (`openssl rand -base64 18`) | **Never** uses the local-dev default `stagehand` in prod; the operator gets the generated value once and stores it (1Password / similar). 
| +| `GF_SECURITY_ADMIN_PASSWORD` | `grafana` | Generated at deploy | Bootstrap admin | +| `TZ` | all | `Europe/London` | | +| `CRON_SCHEDULE` | `autocli-daily` | `0 3 * * *` | Override-able | + +### 6.2 Server file layout + +``` +/home/rick/autocli-stack/ +├── docker-compose.yml ← committed in repo, scp'd here at deploy +├── .env ← 600 perms, rick-only +├── data/ +│ ├── chrome-profile/ ← named-volume backing dir (LinkedIn login lives here) +│ ├── output/ ← daily JSONs, 30 d retention +│ ├── logs/ ← run-*.log +│ ├── prom-data/ ← prometheus tsdb +│ └── grafana-data/ ← grafana sqlite + plugins +``` + +> **No `cloudflared/` directory on the server** — token mode (§5.3) keeps ingress definitions in the Cloudflare dashboard, not in a file. + +### 6.3 Secret transfer mechanism + +For each secret the operator owns (`CLOUDFLARE_TUNNEL_TOKEN`, `SUPABASE_*`): + +1. Operator writes the value into a local file `~/.autocli-secrets.env` (`chmod 600`). **This is the operator's source-of-truth file and is never deleted by the agent.** +2. Implementation phase: agent runs `cp ~/.autocli-secrets.env /tmp/autocli-secrets.$$.env` to make a temp copy, then `scp /tmp/autocli-secrets.$$.env rick@100.108.80.9:~/autocli-stack/.env`, then `shred -u /tmp/autocli-secrets.$$.env` to wipe the temp copy only. +3. Generated secrets (`API_RUN_TOKEN`, `VNC_PASSWORD`, `GF_SECURITY_ADMIN_PASSWORD`) are produced *on the server* during deploy and appended directly to `~/autocli-stack/.env`; the values are printed once to the operator's terminal via the SSH session. +4. Secrets are never echoed to the chat transcript and never committed to git. + +### 6.4 Per-service env scoping + +`docker-compose.yml` does **not** use a global `env_file:` shortcut. Instead each service gets its own explicit `environment:` block referencing only the keys it needs. Example: `cloudflared` sees `CLOUDFLARE_TUNNEL_TOKEN` only; `autocli-chrome` sees `VNC_PASSWORD` only; Supabase keys live only inside `autocli-daily`. 
+ +--- + +## 7. Acceptance Criteria & Phased Verification + +Each phase is a hard gate. Implementation moves to the next only after all checks of the previous pass. + +### Phase 0 — Local image build (context = repo root, matches CI) + +The operator's Mac is arm64-darwin; the production image runs linux/amd64. A `cargo build` on the host would produce a Mach-O binary that can't run inside a Linux container. The binary must therefore be built **inside a Docker rust container** that runs as `linux/amd64`. + +> **Toolchain pin.** `rust:1.94-slim-bookworm` matches the operator's local `rustc 1.94.1` (verified 2026-05-16) so Phase 0 / CI / dev agree byte-for-byte. The long-term hardening is to add a tracked `rust-toolchain.toml` at repo root (single source of truth for all three environments); until that PR lands, the pin is duplicated here and in the GitHub Actions workflow. + +```bash +cd /Users/sanchezrick/Documents/Github/AutoCLI-daily # the worktree, NOT deploy/ + +# Build autocli for linux/amd64 inside a throwaway Rust container. +# Volume-cache cargo registry so subsequent builds are fast. +mkdir -p deploy/daily/bin +docker run --rm --platform linux/amd64 \ + -v "$PWD":/work -w /work \ + -v autocli-daily-cargo-cache:/usr/local/cargo/registry \ + -v autocli-daily-cargo-target:/work/target \ + rust:1.94-slim-bookworm \ + bash -c "cargo build --release -p autocli && cp target/release/autocli deploy/daily/bin/autocli" +chmod +x deploy/daily/bin/autocli +file deploy/daily/bin/autocli # MUST say "ELF 64-bit LSB executable, x86-64" — not "Mach-O" + +# Now build images using the same context as CI. +docker buildx build --platform linux/amd64 -f deploy/chrome/Dockerfile -t test-chrome . +docker buildx build --platform linux/amd64 -f deploy/daily/Dockerfile -t test-daily . 
+docker run --rm --platform linux/amd64 test-daily /app/bin/autocli --version +``` + +✅ `file` shows ELF/x86-64; both images build; `autocli --version` returns a non-empty string from inside `test-daily`. + +> CI mirrors this: the `build-autocli-binary` job in `deploy-microservice.yml` runs on an `ubuntu-latest` (linux/amd64) runner but builds inside the same `rust:1.94-slim-bookworm` container. Building directly on the runner host links against its newer GLIBC (2.39), and the resulting binary fails to load in the bookworm-based runtime image (GLIBC 2.36). + +### Phase 1 — Local e2e (no Cloudflare Tunnel) +```bash +docker compose -f deploy/docker-compose.local.yml up -d +# manual: open http://localhost:6081/vnc.html, log into LinkedIn once +LOCAL_TOKEN="$(grep ^API_RUN_TOKEN= deploy/.env.local | cut -d= -f2-)" + +curl -s http://localhost:8081/api/health # 200 (open endpoint) +docker exec autocli-daily-local /app/run-daily.sh # forced run +curl -s -H "Authorization: Bearer $LOCAL_TOKEN" \ + http://localhost:8081/api/status | jq # last_exit_code:0 +``` +✅ JSON written to `data/output/`; Supabase `jobs.jobs` has new rows; `/api/status` (with Bearer) shows `last_exit_code:0`. + +### Phase 2 — CI green & images on GHCR + +Tag expectations depend on the branch being pushed (matches §4.1 workflow): + +**Pushing `feat/daily-microservice`** (this design's working branch) — expect: +- `ghcr.io/ricksanchez88e/autocli-chrome:branch-feat-daily-microservice` +- `ghcr.io/ricksanchez88e/autocli-chrome:sha-<short-sha>` +- `ghcr.io/ricksanchez88e/autocli-daily:branch-feat-daily-microservice` +- `ghcr.io/ricksanchez88e/autocli-daily:sha-<short-sha>` +- **No `:main` tag** (would be a bug — Watchtower in prod tracks `:main`) + +**After merge to `main`** — expect (in addition to the existing sha tags): +- `ghcr.io/ricksanchez88e/autocli-chrome:main` +- `ghcr.io/ricksanchez88e/autocli-daily:main` + +Phase 3 server bring-up reads `:main` and therefore only runs after merge (or as a deliberate one-off pull of `:branch-*` for early staging). 
+ +### Phase 3 — Server bring-up (executed by the implementing agent) +```bash +ssh rick@100.108.80.9 +docker stop skyvern-skyvern-1 skyvern-skyvern-ui-1 +docker rm skyvern-skyvern-1 skyvern-skyvern-ui-1 +mkdir -p ~/autocli-stack +# scp docker-compose.yml + .env (with secrets) here — no cloudflared config file in token mode +cd ~/autocli-stack +docker compose pull +docker compose up -d +``` +✅ `docker ps` shows 5 new containers healthy (`autocli-chrome`, `autocli-daily`, `cloudflared`, `prometheus`, `grafana`). Existing `job-*`, `sub2api*`, `browser-automation-*`, `browseruse-debug` untouched. + +### Phase 4 — Tunnel & Access reachable + +Phase 4 is split into three sub-phases because `autocli-cdp` is the high-risk surface and must not be exposed until the rest is proven. Each sub-phase is a hard gate. + +```bash +DOMAIN="" +CF_ID="" # from Cloudflare Access → Service Tokens +CF_SECRET="" +TOKEN="" +``` + +#### Phase 4a — Pre-CDP gate: add 3 subdomains to the Cloudflare Tunnel dashboard +Add `autocli-vnc`, `autocli-api`, `autocli-grafana` (NOT `autocli-cdp` yet). Then probe: + +```bash +# Unauthenticated → 302 to Cloudflare Access login (never 200, never 502) +for sub in vnc api grafana; do + code=$(curl -s -o /dev/null -w "%{http_code}" "https://autocli-${sub}.${DOMAIN}/") + echo "${sub} unauth: ${code}" # MUST be 302 +done + +# Humans-only negative machine probe — vnc/grafana have NO Service Token policy by design. +# Sending a Service Token must STILL produce 302 (machines are not admitted; only email OTP is). 
+curl -sI -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + "https://autocli-vnc.${DOMAIN}/" | head -1 # MUST be 302 — proves Service Token does NOT grant access here +curl -sI -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + "https://autocli-grafana.${DOMAIN}/" | head -1 # MUST be 302 — same negative-probe semantics + +# Machine probe — api.autocli is the ONLY subdomain in 4a with a Service Token policy +curl -sI -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + "https://autocli-api.${DOMAIN}/api/health" | head -1 # HTTP/2 200 (open endpoint after Access) +curl -s -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + -H "Authorization: Bearer ${TOKEN}" \ + "https://autocli-api.${DOMAIN}/api/metrics" | grep -c autocli_daily_ # ≥ 5 + +# API_RUN_TOKEN enforcement (independent of Cloudflare Access) +curl -sI -X POST -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + "https://autocli-api.${DOMAIN}/api/run" | head -1 # HTTP/2 401 (no Bearer) +curl -sI -X POST -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + -H "Authorization: Bearer ${TOKEN}" \ + "https://autocli-api.${DOMAIN}/api/run" | head -1 # HTTP/2 202 + +# /jobs read proxy +curl -s -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + -H "Authorization: Bearer ${TOKEN}" \ + "https://autocli-api.${DOMAIN}/jobs?since=2026-05-15" | jq 'length' # ≥ 0 +``` + +✅ All Phase 4a probes match expected codes. **Phase 4b runs only if every line above passed.** + +#### Phase 4b — Add cdp.autocli to the Cloudflare Tunnel dashboard +Only after Phase 4a is green: +1. Confirm Access Application for `autocli-cdp` exists with **two** policies (dedicated short-lived Service Token + mTLS client-cert; AND operator email + required WARP device posture). See §5.3. +2. 
Add the hostname to the Tunnel dashboard pointing at `http://autocli-chrome:9222`. + +#### Phase 4c — Probe cdp.autocli + +The `autocli-cdp` Application carries the **dedicated** Service Token (different `CF_ID_CDP` / `CF_SECRET_CDP`) plus mTLS — `autocli-api`'s credentials must be rejected. Three HTTP probes plus one real WebSocket probe: + +```bash +# 4c-1. Unauthenticated → 302 +curl -s -o /dev/null -w "%{http_code}\n" "https://autocli-cdp.${DOMAIN}/json/list" # 302 + +# 4c-2. Wrong (api-scoped) Service Token + no mTLS → 302/403 (proves CDP token is scoped) +curl -sI -H "CF-Access-Client-Id: ${CF_ID}" -H "CF-Access-Client-Secret: ${CF_SECRET}" \ + "https://autocli-cdp.${DOMAIN}/json/list" | head -1 # MUST be 302 or 403 + +# 4c-3. Right CDP Service Token + mTLS client cert → 200 +curl -sI \ + -H "CF-Access-Client-Id: ${CF_ID_CDP}" -H "CF-Access-Client-Secret: ${CF_SECRET_CDP}" \ + --cert "$HOME/.cf-access/cdp-client.crt" --key "$HOME/.cf-access/cdp-client.key" \ + "https://autocli-cdp.${DOMAIN}/json/list" | head -1 # HTTP/2 200 + +# 4c-4. Real WebSocket probe — must use a WebSocket client, NOT `curl -I` (HEAD). +# Cloudflare WS uses HTTP/1.1 Upgrade semantics (101 Switching Protocols); +# HTTP/2 has its own multiplexed WS (RFC 8441) but Cloudflare doesn't speak it, +# so the probe MUST force HTTP/1.1. +# +# Step 1: extract the actual page websocket URL and rewrite host to autocli-cdp. +WS_URL=$(curl -s \ + -H "CF-Access-Client-Id: ${CF_ID_CDP}" -H "CF-Access-Client-Secret: ${CF_SECRET_CDP}" \ + --cert "$HOME/.cf-access/cdp-client.crt" --key "$HOME/.cf-access/cdp-client.key" \ + "https://autocli-cdp.${DOMAIN}/json/list" \ + | jq -r '[.[] | select(.type == "page")][0].webSocketDebuggerUrl' \ + | sed -E "s|ws://[^/]+|wss://autocli-cdp.${DOMAIN}|") +echo "WS URL: ${WS_URL}" # MUST be a non-empty wss://autocli-cdp.${DOMAIN}/devtools/page/ + +# Step 2 (preferred): use `websocat` for a real protocol-level handshake + one CDP round-trip. 
+# brew install websocat # macOS; apt install websocat # Debian-derived +echo '{"id":1,"method":"Target.getTargets"}' \ + | websocat -1 -t \ + --header="CF-Access-Client-Id: ${CF_ID_CDP}" \ + --header="CF-Access-Client-Secret: ${CF_SECRET_CDP}" \ + --client-pkcs12-der "$HOME/.cf-access/cdp-client.p12" \ + "${WS_URL}" \ + | jq '.result.targetInfos | length' # MUST be ≥ 1 (at least the current page is a target) + +# Step 2 (fallback if websocat unavailable): curl HTTP/1.1 GET with Upgrade headers. +# We swap scheme wss://→https:// so curl accepts the URL; -N disables output buffering; +# `-i` shows headers; do NOT use `-I` (HEAD). +KEY=$(openssl rand -base64 16) +curl --http1.1 -i -s -N --max-time 5 \ + -H "Connection: Upgrade" -H "Upgrade: websocket" \ + -H "Sec-WebSocket-Version: 13" -H "Sec-WebSocket-Key: ${KEY}" \ + -H "CF-Access-Client-Id: ${CF_ID_CDP}" -H "CF-Access-Client-Secret: ${CF_SECRET_CDP}" \ + --cert "$HOME/.cf-access/cdp-client.crt" --key "$HOME/.cf-access/cdp-client.key" \ + "${WS_URL/wss:/https:}" 2>&1 | head -1 +# Expect: "HTTP/1.1 101 Switching Protocols" (NOT "HTTP/2 101" — that header doesn't exist.) +``` + +✅ All four Phase 4c probes match. The CDP surface is now live. + +### Phase 5 — Forced run via API +```bash +curl -X POST \ + -H "CF-Access-Client-Id: $CF_ID" \ + -H "CF-Access-Client-Secret: $CF_SECRET" \ + -H "Authorization: Bearer $API_RUN_TOKEN" \ + https://autocli-api./api/run +sleep 240 # max single-attempt budget; retries follow §5.2 schedule (15s, 60s, 240s) + +# /api/status is Bearer-protected — same token as /api/run +curl -s -H "CF-Access-Client-Id: $CF_ID" -H "CF-Access-Client-Secret: $CF_SECRET" \ + -H "Authorization: Bearer $API_RUN_TOKEN" \ + https://autocli-api./api/status | jq +``` +✅ `last_exit_code == 0`, `rows_upserted > 0`; Supabase shows new rows; Grafana dashboard shows the run. + +### Phase 6 — Two consecutive scheduled runs +Two days, no manual intervention, `last_run_unixts` advances daily, no failed runs. 
+ +### Failure-mode contingencies + +| Failure | Detection | Mitigation | +|---|---|---| +| Bad image rolled out | `/api/health` 503; Grafana CDP-up flatlines | Pin previous tag: `docker compose pull` with `:sha-<short-sha>` in override | +| Chrome profile corruption | `/api/run` fails with "LinkedIn login required" | VNC in, re-login; if volume itself broken, restore from `data/chrome-profile.bak` (a future PR adds the backup cron) | +| Cloudflare Tunnel disconnect | Public 502 | `docker restart cloudflared`; verify token validity | +| Supabase rate limit / 429 | `run-daily.sh` exits non-zero | Apply the §5.2 unified policy — up to 3 retries with backoffs of 15 s / 60 s / 240 s (4 attempts in total). If the 4th attempt also fails: record in `last_run.json`, increment `autocli_daily_runs_total{result="failure"}`, wait for the next cron tick. | +| supercronic drift (>25 h since last run) | Grafana "time since last run" panel red | `docker compose restart autocli-daily` | + +--- + +## 8. Out of Scope (Explicit) + +| Item | Reason / Future plan | +|---|---| +| Multiple LinkedIn accounts | One profile per chrome container; future PR can horizontally scale | +| Loki / log aggregation | Stick to `docker logs` + the `/api/logs` endpoint for now; revisit when a second service joins | +| Alertmanager / Slack-Discord webhooks | Grafana panels + email-on-error from a future PR | +| Indeed adapter into the same cron | Land Indeed PR first, then add a single cron line | +| HTTPS certificates on origin | Cloudflare's edge already terminates public HTTPS, and the Tunnel reaches the origin over cloudflared's outbound encrypted connection | +| Backup of `chrome-profile` volume | Documented but not implemented in this phase | +| Multi-region failover | Single-host design; future concern | + +--- + +## 9. Risks & Open Items + +1. **CDP public exposure.** Cloudflare Access *must* be configured before bringing the CDP surface up. 
The implementation will refuse to add the `autocli-cdp.<domain>` hostname to the Cloudflare Tunnel dashboard until every probe in §7 **Phase 4a** passes for the other three subdomains (`vnc`, `api`, `grafana`) AND the operator has confirmed the Access Application for `autocli-cdp` exists with both policies described in §5.3 (dedicated short-lived Service Token + mTLS for machines; operator email + required WARP posture for humans). Only then does **Phase 4b** add the ingress, followed by **Phase 4c** probes. +2. **LinkedIn cookie lifetime.** Empirically 30-90 days. When it expires, `last_exit_code` becomes non-zero with a recognisable error string. Operator action: open `/vnc/` → re-login. No code change needed. +3. **Skyvern decommission.** The operator authorised stopping `skyvern-skyvern-{1,ui-1}`. Their data volumes are not deleted by this design — only the running containers. Skyvern can be re-enabled later by `docker compose up` from its own compose file if needed. +4. **`<domain>`.** Spec leaves the apex hostname as a placeholder; the operator must provide it (and verify it is a Cloudflare-managed zone) before Phase 3. The 4 subdomains are flattened to **one level** — `autocli-{vnc,cdp,api,grafana}.<domain>` — because Cloudflare Universal SSL on Free plans covers only `<domain>` + one-level `*.<domain>` (two-level subdomains like `vnc.autocli.<domain>` would TLS-handshake-fail until the operator upgrades to Pro / Total TLS / ACM). `/jobs` rides on `autocli-api`, not its own subdomain. +5. **`API_RUN_TOKEN` rotation.** Generated at first deploy and stored only on the server. Rotation requires editing `.env` and recreating the container with `docker compose up -d autocli-daily` (a plain `docker compose restart` does not re-read `.env`). + +--- + +## 10. Glossary + +| Term | Meaning | +|---|---| +| **Stagehand image** | The operator's locally-built `my-stagehand-app-chrome` image — Chromium + Xvfb + x11vnc + noVNC + socat in a single container. Renamed to `autocli-chrome` in this design.
#!/bin/bash
# Entrypoint for the Chrome container in VNC (visual) mode.
# Startup order:
#   1) Xvfb virtual display
#   2) x11vnc + autocutsel clipboard bridge
#   3) noVNC web gateway
#   4) socat CDP forwarder
#   5) Chromium (exec'd last, replacing this shell)

set -u  # deliberately no -e: a clipboard/tool failure must not block the main services

# Remove Chromium's Singleton* files and the lock/socket of X display :99 so
# that startup is not blocked by leftovers (presumably from a previous run of
# a container that reuses the same profile volume).
cleanup_locks() {
    rm -f /root/.config/chromium/SingletonLock \
          /root/.config/chromium/SingletonCookie \
          /root/.config/chromium/SingletonSocket \
          /tmp/.X99-lock 2>/dev/null
    rm -f /tmp/.X11-unix/X99 2>/dev/null
}

cleanup_locks

# Xvfb must be ready before anything else is started. Poll up to 30 times,
# 0.3 s apart, and abort loudly if the display never answers: every later
# service needs DISPLAY=:99, so continuing would only produce a half-dead
# container that still looks "up".
Xvfb :99 -screen 0 1920x1080x24 -ac +extension RANDR &
XVFB_PID=$!
XVFB_READY=0
for attempt in $(seq 1 30); do
    if xdpyinfo -display :99 >/dev/null 2>&1; then
        XVFB_READY=1
        break
    fi
    sleep 0.3
done
if [ "$XVFB_READY" -ne 1 ]; then
    echo "[entrypoint] Xvfb did not become ready after 30 probes (PID=$XVFB_PID)" >&2
    exit 1
fi
echo "[entrypoint] Xvfb ready (PID=$XVFB_PID)"

# Two-way clipboard sync (VNC <-> X11 CLIPBOARD/PRIMARY). Non-fatal by design.
export DISPLAY=:99
autocutsel -fork || echo "[entrypoint] autocutsel CLIPBOARD failed (non-fatal)"
autocutsel -selection PRIMARY -fork || echo "[entrypoint] autocutsel PRIMARY failed (non-fatal)"

# VNC server
mkdir -p /root/.vnc
if [ -z "${VNC_PASSWORD:-}" ]; then
    # The fallback below is a fixed string baked into this script; make its use visible.
    echo "[entrypoint] WARNING: VNC_PASSWORD is unset; using the built-in default VNC password" >&2
fi
x11vnc -storepasswd "${VNC_PASSWORD:-stagehand}" /root/.vnc/passwd >/dev/null
x11vnc -display :99 \
    -forever -shared \
    -rfbauth /root/.vnc/passwd \
    -rfbport 5900 \
    -bg \
    -o /tmp/x11vnc.log
# -nopw removed: it was overriding -rfbauth and leaving VNC open with no
# password. Anyone reaching :5900 (native VNC) or :6080 (noVNC web) could
# drive the logged-in browser. Now password auth from /root/.vnc/passwd is
# actually enforced. The compose file additionally binds the host ports to
# 127.0.0.1 so the only public path is Cloudflare Tunnel + Access.
echo "[entrypoint] x11vnc on :5900"

# noVNC web gateway
websockify --web=/usr/share/novnc 6080 127.0.0.1:5900 \
    > /tmp/novnc.log 2>&1 &
echo "[entrypoint] noVNC on :6080 (PID=$!)"

# socat: clients connect to 9222; traffic is forwarded to Chrome listening on
# 127.0.0.1:9223 (works around Chrome's DNS-rebinding protection).
socat TCP-LISTEN:9222,fork,reuseaddr,bind=0.0.0.0 TCP:127.0.0.1:9223 \
    > /tmp/socat.log 2>&1 &
echo "[entrypoint] socat 9222→9223 (PID=$!)"

# Extension loading: scan /opt/extensions/*/manifest.json and join the matching
# directories into a single --load-extension=a,b,c argument.
EXT_DIRS=""
if [ -d /opt/extensions ]; then
    for d in /opt/extensions/*/; do
        [ -f "$d/manifest.json" ] && EXT_DIRS="${EXT_DIRS:+$EXT_DIRS,}${d%/}"
    done
fi
# Kept in an array so a directory name containing whitespace stays one argument.
EXT_ARGS=()
if [ -n "$EXT_DIRS" ]; then
    EXT_ARGS+=("--load-extension=$EXT_DIRS")
    echo "[entrypoint] loading extensions: $EXT_DIRS"
fi

# Main process: exec replaces this shell with Chromium, so Chromium runs
# directly under whatever launched this script (tini in the image ENTRYPOINT).
echo "[entrypoint] starting chromium ..."
# ${EXT_ARGS[@]+...} expands to nothing for an empty array without tripping `set -u`.
exec chromium \
    --display=:99 \
    --no-first-run \
    --no-default-browser-check \
    --no-sandbox \
    --disable-gpu \
    --disable-dev-shm-usage \
    --remote-debugging-port=9223 \
    --remote-debugging-address=127.0.0.1 \
    --remote-allow-origins=* \
    --user-data-dir=/root/.config/chromium \
    --disable-blink-features=AutomationControlled \
    --use-fake-ui-for-media-stream \
    --use-fake-device-for-media-stream \
    --enable-usermedia-screen-capturing \
    --disable-features=Translate,OptimizationHints,MediaRouter \
    --password-store=basic \
    --lang=en-US \
    ${EXT_ARGS[@]+"${EXT_ARGS[@]}"} \
    --window-position=0,0 \
    --window-size=1920,1080 \
    --start-maximized
deploy/daily/api/pyproject.toml deploy/daily/api/uv.lock* /app/api/ +RUN cd /app/api && uv sync --frozen --no-dev || uv sync --no-dev + +# Shipped sync script, priority scorer, and sponsor filter +COPY scripts/sync_autocli_jobs.py scripts/job_priority_scorer.py scripts/job_priority_config.py scripts/sponsor_filter.py /app/scripts/ + +# FastAPI app +COPY deploy/daily/api /app/api + +# Shell glue +COPY deploy/daily/cdp-discover.sh deploy/daily/run-daily.sh deploy/daily/entrypoint.sh /app/ +RUN chmod +x /app/cdp-discover.sh /app/run-daily.sh /app/entrypoint.sh + +COPY deploy/daily/crontab /etc/cron.d/autocli + +# Pre-built autocli binary (produced by Phase 0 docker-rust step OR CI build-autocli-binary job) +COPY deploy/daily/bin/autocli /app/bin/autocli +RUN chmod +x /app/bin/autocli + +# Writable runtime dirs +RUN mkdir -p /data/output /data/logs /run && \ + install -m 0644 /dev/null /data/logs/.keep && \ + install -m 0644 /dev/null /data/output/.keep + +ENV TZ=Europe/London +# CRON_SCHEDULE / OUTPUT_RETENTION_DAYS were here previously but supercronic +# reads /etc/cron.d/autocli verbatim and does NOT do env substitution, so +# the env vars were misleading placebos. Schedule lives in deploy/daily/crontab. + +EXPOSE 8080 + +ENTRYPOINT ["tini", "--", "/app/entrypoint.sh"] diff --git a/deploy/daily/api/__init__.py b/deploy/daily/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/deploy/daily/api/main.py b/deploy/daily/api/main.py new file mode 100644 index 0000000..9b8b6ed --- /dev/null +++ b/deploy/daily/api/main.py @@ -0,0 +1,180 @@ +"""FastAPI control plane for autocli-daily. 
+ +Routes (mounted per SPEC §5.1): + GET /api/status [Bearer] last_run.json + POST /api/run [Bearer] spawn run-daily.sh (flock-protected) + GET /api/logs [Bearer] tail -n 200 latest log + GET /api/metrics [open] Prometheus exposition + GET /api/health [open] chrome reachability + cdp endpoint sanity + GET /jobs [Bearer] Supabase read proxy +""" +from __future__ import annotations + +import json +import os +import time +from pathlib import Path +from typing import Annotated + +import httpx +from fastapi import Depends, FastAPI, HTTPException, Query, Response, status +from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, generate_latest + +import trigger + +# ── config ─────────────────────────────────────────────────────────── +API_RUN_TOKEN = os.environ["API_RUN_TOKEN"] +SUPABASE_URL = os.environ["SUPABASE_URL"] +SUPABASE_ANON_KEY = os.environ["SUPABASE_ANON_KEY"] +CHROME_HOST = os.environ.get("CHROME_HOST", "autocli-chrome") +CHROME_PORT = int(os.environ.get("CHROME_PORT", "9222")) +LAST_RUN_PATH = Path("/data/output/last_run.json") +LOGS_DIR = Path("/data/logs") +CDP_ENDPOINT_FILE = Path("/run/cdp-endpoint.env") + +# ── metrics ────────────────────────────────────────────────────────── +M_RUNS_TOTAL = Counter( + "autocli_daily_runs_total", + "Run outcomes", + labelnames=("result",), +) +M_LAST_RUN_UNIXTS = Gauge("autocli_daily_last_run_unixts", "Unix ts of last run start") +M_LAST_DURATION = Gauge("autocli_daily_last_duration_seconds", "Duration of last run") +M_LAST_EXIT_CODE = Gauge("autocli_daily_last_exit_code", "Exit code of last run") +M_RUN_IN_PROGRESS = Gauge("autocli_daily_run_in_progress", "1 if a run is in flight") +M_ROWS_SCRAPED = Counter("autocli_daily_rows_scraped_total", "Cumulative scraped rows") +M_ROWS_UPSERTED = Counter("autocli_daily_rows_upserted_total", "Cumulative upserted rows") +M_ROWS_SKIPPED = Counter("autocli_daily_rows_skipped_total", "Cumulative 
skipped rows") +M_CDP_UP = Gauge("autocli_chrome_cdp_up", "1 if chrome:9222 reachable") + +# Counter de-dupe key (do not double-count between scrapes) +_last_seen_counters = {"upserted": 0, "scraped": 0, "skipped": 0} + + +# ── auth ───────────────────────────────────────────────────────────── +bearer = HTTPBearer(auto_error=False) + + +def require_bearer(creds: Annotated[HTTPAuthorizationCredentials | None, Depends(bearer)]): + if creds is None or creds.scheme.lower() != "bearer" or creds.credentials != API_RUN_TOKEN: + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="missing or invalid bearer") + return True + + +# ── app ────────────────────────────────────────────────────────────── +app = FastAPI(title="autocli-daily") + + +def _read_last_run() -> dict: + if not LAST_RUN_PATH.exists(): + return {"last_run_unixts": 0, "last_exit_code": None, "rows_scraped": 0, "rows_upserted": 0, "rows_skipped": 0, "errors": []} + return json.loads(LAST_RUN_PATH.read_text()) + + +def _refresh_metrics(): + """Reflect last_run.json + chrome reachability into Prometheus gauges.""" + lr = _read_last_run() + if lr.get("last_run_unixts"): + M_LAST_RUN_UNIXTS.set(lr["last_run_unixts"]) + if lr.get("last_duration_seconds") is not None: + M_LAST_DURATION.set(lr["last_duration_seconds"]) + if lr.get("last_exit_code") is not None: + M_LAST_EXIT_CODE.set(lr["last_exit_code"]) + M_RUN_IN_PROGRESS.set(1 if trigger.is_running() else 0) + + # Counter delta — only emit increase, never decrease + for field, counter in (("rows_upserted", M_ROWS_UPSERTED), + ("rows_scraped", M_ROWS_SCRAPED), + ("rows_skipped", M_ROWS_SKIPPED)): + cur = lr.get(field, 0) + delta = cur - _last_seen_counters[field.split("_", 1)[1]] + if delta > 0: + counter.inc(delta) + _last_seen_counters[field.split("_", 1)[1]] = cur + + +@app.get("/api/health") +def health(): + try: + # Chrome DevTools rejects Host headers that aren't an IP or "localhost" + # (DNS-rebinding protection). 
@app.get("/jobs")
def jobs(_: Annotated[bool, Depends(require_bearer)],
         since: str = Query(..., description="ISO date — rows added (created_at) on or after this date")):
    """Return up to 500 rows of ``jobs.jobs`` created on or after ``since``.

    Args:
        since: ISO-8601 date or datetime. Parsed only to validate it; the
            original string is what gets sent to Supabase and echoed back.

    Returns:
        ``{"count": <int>, "since": <since>, "rows": [...]}``, newest
        ``created_at`` first.

    Raises:
        HTTPException: 422 when ``since`` is not ISO-8601.
    """
    # Lazy import, matching the supabase import below (keeps this edit self-contained).
    from datetime import datetime

    try:
        # Reject malformed input here with a clear 422 rather than forwarding
        # an arbitrary string into the upstream filter.
        datetime.fromisoformat(since)
    except ValueError as exc:
        raise HTTPException(status_code=422, detail="since must be an ISO-8601 date or datetime") from exc

    # Lazy import — supabase client takes ~100ms to construct.
    # Uses SUPABASE_ANON_KEY (a real anon JWT, not service-role) so RLS on
    # jobs.jobs is actually enforced. Policy `anon_read_jobs_jobs` (see
    # supabase/migrations/20260516120100_enable_jobs_jobs_rls.sql) grants
    # SELECT-only to anon/authenticated.
    from supabase import create_client
    client = create_client(SUPABASE_URL, SUPABASE_ANON_KEY)
    # Filter on created_at (database insert time), NOT post_time (LinkedIn's
    # original posting date — often days/weeks old for fresh scrapes). Callers
    # asking "jobs added since X" expect ingestion time. Order by created_at
    # newest first so the freshest scrapes surface.
    res = (
        client.schema("jobs")
        .table("jobs")
        .select("id, job_title, company_name, location, salary, post_time, apply_url, priority_score, created_at")
        .gte("created_at", since)
        .order("created_at", desc=True)
        .limit(500)
        .execute()
    )
    rows = res.data or []
    return {"count": len(rows), "since": since, "rows": rows}
0000000..4e301d5 --- /dev/null +++ b/deploy/daily/api/tests/conftest.py @@ -0,0 +1,12 @@ +"""Pytest configuration: ensure deploy/daily/api is on sys.path so +'import main' and 'import trigger' work without package context, +matching the uvicorn invocation in entrypoint.sh (cd /app/api).""" +from __future__ import annotations + +import sys +from pathlib import Path + +# Add the api directory so 'import main' / 'import trigger' work flat. +_api_dir = str(Path(__file__).parent.parent) +if _api_dir not in sys.path: + sys.path.insert(0, _api_dir) diff --git a/deploy/daily/api/tests/test_main.py b/deploy/daily/api/tests/test_main.py new file mode 100644 index 0000000..a39ad29 --- /dev/null +++ b/deploy/daily/api/tests/test_main.py @@ -0,0 +1,114 @@ +"""Auth + route shape tests. Run via: + cd deploy/daily/api && uv run --group dev pytest -v +""" +from __future__ import annotations + +import json +import os +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + + +@pytest.fixture +def client(tmp_path, monkeypatch): + # Required env BEFORE main is imported + monkeypatch.setenv("API_RUN_TOKEN", "test-token-abc") + monkeypatch.setenv("SUPABASE_URL", "https://example.supabase.co") + monkeypatch.setenv("SUPABASE_ANON_KEY", "test-anon") + monkeypatch.setenv("CHROME_HOST", "example-chrome") + # Redirect runtime paths + data_dir = tmp_path / "data" / "output" + data_dir.mkdir(parents=True) + logs_dir = tmp_path / "data" / "logs" + logs_dir.mkdir(parents=True) + # Force fresh import to pick up env; unregister prometheus metrics first + # to avoid "Duplicated timeseries" on repeated test-fixture setup. + import sys + from prometheus_client import REGISTRY + + # Unregister all collectors before dropping the module so the fresh + # import can re-register them without hitting duplicate errors. 
+ collectors = list(REGISTRY._names_to_collectors.values()) + for c in set(collectors): + try: + REGISTRY.unregister(c) + except Exception: + pass + sys.modules.pop("main", None) + + import main as m + m.LAST_RUN_PATH = data_dir / "last_run.json" + m.LOGS_DIR = logs_dir + m.CDP_ENDPOINT_FILE = tmp_path / "run" / "cdp-endpoint.env" + return TestClient(m.app) + + +def test_status_requires_bearer(client): + r = client.get("/api/status") + assert r.status_code == 401 + + +def test_status_wrong_bearer(client): + r = client.get("/api/status", headers={"Authorization": "Bearer wrong"}) + assert r.status_code == 401 + + +def test_status_returns_default_when_no_last_run(client): + r = client.get("/api/status", headers={"Authorization": "Bearer test-token-abc"}) + assert r.status_code == 200 + body = r.json() + assert body["last_run_unixts"] == 0 + assert body["rows_scraped"] == 0 + assert body["run_in_progress"] is False + + +def test_status_reflects_last_run_file(client, tmp_path): + import main as m + m.LAST_RUN_PATH.write_text(json.dumps({ + "last_run_unixts": 1747958400, + "last_duration_seconds": 142.3, + "last_exit_code": 0, + "rows_scraped": 100, + "rows_upserted": 75, + "rows_skipped": 25, + "errors": [], + })) + r = client.get("/api/status", headers={"Authorization": "Bearer test-token-abc"}) + assert r.status_code == 200 + body = r.json() + assert body["last_run_unixts"] == 1747958400 + assert body["rows_upserted"] == 75 + + +def test_run_requires_bearer(client): + r = client.post("/api/run") + assert r.status_code == 401 + + +def test_logs_requires_bearer(client): + r = client.get("/api/logs") + assert r.status_code == 401 + + +def test_jobs_requires_bearer(client): + r = client.get("/jobs?since=2026-05-15") + assert r.status_code == 401 + + +def test_metrics_is_open(client): + r = client.get("/api/metrics") + assert r.status_code == 200 + assert "autocli_daily" in r.text + + +def test_health_unreachable_chrome_returns_503(client, monkeypatch): + import httpx + 
"""Subprocess wrapper for /app/run-daily.sh.

Used by both supercronic (via crontab) and FastAPI /api/run.
Provides a synchronous "is it running?" check via flock probe
and a fire-and-forget spawn for the API path.
"""
from __future__ import annotations

import asyncio
import fcntl
import os
import subprocess
from pathlib import Path

LOCK_PATH = Path("/var/lock/autocli-daily.lock")
RUN_DAILY = "/app/run-daily.sh"


def is_running(lock_path: Path | None = None) -> bool:
    """Report whether another process currently holds the run lock.

    Args:
        lock_path: Lock file to probe. ``None`` (the default) means the
            module-level ``LOCK_PATH``, looked up at call time.

    Returns:
        ``True`` if a non-blocking exclusive ``flock`` on the file is refused,
        ``False`` if it is granted or the file does not exist.
    """
    target = LOCK_PATH if lock_path is None else lock_path
    try:
        # No O_CREAT: a probe must never create the lock file. Opening directly
        # (instead of exists()-then-open) also closes the window in which the
        # file could vanish between the check and the open.
        fd = os.open(target, os.O_RDWR)
    except FileNotFoundError:
        return False
    try:
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            return True
        # NOTE(review): between the call above and the unlock below this probe
        # itself holds the lock. If run-daily.sh takes the lock non-blockingly,
        # an attempt landing in that window would be refused — confirm this is
        # acceptable.
        fcntl.flock(fd, fcntl.LOCK_UN)
        return False
    finally:
        os.close(fd)


async def spawn_run_daily() -> int:
    """Spawn run-daily.sh in the background. Returns PID. Does NOT wait.

    The child's stdout/stderr are discarded and it is started in a new
    session (``start_new_session=True``).
    """
    proc = await asyncio.create_subprocess_exec(
        RUN_DAILY,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        start_new_session=True,
    )
    return proc.pid
"0.1.0" +source = { virtual = "." } +dependencies = [ + { name = "fastapi" }, + { name = "httpx" }, + { name = "prometheus-client" }, + { name = "python-multipart" }, + { name = "supabase" }, + { name = "uvicorn", extra = ["standard"] }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "respx" }, +] + +[package.metadata] +requires-dist = [ + { name = "fastapi", specifier = ">=0.115,<0.116" }, + { name = "httpx", specifier = ">=0.28,<0.29" }, + { name = "prometheus-client", specifier = ">=0.21,<0.22" }, + { name = "python-multipart", specifier = ">=0.0.12" }, + { name = "supabase", specifier = ">=2.8,<3.0" }, + { name = "uvicorn", extras = ["standard"], specifier = ">=0.32,<0.33" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "pytest", specifier = ">=8.3,<9" }, + { name = "pytest-asyncio", specifier = ">=0.24,<1" }, + { name = "respx", specifier = ">=0.21,<1" }, +] + +[[package]] +name = "cachetools" +version = "6.2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/39/91/d9ae9a66b01102a18cd16db0cf4cd54187ffe10f0865cc80071a4104fbb3/cachetools-6.2.6.tar.gz", hash = "sha256:16c33e1f276b9a9c0b49ab5782d901e3ad3de0dd6da9bf9bcd29ac5672f2f9e6", size = 32363, upload-time = "2026-01-27T20:32:59.956Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/45/f458fa2c388e79dd9d8b9b0c99f1d31b568f27388f2fdba7bb66bbc0c6ed/cachetools-6.2.6-py3-none-any.whl", hash = "sha256:8c9717235b3c651603fff0076db52d6acbfd1b338b8ed50256092f7ce9c85bda", size = 11668, upload-time = "2026-01-27T20:32:58.527Z" }, +] + +[[package]] +name = "certifi" +version = "2026.4.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/25/ee/6caf7a40c36a1220410afe15a1cc64993a1f864871f698c0f93acb72842a/certifi-2026.4.22.tar.gz", hash = "sha256:8d455352a37b71bf76a79caa83a3d6c25afee4a385d632127b6afb3963f1c580", size 
= 137077, upload-time = "2026-04-22T11:26:11.191Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/30/7cd8fdcdfbc5b869528b079bfb76dcdf6056b1a2097a662e5e8c04f42965/certifi-2026.4.22-py3-none-any.whl", hash = "sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a", size = 135707, upload-time = "2026-04-22T11:26:09.372Z" }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" }, + { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, + { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/a1/67fe25fac3c7642725500a3f6cfe5821ad557c3abb11c9d20d12c7008d3e/charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", size = 144271, upload-time = "2026-04-02T09:28:39.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/eb/4fc8d0a7110eb5fc9cc161723a34a8a6c200ce3b4fbf681bc86feee22308/charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46", size = 311328, upload-time = "2026-04-02T09:26:24.331Z" }, + { url = "https://files.pythonhosted.org/packages/f8/e3/0fadc706008ac9d7b9b5be6dc767c05f9d3e5df51744ce4cc9605de7b9f4/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2", size = 208061, upload-time = "2026-04-02T09:26:25.568Z" }, + { url = "https://files.pythonhosted.org/packages/42/f0/3dd1045c47f4a4604df85ec18ad093912ae1344ac706993aff91d38773a2/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b", size = 229031, upload-time = "2026-04-02T09:26:26.865Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/67/675a46eb016118a2fbde5a277a5d15f4f69d5f3f5f338e5ee2f8948fcf43/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a", size = 225239, upload-time = "2026-04-02T09:26:28.044Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f8/d0118a2f5f23b02cd166fa385c60f9b0d4f9194f574e2b31cef350ad7223/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116", size = 216589, upload-time = "2026-04-02T09:26:29.239Z" }, + { url = "https://files.pythonhosted.org/packages/b1/f1/6d2b0b261b6c4ceef0fcb0d17a01cc5bc53586c2d4796fa04b5c540bc13d/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb", size = 202733, upload-time = "2026-04-02T09:26:30.5Z" }, + { url = "https://files.pythonhosted.org/packages/6f/c0/7b1f943f7e87cc3db9626ba17807d042c38645f0a1d4415c7a14afb5591f/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1", size = 212652, upload-time = "2026-04-02T09:26:31.709Z" }, + { url = "https://files.pythonhosted.org/packages/38/dd/5a9ab159fe45c6e72079398f277b7d2b523e7f716acc489726115a910097/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15", size = 211229, upload-time = "2026-04-02T09:26:33.282Z" }, + { url = "https://files.pythonhosted.org/packages/d5/ff/531a1cad5ca855d1c1a8b69cb71abfd6d85c0291580146fda7c82857caa1/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5", size = 203552, 
upload-time = "2026-04-02T09:26:34.845Z" }, + { url = "https://files.pythonhosted.org/packages/c1/4c/a5fb52d528a8ca41f7598cb619409ece30a169fbdf9cdce592e53b46c3a6/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d", size = 230806, upload-time = "2026-04-02T09:26:36.152Z" }, + { url = "https://files.pythonhosted.org/packages/59/7a/071feed8124111a32b316b33ae4de83d36923039ef8cf48120266844285b/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7", size = 212316, upload-time = "2026-04-02T09:26:37.672Z" }, + { url = "https://files.pythonhosted.org/packages/fd/35/f7dba3994312d7ba508e041eaac39a36b120f32d4c8662b8814dab876431/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464", size = 227274, upload-time = "2026-04-02T09:26:38.93Z" }, + { url = "https://files.pythonhosted.org/packages/8a/2d/a572df5c9204ab7688ec1edc895a73ebded3b023bb07364710b05dd1c9be/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49", size = 218468, upload-time = "2026-04-02T09:26:40.17Z" }, + { url = "https://files.pythonhosted.org/packages/86/eb/890922a8b03a568ca2f336c36585a4713c55d4d67bf0f0c78924be6315ca/charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c", size = 148460, upload-time = "2026-04-02T09:26:41.416Z" }, + { url = "https://files.pythonhosted.org/packages/35/d9/0e7dffa06c5ab081f75b1b786f0aefc88365825dfcd0ac544bdb7b2b6853/charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6", size = 159330, upload-time = "2026-04-02T09:26:42.554Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/5d/481bcc2a7c88ea6b0878c299547843b2521ccbc40980cb406267088bc701/charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d", size = 147828, upload-time = "2026-04-02T09:26:44.075Z" }, + { url = "https://files.pythonhosted.org/packages/c1/3b/66777e39d3ae1ddc77ee606be4ec6d8cbd4c801f65e5a1b6f2b11b8346dd/charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", size = 309627, upload-time = "2026-04-02T09:26:45.198Z" }, + { url = "https://files.pythonhosted.org/packages/2e/4e/b7f84e617b4854ade48a1b7915c8ccfadeba444d2a18c291f696e37f0d3b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", size = 207008, upload-time = "2026-04-02T09:26:46.824Z" }, + { url = "https://files.pythonhosted.org/packages/c4/bb/ec73c0257c9e11b268f018f068f5d00aa0ef8c8b09f7753ebd5f2880e248/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", size = 228303, upload-time = "2026-04-02T09:26:48.397Z" }, + { url = "https://files.pythonhosted.org/packages/85/fb/32d1f5033484494619f701e719429c69b766bfc4dbc61aa9e9c8c166528b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", size = 224282, upload-time = "2026-04-02T09:26:49.684Z" }, + { url = "https://files.pythonhosted.org/packages/fa/07/330e3a0dda4c404d6da83b327270906e9654a24f6c546dc886a0eb0ffb23/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", size = 215595, upload-time = "2026-04-02T09:26:50.915Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7c/fc890655786e423f02556e0216d4b8c6bcb6bdfa890160dc66bf52dee468/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", size = 201986, upload-time = "2026-04-02T09:26:52.197Z" }, + { url = "https://files.pythonhosted.org/packages/d8/97/bfb18b3db2aed3b90cf54dc292ad79fdd5ad65c4eae454099475cbeadd0d/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", size = 211711, upload-time = "2026-04-02T09:26:53.49Z" }, + { url = "https://files.pythonhosted.org/packages/6f/a5/a581c13798546a7fd557c82614a5c65a13df2157e9ad6373166d2a3e645d/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", size = 210036, upload-time = "2026-04-02T09:26:54.975Z" }, + { url = "https://files.pythonhosted.org/packages/8c/bf/b3ab5bcb478e4193d517644b0fb2bf5497fbceeaa7a1bc0f4d5b50953861/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", size = 202998, upload-time = "2026-04-02T09:26:56.303Z" }, + { url = "https://files.pythonhosted.org/packages/e7/4e/23efd79b65d314fa320ec6017b4b5834d5c12a58ba4610aa353af2e2f577/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", size = 230056, upload-time = "2026-04-02T09:26:57.554Z" }, + { url = "https://files.pythonhosted.org/packages/b9/9f/1e1941bc3f0e01df116e68dc37a55c4d249df5e6fa77f008841aef68264f/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = 
"sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", size = 211537, upload-time = "2026-04-02T09:26:58.843Z" }, + { url = "https://files.pythonhosted.org/packages/80/0f/088cbb3020d44428964a6c97fe1edfb1b9550396bf6d278330281e8b709c/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", size = 226176, upload-time = "2026-04-02T09:27:00.437Z" }, + { url = "https://files.pythonhosted.org/packages/6a/9f/130394f9bbe06f4f63e22641d32fc9b202b7e251c9aef4db044324dac493/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", size = 217723, upload-time = "2026-04-02T09:27:02.021Z" }, + { url = "https://files.pythonhosted.org/packages/73/55/c469897448a06e49f8fa03f6caae97074fde823f432a98f979cc42b90e69/charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", size = 148085, upload-time = "2026-04-02T09:27:03.192Z" }, + { url = "https://files.pythonhosted.org/packages/5d/78/1b74c5bbb3f99b77a1715c91b3e0b5bdb6fe302d95ace4f5b1bec37b0167/charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", size = 158819, upload-time = "2026-04-02T09:27:04.454Z" }, + { url = "https://files.pythonhosted.org/packages/68/86/46bd42279d323deb8687c4a5a811fd548cb7d1de10cf6535d099877a9a9f/charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", size = 147915, upload-time = "2026-04-02T09:27:05.971Z" }, + { url = "https://files.pythonhosted.org/packages/97/c8/c67cb8c70e19ef1960b97b22ed2a1567711de46c4ddf19799923adc836c2/charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0", size = 309234, 
upload-time = "2026-04-02T09:27:07.194Z" }, + { url = "https://files.pythonhosted.org/packages/99/85/c091fdee33f20de70d6c8b522743b6f831a2f1cd3ff86de4c6a827c48a76/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a", size = 208042, upload-time = "2026-04-02T09:27:08.749Z" }, + { url = "https://files.pythonhosted.org/packages/87/1c/ab2ce611b984d2fd5d86a5a8a19c1ae26acac6bad967da4967562c75114d/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b", size = 228706, upload-time = "2026-04-02T09:27:09.951Z" }, + { url = "https://files.pythonhosted.org/packages/a8/29/2b1d2cb00bf085f59d29eb773ce58ec2d325430f8c216804a0a5cd83cbca/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41", size = 224727, upload-time = "2026-04-02T09:27:11.175Z" }, + { url = "https://files.pythonhosted.org/packages/47/5c/032c2d5a07fe4d4855fea851209cca2b6f03ebeb6d4e3afdb3358386a684/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e", size = 215882, upload-time = "2026-04-02T09:27:12.446Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c2/356065d5a8b78ed04499cae5f339f091946a6a74f91e03476c33f0ab7100/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae", size = 200860, upload-time = "2026-04-02T09:27:13.721Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/cd/a32a84217ced5039f53b29f460962abb2d4420def55afabe45b1c3c7483d/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18", size = 211564, upload-time = "2026-04-02T09:27:15.272Z" }, + { url = "https://files.pythonhosted.org/packages/44/86/58e6f13ce26cc3b8f4a36b94a0f22ae2f00a72534520f4ae6857c4b81f89/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b", size = 211276, upload-time = "2026-04-02T09:27:16.834Z" }, + { url = "https://files.pythonhosted.org/packages/8f/fe/d17c32dc72e17e155e06883efa84514ca375f8a528ba2546bee73fc4df81/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356", size = 201238, upload-time = "2026-04-02T09:27:18.229Z" }, + { url = "https://files.pythonhosted.org/packages/6a/29/f33daa50b06525a237451cdb6c69da366c381a3dadcd833fa5676bc468b3/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab", size = 230189, upload-time = "2026-04-02T09:27:19.445Z" }, + { url = "https://files.pythonhosted.org/packages/b6/6e/52c84015394a6a0bdcd435210a7e944c5f94ea1055f5cc5d56c5fe368e7b/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46", size = 211352, upload-time = "2026-04-02T09:27:20.79Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d7/4353be581b373033fb9198bf1da3cf8f09c1082561e8e922aa7b39bf9fe8/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44", size = 227024, upload-time = "2026-04-02T09:27:22.063Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/45/99d18aa925bd1740098ccd3060e238e21115fffbfdcb8f3ece837d0ace6c/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72", size = 217869, upload-time = "2026-04-02T09:27:23.486Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/5ee478aa53f4bb7996482153d4bfe1b89e0f087f0ab6b294fcf92d595873/charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10", size = 148541, upload-time = "2026-04-02T09:27:25.146Z" }, + { url = "https://files.pythonhosted.org/packages/48/77/72dcb0921b2ce86420b2d79d454c7022bf5be40202a2a07906b9f2a35c97/charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f", size = 159634, upload-time = "2026-04-02T09:27:26.642Z" }, + { url = "https://files.pythonhosted.org/packages/c6/a3/c2369911cd72f02386e4e340770f6e158c7980267da16af8f668217abaa0/charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246", size = 148384, upload-time = "2026-04-02T09:27:28.271Z" }, + { url = "https://files.pythonhosted.org/packages/94/09/7e8a7f73d24dba1f0035fbbf014d2c36828fc1bf9c88f84093e57d315935/charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24", size = 330133, upload-time = "2026-04-02T09:27:29.474Z" }, + { url = "https://files.pythonhosted.org/packages/8d/da/96975ddb11f8e977f706f45cddd8540fd8242f71ecdb5d18a80723dcf62c/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79", size = 216257, upload-time = "2026-04-02T09:27:30.793Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/e8/1d63bf8ef2d388e95c64b2098f45f84758f6d102a087552da1485912637b/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960", size = 234851, upload-time = "2026-04-02T09:27:32.44Z" }, + { url = "https://files.pythonhosted.org/packages/9b/40/e5ff04233e70da2681fa43969ad6f66ca5611d7e669be0246c4c7aaf6dc8/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4", size = 233393, upload-time = "2026-04-02T09:27:34.03Z" }, + { url = "https://files.pythonhosted.org/packages/be/c1/06c6c49d5a5450f76899992f1ee40b41d076aee9279b49cf9974d2f313d5/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e", size = 223251, upload-time = "2026-04-02T09:27:35.369Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/f2ff16fb050946169e3e1f82134d107e5d4ae72647ec8a1b1446c148480f/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1", size = 206609, upload-time = "2026-04-02T09:27:36.661Z" }, + { url = "https://files.pythonhosted.org/packages/69/d5/a527c0cd8d64d2eab7459784fb4169a0ac76e5a6fc5237337982fd61347e/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44", size = 220014, upload-time = "2026-04-02T09:27:38.019Z" }, + { url = "https://files.pythonhosted.org/packages/7e/80/8a7b8104a3e203074dc9aa2c613d4b726c0e136bad1cc734594b02867972/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e", size = 218979, upload-time = "2026-04-02T09:27:39.37Z" }, + { url = "https://files.pythonhosted.org/packages/02/9a/b759b503d507f375b2b5c153e4d2ee0a75aa215b7f2489cf314f4541f2c0/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3", size = 209238, upload-time = "2026-04-02T09:27:40.722Z" }, + { url = "https://files.pythonhosted.org/packages/c2/4e/0f3f5d47b86bdb79256e7290b26ac847a2832d9a4033f7eb2cd4bcf4bb5b/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0", size = 236110, upload-time = "2026-04-02T09:27:42.33Z" }, + { url = "https://files.pythonhosted.org/packages/96/23/bce28734eb3ed2c91dcf93abeb8a5cf393a7b2749725030bb630e554fdd8/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e", size = 219824, upload-time = "2026-04-02T09:27:43.924Z" }, + { url = "https://files.pythonhosted.org/packages/2c/6f/6e897c6984cc4d41af319b077f2f600fc8214eb2fe2d6bcb79141b882400/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb", size = 233103, upload-time = "2026-04-02T09:27:45.348Z" }, + { url = "https://files.pythonhosted.org/packages/76/22/ef7bd0fe480a0ae9b656189ec00744b60933f68b4f42a7bb06589f6f576a/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe", size = 225194, upload-time = "2026-04-02T09:27:46.706Z" }, + { url = "https://files.pythonhosted.org/packages/c5/a7/0e0ab3e0b5bc1219bd80a6a0d4d72ca74d9250cb2382b7c699c147e06017/charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = 
"sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0", size = 159827, upload-time = "2026-04-02T09:27:48.053Z" }, + { url = "https://files.pythonhosted.org/packages/7a/1d/29d32e0fb40864b1f878c7f5a0b343ae676c6e2b271a2d55cc3a152391da/charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c", size = 174168, upload-time = "2026-04-02T09:27:49.795Z" }, + { url = "https://files.pythonhosted.org/packages/de/32/d92444ad05c7a6e41fb2036749777c163baf7a0301a040cb672d6b2b1ae9/charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d", size = 153018, upload-time = "2026-04-02T09:27:51.116Z" }, + { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" }, +] + +[[package]] +name = "click" +version = "8.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bb/63/f9e1ea081ce35720d8b92acde70daaedace594dc93b693c869e0d5910718/click-8.3.3.tar.gz", hash = "sha256:398329ad4837b2ff7cbe1dd166a4c0f8900c3ca3a218de04466f38f6497f18a2", size = 328061, upload-time = "2026-04-22T15:11:27.506Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/44/c1221527f6a71a01ec6fbad7fa78f1d50dfa02217385cf0fa3eec7087d59/click-8.3.3-py3-none-any.whl", hash = "sha256:a2bf429bb3033c89fa4936ffb35d5cb471e3719e1f3c8a7c3fff0b8314305613", size = 110502, upload-time = "2026-04-22T15:11:25.044Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "cryptography" +version = "48.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/a9/db8f313fdcd85d767d4973515e1db101f9c71f95fced83233de224673757/cryptography-48.0.0.tar.gz", hash = "sha256:5c3932f4436d1cccb036cb0eaef46e6e2db91035166f1ad6505c3c9d5a635920", size = 832984, upload-time = "2026-05-04T22:59:38.133Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/3d/01f6dd9190170a5a241e0e98c2d04be3664a9e6f5b9b872cde63aff1c3dd/cryptography-48.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:0c558d2cdffd8f4bbb30fc7134c74d2ca9a476f830bb053074498fbc86f41ed6", size = 8001587, upload-time = "2026-05-04T22:57:36.803Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6e/e90527eef33f309beb811cf7c982c3aeffcce8e3edb178baa4ca3ae4a6fa/cryptography-48.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f5333311663ea94f75dd408665686aaf426563556bb5283554a3539177e03b8c", size = 4690433, upload-time = "2026-05-04T22:57:40.373Z" }, + { url = "https://files.pythonhosted.org/packages/90/04/673510ed51ddff56575f306cf1617d80411ee76831ccd3097599140efdfe/cryptography-48.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:7995ef305d7165c3f11ae07f2517e5a4f1d5c18da1376a0a9ed496336b69e5f3", size = 4710620, upload-time = "2026-05-04T22:57:42.935Z" }, + { url = "https://files.pythonhosted.org/packages/14/d5/e9c4ef932c8d800490c34d8bd589d64a31d5890e27ec9e9ad532be893294/cryptography-48.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:40ba1f85eaa6959837b1d51c9767e230e14612eea4ef110ee8854ada22da1bf5", size = 4696283, upload-time = "2026-05-04T22:57:45.294Z" }, + { url = "https://files.pythonhosted.org/packages/0c/29/174b9dfb60b12d59ecfc6cfa04bc88c21b42a54f01b8aae09bb6e51e4c7f/cryptography-48.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:369a6348999f94bbd53435c894377b20ab95f25a9065c283570e70150d8abc3c", size = 5296573, upload-time = "2026-05-04T22:57:47.933Z" }, + { url = "https://files.pythonhosted.org/packages/95/38/0d29a6fd7d0d1373f0c0c88a04ba20e359b257753ac497564cd660fc1d55/cryptography-48.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a0e692c683f4df67815a2d258b324e66f4738bd7a96a218c826dce4f4bd05d8f", size = 4743677, upload-time = "2026-05-04T22:57:50.067Z" }, + { url = "https://files.pythonhosted.org/packages/30/be/eef653013d5c63b6a490529e0316f9ac14a37602965d4903efed1399f32b/cryptography-48.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:18349bbc56f4743c8b12dc32e2bccb2cf83ee8b69a3bba74ef8ae857e26b3d25", size = 4330808, upload-time = "2026-05-04T22:57:52.301Z" }, + { url = "https://files.pythonhosted.org/packages/84/9e/500463e87abb7a0a0f9f256ec21123ecde0a7b5541a15e840ea54551fd81/cryptography-48.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7e8eac43dfca5c4cccc6dad9a80504436fca53bb9bc3100a2386d730fbe6b602", size = 4695941, upload-time = "2026-05-04T22:57:54.603Z" }, + { url = "https://files.pythonhosted.org/packages/e3/dc/7303087450c2ec9e7fbb750e17c2abfbc658f23cbd0e54009509b7cc4091/cryptography-48.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9ccdac7d40688ecb5a3b4a604b8a88c8002e3442d6c60aead1db2a89a041560c", size = 
5252579, upload-time = "2026-05-04T22:57:57.207Z" }, + { url = "https://files.pythonhosted.org/packages/d0/c0/7101d3b7215edcdc90c45da544961fd8ed2d6448f77577460fa75a8443f7/cryptography-48.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:bd72e68b06bb1e96913f97dd4901119bc17f39d4586a5adf2d3e47bc2b9d58b5", size = 4743326, upload-time = "2026-05-04T22:57:59.535Z" }, + { url = "https://files.pythonhosted.org/packages/ac/d8/5b833bad13016f562ab9d063d68199a4bd121d18458e439515601d3357ec/cryptography-48.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:59baa2cb386c4f0b9905bd6eb4c2a79a69a128408fd31d32ca4d7102d4156321", size = 4826672, upload-time = "2026-05-04T22:58:01.996Z" }, + { url = "https://files.pythonhosted.org/packages/98/e1/7074eb8bf3c135558c73fc2bcf0f5633f912e6fb87e868a55c454080ef09/cryptography-48.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9249e3cd978541d665967ac2cb2787fd6a62bddf1e75b3e347a594d7dacf4f74", size = 4972574, upload-time = "2026-05-04T22:58:03.968Z" }, + { url = "https://files.pythonhosted.org/packages/04/70/e5a1b41d325f797f39427aa44ef8baf0be500065ab6d8e10369d850d4a4f/cryptography-48.0.0-cp311-abi3-win32.whl", hash = "sha256:9c459db21422be75e2809370b829a87eb37f74cd785fc4aa9ea1e5f43b47cda4", size = 3294868, upload-time = "2026-05-04T22:58:06.467Z" }, + { url = "https://files.pythonhosted.org/packages/f4/ac/8ac51b4a5fc5932eb7ee5c517ba7dc8cd834f0048962b6b352f00f41ebf9/cryptography-48.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:5b012212e08b8dd5edc78ef54da83dd9892fd9105323b3993eff6bea65dc21d7", size = 3817107, upload-time = "2026-05-04T22:58:08.845Z" }, + { url = "https://files.pythonhosted.org/packages/6b/84/70e3feea9feea87fd7cbe77efb2712ae1e3e6edf10749dc6e95f4e60e455/cryptography-48.0.0-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:3cb07a3ed6431663cd321ea8a000a1314c74211f823e4177fefa2255e057d1ec", size = 7986556, upload-time = "2026-05-04T22:58:11.172Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/6e/18e07a618bb5442ba10cf4df16e99c071365528aa570dfcb8c02e25a303b/cryptography-48.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c7378637d7d88016fa6791c159f698b3d3eed28ebf844ac36b9dc04a14dae18", size = 4684776, upload-time = "2026-05-04T22:58:13.712Z" }, + { url = "https://files.pythonhosted.org/packages/be/6a/4ea3b4c6c6759794d5ee2103c304a5076dc4b19ae1f9fe47dba439e159e9/cryptography-48.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc90c0b39b2e3c65ef52c804b72e3c58f8a04ab2a1871272798e5f9572c17d20", size = 4698121, upload-time = "2026-05-04T22:58:16.448Z" }, + { url = "https://files.pythonhosted.org/packages/2f/59/6ff6ad6cae03bb887da2a5860b2c9805f8dac969ef01ce563336c49bd1d1/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:76341972e1eff8b4bea859f09c0d3e64b96ce931b084f9b9b7db8ef364c30eff", size = 4690042, upload-time = "2026-05-04T22:58:18.544Z" }, + { url = "https://files.pythonhosted.org/packages/ca/b4/fc334ed8cfd705aca282fe4d8f5ae64a8e0f74932e9feecb344610cf6e4d/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:55b7718303bf06a5753dcdccf2f3945cf18ad7bffde41b61226e4db31ab89a9c", size = 5282526, upload-time = "2026-05-04T22:58:20.75Z" }, + { url = "https://files.pythonhosted.org/packages/11/08/9f8c5386cc4cd90d8255c7cdd0f5baf459a08502a09de30dc51f553d38dc/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:a64697c641c7b1b2178e573cbc31c7c6684cd56883a478d75143dbb7118036db", size = 4733116, upload-time = "2026-05-04T22:58:23.627Z" }, + { url = "https://files.pythonhosted.org/packages/b8/77/99307d7574045699f8805aa500fa0fb83422d115b5400a064ddd306d7750/cryptography-48.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:561215ea3879cb1cbbf272867e2efda62476f240fb58c64de6b393ae19246741", size = 4316030, upload-time = "2026-05-04T22:58:25.581Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/36/a608b98337af3cb2aff4818e406649d30572b7031918b04c87d979495348/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:ad64688338ed4bc1a6618076ba75fd7194a5f1797ac60b47afe926285adb3166", size = 4689640, upload-time = "2026-05-04T22:58:27.747Z" }, + { url = "https://files.pythonhosted.org/packages/dd/a6/825010a291b4438aecc1f568bc428189fc1175515223632477c07dc0a6df/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:906cbf0670286c6e0044156bc7d4af9cbb0ef6db9f73e52c3ec56ba6bdde5336", size = 5237657, upload-time = "2026-05-04T22:58:29.848Z" }, + { url = "https://files.pythonhosted.org/packages/b9/09/4e76a09b4caa29aad535ddc806f5d4c5d01885bd978bd984fbc6ca032cae/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:ea8990436d914540a40ab24b6a77c0969695ed52f4a4874c5137ccf7045a7057", size = 4732362, upload-time = "2026-05-04T22:58:32.009Z" }, + { url = "https://files.pythonhosted.org/packages/18/78/444fa04a77d0cb95f417dda20d450e13c56ba8e5220fc892a1658f44f882/cryptography-48.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c18684a7f0cc9a3cb60328f496b8e3372def7c5d2df39ac267878b05565aaaae", size = 4819580, upload-time = "2026-05-04T22:58:34.254Z" }, + { url = "https://files.pythonhosted.org/packages/38/85/ea67067c70a1fd4be2c63d35eeed82658023021affccc7b17705f8527dd2/cryptography-48.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9be5aafa5736574f8f15f262adc81b2a9869e2cfe9014d52a44633905b40d52c", size = 4963283, upload-time = "2026-05-04T22:58:36.376Z" }, + { url = "https://files.pythonhosted.org/packages/75/54/cc6d0f3deac3e81c7f847e8a189a12b6cdd65059b43dad25d4316abd849a/cryptography-48.0.0-cp314-cp314t-win32.whl", hash = "sha256:c17dfe85494deaeddc5ce251aebd1d60bbe6afc8b62071bb0b469431a000124f", size = 3270954, upload-time = "2026-05-04T22:58:38.791Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/67/cc947e288c0758a4e5473d1dcb743037ab7785541265a969240b8885441a/cryptography-48.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27241b1dc9962e056062a8eef1991d02c3a24569c95975bd2322a8a52c6e5e12", size = 3797313, upload-time = "2026-05-04T22:58:40.746Z" }, + { url = "https://files.pythonhosted.org/packages/f2/63/61d4a4e1c6b6bab6ce1e213cd36a24c415d90e76d78c5eb8577c5541d2e8/cryptography-48.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:58d00498e8933e4a194f3076aee1b4a97dfec1a6da444535755822fe5d8b0b86", size = 7983482, upload-time = "2026-05-04T22:58:43.769Z" }, + { url = "https://files.pythonhosted.org/packages/d5/ac/f5b5995b87770c693e2596559ffafe195b4033a57f14a82268a2842953f3/cryptography-48.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:614d0949f4790582d2cc25553abd09dd723025f0c0e7c67376a1d77196743d6e", size = 4683266, upload-time = "2026-05-04T22:58:46.064Z" }, + { url = "https://files.pythonhosted.org/packages/ec/c6/8b14f67e18338fbc4adb76f66c001f5c3610b3e2d1837f268f47a347dbbb/cryptography-48.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ce4bfae76319a532a2dc68f82cc32f5676ee792a983187dac07183690e5c66f", size = 4696228, upload-time = "2026-05-04T22:58:48.22Z" }, + { url = "https://files.pythonhosted.org/packages/ea/73/f808fbae9514bd91b47875b003f13e284c8c6bdfd904b7944e803937eec1/cryptography-48.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:2eb992bbd4661238c5a397594c83f5b4dc2bc5b848c365c8f991b6780efcc5c7", size = 4689097, upload-time = "2026-05-04T22:58:50.9Z" }, + { url = "https://files.pythonhosted.org/packages/93/01/d86632d7d28db8ae83221995752eeb6639ffb374c2d22955648cf8d52797/cryptography-48.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:22a5cb272895dce158b2cacdfdc3debd299019659f42947dbdac6f32d68fe832", size = 5283582, upload-time = "2026-05-04T22:58:53.017Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/e1/50edc7a50334807cc4791fc4a0ce7468b4a1416d9138eab358bfc9a3d70b/cryptography-48.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2b4d59804e8408e2fea7d1fbaf218e5ec984325221db76e6a241a9abd6cdd95c", size = 4730479, upload-time = "2026-05-04T22:58:55.611Z" }, + { url = "https://files.pythonhosted.org/packages/6f/af/99a582b1b1641ff5911ac559beb45097cf79efd4ead4657f578ef1af2d47/cryptography-48.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:984a20b0f62a26f48a3396c72e4bc34c66e356d356bf370053066b3b6d54634a", size = 4326481, upload-time = "2026-05-04T22:58:57.607Z" }, + { url = "https://files.pythonhosted.org/packages/90/ee/89aa26a06ef0a7d7611788ffd571a7c50e368cc6a4d5eef8b4884e866edb/cryptography-48.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5a5ed8fde7a1d09376ca0b40e68cd59c69fe23b1f9768bd5824f54681626032a", size = 4688713, upload-time = "2026-05-04T22:59:00.077Z" }, + { url = "https://files.pythonhosted.org/packages/70/ba/bcb1b0bb7a33d4c7c0c4d4c7874b4a62ae4f56113a5f4baefa362dfb1f0f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:8cd666227ef7af430aa5914a9910e0ddd703e75f039cef0825cd0da71b6b711a", size = 5238165, upload-time = "2026-05-04T22:59:02.317Z" }, + { url = "https://files.pythonhosted.org/packages/c9/70/ca4003b1ce5ca3dc3186ada51908c8a9b9ff7d5cab83cc0d43ee14ec144f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9071196d81abc88b3516ac8cdfad32e2b66dd4a5393a8e68a961e9161ddc6239", size = 4729947, upload-time = "2026-05-04T22:59:05.255Z" }, + { url = "https://files.pythonhosted.org/packages/44/a0/4ec7cf774207905aef1a8d11c3750d5a1db805eb380ee4e16df317870128/cryptography-48.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1e2d54c8be6152856a36f0882ab231e70f8ec7f14e93cf87db8a2ed056bf160c", size = 4822059, upload-time = "2026-05-04T22:59:07.802Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/75/a2e55f99c16fcac7b5d6c1eb19ad8e00799854d6be5ca845f9259eae1681/cryptography-48.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a5da777e32ffed6f85a7b2b3f7c5cbc88c146bfcd0a1d7baf5fcc6c52ee35dd4", size = 4960575, upload-time = "2026-05-04T22:59:09.851Z" }, + { url = "https://files.pythonhosted.org/packages/b8/23/6e6f32143ab5d8b36ca848a502c4bcd477ae75b9e1677e3530d669062578/cryptography-48.0.0-cp39-abi3-win32.whl", hash = "sha256:77a2ccbbe917f6710e05ba9adaa25fb5075620bf3ea6fb751997875aff4ae4bd", size = 3279117, upload-time = "2026-05-04T22:59:12.019Z" }, + { url = "https://files.pythonhosted.org/packages/9d/9a/0fea98a70cf1749d41d738836f6349d97945f7c89433a259a6c2642eefeb/cryptography-48.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:16cd65b9330583e4619939b3a3843eec1e6e789744bb01e7c7e2e62e33c239c8", size = 3792100, upload-time = "2026-05-04T22:59:14.884Z" }, +] + +[[package]] +name = "deprecation" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5a/d3/8ae2869247df154b64c1884d7346d412fed0c49df84db635aab2d1c40e62/deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff", size = 173788, upload-time = "2020-04-20T14:23:38.738Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178, upload-time = "2020-04-20T14:23:36.581Z" }, +] + +[[package]] +name = "fastapi" +version = "0.115.14" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/ca/53/8c38a874844a8b0fa10dd8adf3836ac154082cf88d3f22b544e9ceea0a15/fastapi-0.115.14.tar.gz", hash = "sha256:b1de15cdc1c499a4da47914db35d0e4ef8f1ce62b624e94e0e5824421df99739", size = 296263, upload-time = "2025-06-26T15:29:08.21Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/50/b1222562c6d270fea83e9c9075b8e8600b8479150a18e4516a6138b980d1/fastapi-0.115.14-py3-none-any.whl", hash = "sha256:6c0c8bf9420bd58f565e585036d971872472b4f7d3f6c73b698e10cffdefb3ca", size = 95514, upload-time = "2025-06-26T15:29:06.49Z" }, +] + +[[package]] +name = "fsspec" +version = "2026.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/8d/1c51c094345df128ca4a990d633fe1a0ff28726c9e6b3c41ba65087bba1d/fsspec-2026.4.0.tar.gz", hash = "sha256:301d8ac70ae90ef3ad05dcf94d6c3754a097f9b5fe4667d2787aa359ec7df7e4", size = 312760, upload-time = "2026-04-29T20:42:38.635Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/0c/043d5e551459da400957a1395e0febbf771446ff34291afcbe3d8be2a279/fsspec-2026.4.0-py3-none-any.whl", hash = "sha256:11ef7bb35dab8a394fde6e608221d5cf3e8499401c249bebaeaad760a1a8dec2", size = 203402, upload-time = "2026-04-29T20:42:36.842Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = 
"h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httptools" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/7f/403e5d787dc4942316e515e949b0c8a013d84078a915910e9f391ba9b3ed/httptools-0.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:38e0c83a2ea9746ebbd643bdfb521b9aa4a91703e2cd705c20443405d2fd16a5", size = 206280, upload-time = "2025-10-10T03:54:39.274Z" }, + { url = "https://files.pythonhosted.org/packages/2a/0d/7f3fd28e2ce311ccc998c388dd1c53b18120fda3b70ebb022b135dc9839b/httptools-0.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f25bbaf1235e27704f1a7b86cd3304eabc04f569c828101d94a0e605ef7205a5", size = 110004, upload-time = "2025-10-10T03:54:40.403Z" }, + { url = "https://files.pythonhosted.org/packages/84/a6/b3965e1e146ef5762870bbe76117876ceba51a201e18cc31f5703e454596/httptools-0.7.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c15f37ef679ab9ecc06bfc4e6e8628c32a8e4b305459de7cf6785acd57e4d03", size = 517655, upload-time = "2025-10-10T03:54:41.347Z" }, + { url = "https://files.pythonhosted.org/packages/11/7d/71fee6f1844e6fa378f2eddde6c3e41ce3a1fb4b2d81118dd544e3441ec0/httptools-0.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7fe6e96090df46b36ccfaf746f03034e5ab723162bc51b0a4cf58305324036f2", size = 511440, 
upload-time = "2025-10-10T03:54:42.452Z" }, + { url = "https://files.pythonhosted.org/packages/22/a5/079d216712a4f3ffa24af4a0381b108aa9c45b7a5cc6eb141f81726b1823/httptools-0.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f72fdbae2dbc6e68b8239defb48e6a5937b12218e6ffc2c7846cc37befa84362", size = 495186, upload-time = "2025-10-10T03:54:43.937Z" }, + { url = "https://files.pythonhosted.org/packages/e9/9e/025ad7b65278745dee3bd0ebf9314934c4592560878308a6121f7f812084/httptools-0.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e99c7b90a29fd82fea9ef57943d501a16f3404d7b9ee81799d41639bdaae412c", size = 499192, upload-time = "2025-10-10T03:54:45.003Z" }, + { url = "https://files.pythonhosted.org/packages/6d/de/40a8f202b987d43afc4d54689600ff03ce65680ede2f31df348d7f368b8f/httptools-0.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3e14f530fefa7499334a79b0cf7e7cd2992870eb893526fb097d51b4f2d0f321", size = 86694, upload-time = "2025-10-10T03:54:45.923Z" }, + { url = "https://files.pythonhosted.org/packages/09/8f/c77b1fcbfd262d422f12da02feb0d218fa228d52485b77b953832105bb90/httptools-0.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6babce6cfa2a99545c60bfef8bee0cc0545413cb0018f617c8059a30ad985de3", size = 202889, upload-time = "2025-10-10T03:54:47.089Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1a/22887f53602feaa066354867bc49a68fc295c2293433177ee90870a7d517/httptools-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:601b7628de7504077dd3dcb3791c6b8694bbd967148a6d1f01806509254fb1ca", size = 108180, upload-time = "2025-10-10T03:54:48.052Z" }, + { url = "https://files.pythonhosted.org/packages/32/6a/6aaa91937f0010d288d3d124ca2946d48d60c3a5ee7ca62afe870e3ea011/httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04c6c0e6c5fb0739c5b8a9eb046d298650a0ff38cf42537fc372b28dc7e4472c", size = 478596, upload-time = "2025-10-10T03:54:48.919Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/70/023d7ce117993107be88d2cbca566a7c1323ccbaf0af7eabf2064fe356f6/httptools-0.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69d4f9705c405ae3ee83d6a12283dc9feba8cc6aaec671b412917e644ab4fa66", size = 473268, upload-time = "2025-10-10T03:54:49.993Z" }, + { url = "https://files.pythonhosted.org/packages/32/4d/9dd616c38da088e3f436e9a616e1d0cc66544b8cdac405cc4e81c8679fc7/httptools-0.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44c8f4347d4b31269c8a9205d8a5ee2df5322b09bbbd30f8f862185bb6b05346", size = 455517, upload-time = "2025-10-10T03:54:51.066Z" }, + { url = "https://files.pythonhosted.org/packages/1d/3a/a6c595c310b7df958e739aae88724e24f9246a514d909547778d776799be/httptools-0.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:465275d76db4d554918aba40bf1cbebe324670f3dfc979eaffaa5d108e2ed650", size = 458337, upload-time = "2025-10-10T03:54:52.196Z" }, + { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" }, + { url = "https://files.pythonhosted.org/packages/34/50/9d095fcbb6de2d523e027a2f304d4551855c2f46e0b82befd718b8b20056/httptools-0.7.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c08fe65728b8d70b6923ce31e3956f859d5e1e8548e6f22ec520a962c6757270", size = 203619, upload-time = "2025-10-10T03:54:54.321Z" }, + { url = "https://files.pythonhosted.org/packages/07/f0/89720dc5139ae54b03f861b5e2c55a37dba9a5da7d51e1e824a1f343627f/httptools-0.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7aea2e3c3953521c3c51106ee11487a910d45586e351202474d45472db7d72d3", size = 108714, upload-time = "2025-10-10T03:54:55.163Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/cb/eea88506f191fb552c11787c23f9a405f4c7b0c5799bf73f2249cd4f5228/httptools-0.7.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0e68b8582f4ea9166be62926077a3334064d422cf08ab87d8b74664f8e9058e1", size = 472909, upload-time = "2025-10-10T03:54:56.056Z" }, + { url = "https://files.pythonhosted.org/packages/e0/4a/a548bdfae6369c0d078bab5769f7b66f17f1bfaa6fa28f81d6be6959066b/httptools-0.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df091cf961a3be783d6aebae963cc9b71e00d57fa6f149025075217bc6a55a7b", size = 470831, upload-time = "2025-10-10T03:54:57.219Z" }, + { url = "https://files.pythonhosted.org/packages/4d/31/14df99e1c43bd132eec921c2e7e11cda7852f65619bc0fc5bdc2d0cb126c/httptools-0.7.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f084813239e1eb403ddacd06a30de3d3e09a9b76e7894dcda2b22f8a726e9c60", size = 452631, upload-time = "2025-10-10T03:54:58.219Z" }, + { url = "https://files.pythonhosted.org/packages/22/d2/b7e131f7be8d854d48cb6d048113c30f9a46dca0c9a8b08fcb3fcd588cdc/httptools-0.7.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7347714368fb2b335e9063bc2b96f2f87a9ceffcd9758ac295f8bbcd3ffbc0ca", size = 452910, upload-time = "2025-10-10T03:54:59.366Z" }, + { url = "https://files.pythonhosted.org/packages/53/cf/878f3b91e4e6e011eff6d1fa9ca39f7eb17d19c9d7971b04873734112f30/httptools-0.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:cfabda2a5bb85aa2a904ce06d974a3f30fb36cc63d7feaddec05d2050acede96", size = 88205, upload-time = "2025-10-10T03:55:00.389Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + +[[package]] +name = "idna" +version = "3.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/77/7b3966d0b9d1d31a36ddf1746926a11dface89a83409bf1483f0237aa758/idna-3.15.tar.gz", hash = "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc", size = 199245, upload-time = "2026-05-12T22:45:57.011Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340, upload-time = 
"2026-05-12T22:45:55.733Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/ff/7841249c247aa650a76b9ee4bbaeae59370dc8bfd2f6c01f3630c35eb134/markdown_it_py-4.2.0.tar.gz", hash = "sha256:04a21681d6fbb623de53f6f364d352309d4094dd4194040a10fd51833e418d49", size = 82454, upload-time = "2026-05-07T12:08:28.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/81/4da04ced5a082363ecfa159c010d200ecbd959ae410c10c0264a38cac0f5/markdown_it_py-4.2.0-py3-none-any.whl", hash = "sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a", size = 91687, upload-time = "2026-05-07T12:08:27.182Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "mmh3" +version = "5.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/1a/edb23803a168f070ded7a3014c6d706f63b90c84ccc024f89d794a3b7a6d/mmh3-5.2.1.tar.gz", hash = "sha256:bbea5b775f0ac84945191fb83f845a6fd9a21a03ea7f2e187defac7e401616ad", size = 33775, upload-time = "2026-03-05T15:55:57.716Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/94/bc5c3b573b40a328c4d141c20e399039ada95e5e2a661df3425c5165fd84/mmh3-5.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0cc21533878e5586b80d74c281d7f8da7932bc8ace50b8d5f6dbf7e3935f63f1", size = 56087, upload-time = "2026-03-05T15:54:21.92Z" }, + { url = "https://files.pythonhosted.org/packages/f6/80/64a02cc3e95c3af0aaa2590849d9ed24a9f14bb93537addde688e039b7c3/mmh3-5.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4eda76074cfca2787c8cf1bec603eaebdddd8b061ad5502f85cddae998d54f00", size = 40500, upload-time = "2026-03-05T15:54:22.953Z" }, + { url = "https://files.pythonhosted.org/packages/8b/72/e6d6602ce18adf4ddcd0e48f2e13590cc92a536199e52109f46f259d3c46/mmh3-5.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:eee884572b06bbe8a2b54f424dbd996139442cf83c76478e1ec162512e0dd2c7", size = 40034, upload-time = "2026-03-05T15:54:23.943Z" }, + { url = "https://files.pythonhosted.org/packages/59/c2/bf4537a8e58e21886ef16477041238cab5095c836496e19fafc34b7445d2/mmh3-5.2.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0d0b7e803191db5f714d264044e06189c8ccd3219e936cc184f07106bd17fd7b", size = 97292, upload-time = "2026-03-05T15:54:25.335Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/e2/51ed62063b44d10b06d975ac87af287729eeb5e3ed9772f7584a17983e90/mmh3-5.2.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e6c219e375f6341d0959af814296372d265a8ca1af63825f65e2e87c618f006", size = 103274, upload-time = "2026-03-05T15:54:26.44Z" }, + { url = "https://files.pythonhosted.org/packages/75/ce/12a7524dca59eec92e5b31fdb13ede1e98eda277cf2b786cf73bfbc24e81/mmh3-5.2.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:26fb5b9c3946bf7f1daed7b37e0c03898a6f062149127570f8ede346390a0825", size = 106158, upload-time = "2026-03-05T15:54:28.578Z" }, + { url = "https://files.pythonhosted.org/packages/86/1f/d3ba6dd322d01ab5d44c46c8f0c38ab6bbbf9b5e20e666dfc05bf4a23604/mmh3-5.2.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3c38d142c706201db5b2345166eeef1e7740e3e2422b470b8ba5c8727a9b4c7a", size = 113005, upload-time = "2026-03-05T15:54:29.767Z" }, + { url = "https://files.pythonhosted.org/packages/b6/a9/15d6b6f913294ea41b44d901741298e3718e1cb89ee626b3694625826a43/mmh3-5.2.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50885073e2909251d4718634a191c49ae5f527e5e1736d738e365c3e8be8f22b", size = 120744, upload-time = "2026-03-05T15:54:30.931Z" }, + { url = "https://files.pythonhosted.org/packages/76/b3/70b73923fd0284c439860ff5c871b20210dfdbe9a6b9dd0ee6496d77f174/mmh3-5.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3f99e1756fc48ad507b95e5d86f2fb21b3d495012ff13e6592ebac14033f166", size = 99111, upload-time = "2026-03-05T15:54:32.353Z" }, + { url = "https://files.pythonhosted.org/packages/dd/38/99f7f75cd27d10d8b899a1caafb9d531f3903e4d54d572220e3d8ac35e89/mmh3-5.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:62815d2c67f2dd1be76a253d88af4e1da19aeaa1820146dec52cf8bee2958b16", size = 98623, upload-time = 
"2026-03-05T15:54:33.801Z" }, + { url = "https://files.pythonhosted.org/packages/fd/68/6e292c0853e204c44d2f03ea5f090be3317a0e2d9417ecb62c9eb27687df/mmh3-5.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8f767ba0911602ddef289404e33835a61168314ebd3c729833db2ed685824211", size = 106437, upload-time = "2026-03-05T15:54:35.177Z" }, + { url = "https://files.pythonhosted.org/packages/dd/c6/fedd7284c459cfb58721d461fcf5607a4c1f5d9ab195d113d51d10164d16/mmh3-5.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:67e41a497bac88cc1de96eeba56eeb933c39d54bc227352f8455aa87c4ca4000", size = 110002, upload-time = "2026-03-05T15:54:36.673Z" }, + { url = "https://files.pythonhosted.org/packages/3b/ac/ca8e0c19a34f5b71390171d2ff0b9f7f187550d66801a731bb68925126a4/mmh3-5.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3d74a03fb57757ece25aa4b3c1c60157a1cece37a020542785f942e2f827eed5", size = 97507, upload-time = "2026-03-05T15:54:37.804Z" }, + { url = "https://files.pythonhosted.org/packages/df/94/6ebb9094cfc7ac5e7950776b9d13a66bb4a34f83814f32ba2abc9494fc68/mmh3-5.2.1-cp312-cp312-win32.whl", hash = "sha256:7374d6e3ef72afe49697ecd683f3da12f4fc06af2d75433d0580c6746d2fa025", size = 40773, upload-time = "2026-03-05T15:54:40.077Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/cd3527198cf159495966551c84a5f36805a10ac17b294f41f67b83f6a4d6/mmh3-5.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:3a9fed49c6ce4ed7e73f13182760c65c816da006debe67f37635580dfb0fae00", size = 41560, upload-time = "2026-03-05T15:54:41.148Z" }, + { url = "https://files.pythonhosted.org/packages/15/96/6fe5ebd0f970a076e3ed5512871ce7569447b962e96c125528a2f9724470/mmh3-5.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:bbfcb95d9a744e6e2827dfc66ad10e1020e0cac255eb7f85652832d5a264c2fc", size = 39313, upload-time = "2026-03-05T15:54:42.171Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/a5/9daa0508a1569a54130f6198d5462a92deda870043624aa3ea72721aa765/mmh3-5.2.1-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:723b2681ed4cc07d3401bbea9c201ad4f2a4ca6ba8cddaff6789f715dd2b391e", size = 40832, upload-time = "2026-03-05T15:54:43.212Z" }, + { url = "https://files.pythonhosted.org/packages/0a/6b/3230c6d80c1f4b766dedf280a92c2241e99f87c1504ff74205ec8cebe451/mmh3-5.2.1-cp313-cp313-android_21_x86_64.whl", hash = "sha256:3619473a0e0d329fd4aec8075628f8f616be2da41605300696206d6f36920c3d", size = 41964, upload-time = "2026-03-05T15:54:44.204Z" }, + { url = "https://files.pythonhosted.org/packages/62/fb/648bfddb74a872004b6ee751551bfdda783fe6d70d2e9723bad84dbe5311/mmh3-5.2.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:e48d4dbe0f88e53081da605ae68644e5182752803bbc2beb228cca7f1c4454d6", size = 39114, upload-time = "2026-03-05T15:54:45.205Z" }, + { url = "https://files.pythonhosted.org/packages/95/c2/ab7901f87af438468b496728d11264cb397b3574d41506e71b92128e0373/mmh3-5.2.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a482ac121de6973897c92c2f31defc6bafb11c83825109275cffce54bb64933f", size = 39819, upload-time = "2026-03-05T15:54:46.509Z" }, + { url = "https://files.pythonhosted.org/packages/2f/ed/6f88dda0df67de1612f2e130ffea34cf84aaee5bff5b0aff4dbff2babe34/mmh3-5.2.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:17fbb47f0885ace8327ce1235d0416dc86a211dcd8cc1e703f41523be32cfec8", size = 40330, upload-time = "2026-03-05T15:54:47.864Z" }, + { url = "https://files.pythonhosted.org/packages/3d/66/7516d23f53cdf90f43fce24ab80c28f45e6851d78b46bef8c02084edf583/mmh3-5.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d51fde50a77f81330523562e3c2734ffdca9c4c9e9d355478117905e1cfe16c6", size = 56078, upload-time = "2026-03-05T15:54:48.9Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/34/4d152fdf4a91a132cb226b671f11c6b796eada9ab78080fb5ce1e95adaab/mmh3-5.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:19bbd3b841174ae6ed588536ab5e1b1fe83d046e668602c20266547298d939a9", size = 40498, upload-time = "2026-03-05T15:54:49.942Z" }, + { url = "https://files.pythonhosted.org/packages/d4/4c/8e3af1b6d85a299767ec97bd923f12b06267089c1472c27c1696870d1175/mmh3-5.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be77c402d5e882b6fbacfd90823f13da8e0a69658405a39a569c6b58fdb17b03", size = 40033, upload-time = "2026-03-05T15:54:50.994Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f2/966ea560e32578d453c9e9db53d602cbb1d0da27317e232afa7c38ceba11/mmh3-5.2.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fd96476f04db5ceba1cfa0f21228f67c1f7402296f0e73fee3513aa680ad237b", size = 97320, upload-time = "2026-03-05T15:54:52.072Z" }, + { url = "https://files.pythonhosted.org/packages/bb/0d/2c5f9893b38aeb6b034d1a44ecd55a010148054f6a516abe53b5e4057297/mmh3-5.2.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:707151644085dd0f20fe4f4b573d28e5130c4aaa5f587e95b60989c5926653b5", size = 103299, upload-time = "2026-03-05T15:54:53.569Z" }, + { url = "https://files.pythonhosted.org/packages/1c/fc/2ebaef4a4d4376f89761274dc274035ffd96006ab496b4ee5af9b08f21a9/mmh3-5.2.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3737303ca9ea0f7cb83028781148fcda4f1dac7821db0c47672971dabcf63593", size = 106222, upload-time = "2026-03-05T15:54:55.092Z" }, + { url = "https://files.pythonhosted.org/packages/57/09/ea7ffe126d0ba0406622602a2d05e1e1a6841cc92fc322eb576c95b27fad/mmh3-5.2.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2778fed822d7db23ac5008b181441af0c869455b2e7d001f4019636ac31b6fe4", size = 113048, upload-time = 
"2026-03-05T15:54:56.305Z" }, + { url = "https://files.pythonhosted.org/packages/85/57/9447032edf93a64aa9bef4d9aa596400b1756f40411890f77a284f6293ca/mmh3-5.2.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d57dea657357230cc780e13920d7fa7db059d58fe721c80020f94476da4ca0a1", size = 120742, upload-time = "2026-03-05T15:54:57.453Z" }, + { url = "https://files.pythonhosted.org/packages/53/82/a86cc87cc88c92e9e1a598fee509f0409435b57879a6129bf3b3e40513c7/mmh3-5.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:169e0d178cb59314456ab30772429a802b25d13227088085b0d49b9fe1533104", size = 99132, upload-time = "2026-03-05T15:54:58.583Z" }, + { url = "https://files.pythonhosted.org/packages/54/f7/6b16eb1b40ee89bb740698735574536bc20d6cdafc65ae702ea235578e05/mmh3-5.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7e4e1f580033335c6f76d1e0d6b56baf009d1a64d6a4816347e4271ba951f46d", size = 98686, upload-time = "2026-03-05T15:55:00.078Z" }, + { url = "https://files.pythonhosted.org/packages/e8/88/a601e9f32ad1410f438a6d0544298ea621f989bd34a0731a7190f7dec799/mmh3-5.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:2bd9f19f7f1fcebd74e830f4af0f28adad4975d40d80620be19ffb2b2af56c9f", size = 106479, upload-time = "2026-03-05T15:55:01.532Z" }, + { url = "https://files.pythonhosted.org/packages/d6/5c/ce29ae3dfc4feec4007a437a1b7435fb9507532a25147602cd5b52be86db/mmh3-5.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c88653877aeb514c089d1b3d473451677b8b9a6d1497dbddf1ae7934518b06d2", size = 110030, upload-time = "2026-03-05T15:55:02.934Z" }, + { url = "https://files.pythonhosted.org/packages/13/30/ae444ef2ff87c805d525da4fa63d27cda4fe8a48e77003a036b8461cfd5c/mmh3-5.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fceef7fe67c81e1585198215e42ad3fdba3a25644beda8fbdaf85f4d7b93175a", size = 97536, upload-time = "2026-03-05T15:55:04.135Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/f9/dc3787ee5c813cc27fe79f45ad4500d9b5437f23a7402435cc34e07c7718/mmh3-5.2.1-cp313-cp313-win32.whl", hash = "sha256:54b64fb2433bc71488e7a449603bf8bd31fbcf9cb56fbe1eb6d459e90b86c37b", size = 40769, upload-time = "2026-03-05T15:55:05.277Z" }, + { url = "https://files.pythonhosted.org/packages/43/67/850e0b5a1e97799822ebfc4ca0e8c6ece3ed8baf7dcdf64de817dfdda2ca/mmh3-5.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:cae6383181f1e345317742d2ddd88f9e7d2682fa4c9432e3a74e47d92dce0229", size = 41563, upload-time = "2026-03-05T15:55:06.283Z" }, + { url = "https://files.pythonhosted.org/packages/c0/cc/98c90b28e1da5458e19fbfaf4adb5289208d3bfccd45dd14eab216a2f0bb/mmh3-5.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:022aa1a528604e6c83d0a7705fdef0b5355d897a9e0fa3a8d26709ceaa06965d", size = 39310, upload-time = "2026-03-05T15:55:07.323Z" }, + { url = "https://files.pythonhosted.org/packages/63/b4/65bc1fb2bb7f83e91c30865023b1847cf89a5f237165575e8c83aa536584/mmh3-5.2.1-cp314-cp314-android_24_arm64_v8a.whl", hash = "sha256:d771f085fcdf4035786adfb1d8db026df1eb4b41dac1c3d070d1e49512843227", size = 40794, upload-time = "2026-03-05T15:55:09.773Z" }, + { url = "https://files.pythonhosted.org/packages/c4/86/7168b3d83be8eb553897b1fac9da8bbb06568e5cfe555ffc329ebb46f59d/mmh3-5.2.1-cp314-cp314-android_24_x86_64.whl", hash = "sha256:7f196cd7910d71e9d9860da0ff7a77f64d22c1ad931f1dd18559a06e03109fc0", size = 41923, upload-time = "2026-03-05T15:55:10.924Z" }, + { url = "https://files.pythonhosted.org/packages/bf/9b/b653ab611c9060ce8ff0ba25c0226757755725e789292f3ca138a58082cd/mmh3-5.2.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:b1f12bd684887a0a5d55e6363ca87056f361e45451105012d329b86ec19dbe0b", size = 39131, upload-time = "2026-03-05T15:55:11.961Z" }, + { url = "https://files.pythonhosted.org/packages/9b/b4/5a2e0d34ab4d33543f01121e832395ea510132ea8e52cdf63926d9d81754/mmh3-5.2.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = 
"sha256:d106493a60dcb4aef35a0fac85105e150a11cf8bc2b0d388f5a33272d756c966", size = 39825, upload-time = "2026-03-05T15:55:13.013Z" }, + { url = "https://files.pythonhosted.org/packages/bd/69/81699a8f39a3f8d368bec6443435c0c392df0d200ad915bf0d222b588e03/mmh3-5.2.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:44983e45310ee5b9f73397350251cdf6e63a466406a105f1d16cb5baa659270b", size = 40344, upload-time = "2026-03-05T15:55:14.026Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b3/71c8c775807606e8fd8acc5c69016e1caf3200d50b50b6dd4b40ce10b76c/mmh3-5.2.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:368625fb01666655985391dbad3860dc0ba7c0d6b9125819f3121ee7292b4ac8", size = 56291, upload-time = "2026-03-05T15:55:15.137Z" }, + { url = "https://files.pythonhosted.org/packages/6f/75/2c24517d4b2ce9e4917362d24f274d3d541346af764430249ddcc4cb3a08/mmh3-5.2.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:72d1cc63bcc91e14933f77d51b3df899d6a07d184ec515ea7f56bff659e124d7", size = 40575, upload-time = "2026-03-05T15:55:16.518Z" }, + { url = "https://files.pythonhosted.org/packages/bf/b9/e4a360164365ac9f07a25f0f7928e3a66eb9ecc989384060747aa170e6aa/mmh3-5.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e8b4b5580280b9265af3e0409974fb79c64cf7523632d03fbf11df18f8b0181e", size = 40052, upload-time = "2026-03-05T15:55:17.735Z" }, + { url = "https://files.pythonhosted.org/packages/97/ca/120d92223a7546131bbbc31c9174168ee7a73b1366f5463ffe69d9e691fe/mmh3-5.2.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:4cbbde66f1183db040daede83dd86c06d663c5bb2af6de1142b7c8c37923dd74", size = 97311, upload-time = "2026-03-05T15:55:18.959Z" }, + { url = "https://files.pythonhosted.org/packages/b6/71/c1a60c1652b8813ef9de6d289784847355417ee0f2980bca002fe87f4ae5/mmh3-5.2.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:8ff038d52ef6aa0f309feeba00c5095c9118d0abf787e8e8454d6048db2037fc", size = 103279, upload-time = "2026-03-05T15:55:20.448Z" }, + { url = "https://files.pythonhosted.org/packages/48/29/ad97f4be1509cdcb28ae32c15593ce7c415db47ace37f8fad35b493faa9a/mmh3-5.2.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a4130d0b9ce5fad6af07421b1aecc7e079519f70d6c05729ab871794eded8617", size = 106290, upload-time = "2026-03-05T15:55:21.6Z" }, + { url = "https://files.pythonhosted.org/packages/77/29/1f86d22e281bd8827ba373600a4a8b0c0eae5ca6aa55b9a8c26d2a34decc/mmh3-5.2.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6e0bfe77d238308839699944164b96a2eeccaf55f2af400f54dc20669d8d5f2", size = 113116, upload-time = "2026-03-05T15:55:22.826Z" }, + { url = "https://files.pythonhosted.org/packages/a7/7c/339971ea7ed4c12d98f421f13db3ea576a9114082ccb59d2d1a0f00ccac1/mmh3-5.2.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f963eafc0a77a6c0562397da004f5876a9bcf7265a7bcc3205e29636bc4a1312", size = 120740, upload-time = "2026-03-05T15:55:24.3Z" }, + { url = "https://files.pythonhosted.org/packages/e4/92/3c7c4bdb8e926bb3c972d1e2907d77960c1c4b250b41e8366cf20c6e4373/mmh3-5.2.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:92883836caf50d5255be03d988d75bc93e3f86ba247b7ca137347c323f731deb", size = 99143, upload-time = "2026-03-05T15:55:25.456Z" }, + { url = "https://files.pythonhosted.org/packages/df/0a/33dd8706e732458c8375eae63c981292de07a406bad4ec03e5269654aa2c/mmh3-5.2.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:57b52603e89355ff318025dd55158f6e71396c0f1f609d548e9ea9c94cc6ce0a", size = 98703, upload-time = "2026-03-05T15:55:26.723Z" }, + { url = "https://files.pythonhosted.org/packages/51/04/76bbce05df76cbc3d396f13b2ea5b1578ef02b6a5187e132c6c33f99d596/mmh3-5.2.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = 
"sha256:f40a95186a72fa0b67d15fef0f157bfcda00b4f59c8a07cbe5530d41ac35d105", size = 106484, upload-time = "2026-03-05T15:55:28.214Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8f/c6e204a2c70b719c1f62ffd9da27aef2dddcba875ea9c31ca0e87b975a46/mmh3-5.2.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:58370d05d033ee97224c81263af123dea3d931025030fd34b61227a768a8858a", size = 110012, upload-time = "2026-03-05T15:55:29.532Z" }, + { url = "https://files.pythonhosted.org/packages/e3/37/7181efd8e39db386c1ebc3e6b7d1f702a09d7c1197a6f2742ed6b5c16597/mmh3-5.2.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7be6dfb49e48fd0a7d91ff758a2b51336f1cd21f9d44b20f6801f072bd080cdd", size = 97508, upload-time = "2026-03-05T15:55:31.01Z" }, + { url = "https://files.pythonhosted.org/packages/42/0f/afa7ca2615fd85e1469474bb860e381443d0b868c083b62b41cb1d7ca32f/mmh3-5.2.1-cp314-cp314-win32.whl", hash = "sha256:54fe8518abe06a4c3852754bfd498b30cc58e667f376c513eac89a244ce781a4", size = 41387, upload-time = "2026-03-05T15:55:32.403Z" }, + { url = "https://files.pythonhosted.org/packages/71/0d/46d42a260ee1357db3d486e6c7a692e303c017968e14865e00efa10d09fc/mmh3-5.2.1-cp314-cp314-win_amd64.whl", hash = "sha256:3f796b535008708846044c43302719c6956f39ca2d93f2edda5319e79a29efbb", size = 42101, upload-time = "2026-03-05T15:55:33.646Z" }, + { url = "https://files.pythonhosted.org/packages/a4/7b/848a8378059d96501a41159fca90d6a99e89736b0afbe8e8edffeac8c74b/mmh3-5.2.1-cp314-cp314-win_arm64.whl", hash = "sha256:cd471ede0d802dd936b6fab28188302b2d497f68436025857ca72cd3810423fe", size = 39836, upload-time = "2026-03-05T15:55:35.026Z" }, + { url = "https://files.pythonhosted.org/packages/27/61/1dabea76c011ba8547c25d30c91c0ec22544487a8750997a27a0c9e1180b/mmh3-5.2.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:5174a697ce042fa77c407e05efe41e03aa56dae9ec67388055820fb48cf4c3ba", size = 57727, upload-time = "2026-03-05T15:55:36.162Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/32/731185950d1cf2d5e28979cc8593016ba1619a295faba10dda664a4931b5/mmh3-5.2.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:0a3984146e414684a6be2862d84fcb1035f4984851cb81b26d933bab6119bf00", size = 41308, upload-time = "2026-03-05T15:55:37.254Z" }, + { url = "https://files.pythonhosted.org/packages/76/aa/66c76801c24b8c9418b4edde9b5e57c75e72c94e29c48f707e3962534f18/mmh3-5.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:bd6e7d363aa93bd3421b30b6af97064daf47bc96005bddba67c5ffbc6df426b8", size = 40758, upload-time = "2026-03-05T15:55:38.61Z" }, + { url = "https://files.pythonhosted.org/packages/9e/bb/79a1f638a02f0ae389f706d13891e2fbf7d8c0a22ecde67ba828951bb60a/mmh3-5.2.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:113f78e7463a36dbbcea05bfe688efd7fa759d0f0c56e73c974d60dcfec3dfcc", size = 109670, upload-time = "2026-03-05T15:55:40.13Z" }, + { url = "https://files.pythonhosted.org/packages/26/94/8cd0e187a288985bcfc79bf5144d1d712df9dee74365f59d26e3a1865be6/mmh3-5.2.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7e8ec5f606e0809426d2440e0683509fb605a8820a21ebd120dcdba61b74ef7f", size = 117399, upload-time = "2026-03-05T15:55:42.076Z" }, + { url = "https://files.pythonhosted.org/packages/42/94/dfea6059bd5c5beda565f58a4096e43f4858fb6d2862806b8bbd12cbb284/mmh3-5.2.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22b0f9971ec4e07e8223f2beebe96a6cfc779d940b6f27d26604040dd74d3a44", size = 120386, upload-time = "2026-03-05T15:55:43.481Z" }, + { url = "https://files.pythonhosted.org/packages/47/cb/f9c45e62aaa67220179f487772461d891bb582bb2f9783c944832c60efd9/mmh3-5.2.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:85ffc9920ffc39c5eee1e3ac9100c913a0973996fbad5111f939bbda49204bb7", size = 125924, upload-time = 
"2026-03-05T15:55:44.638Z" }, + { url = "https://files.pythonhosted.org/packages/a5/83/fe54a4a7c11bc9f623dfc1707decd034245602b076dfc1dcc771a4163170/mmh3-5.2.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7aec798c2b01aaa65a55f1124f3405804184373abb318a3091325aece235f67c", size = 135280, upload-time = "2026-03-05T15:55:45.866Z" }, + { url = "https://files.pythonhosted.org/packages/97/67/fe7e9e9c143daddd210cd22aef89cbc425d58ecf238d2b7d9eb0da974105/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:55dbbd8ffbc40d1697d5e2d0375b08599dae8746b0b08dea05eee4ce81648fac", size = 110050, upload-time = "2026-03-05T15:55:47.074Z" }, + { url = "https://files.pythonhosted.org/packages/43/c4/6d4b09fcbef80794de447c9378e39eefc047156b290fa3dd2d5257ca8227/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6c85c38a279ca9295a69b9b088a2e48aa49737bb1b34e6a9dc6297c110e8d912", size = 111158, upload-time = "2026-03-05T15:55:48.239Z" }, + { url = "https://files.pythonhosted.org/packages/81/a6/ca51c864bdb30524beb055a6d8826db3906af0834ec8c41d097a6e8573d5/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:6290289fa5fb4c70fd7f72016e03633d60388185483ff3b162912c81205ae2cf", size = 116890, upload-time = "2026-03-05T15:55:49.405Z" }, + { url = "https://files.pythonhosted.org/packages/cc/04/5a1fe2e2ad843d03e89af25238cbc4f6840a8bb6c4329a98ab694c71deda/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:4fc6cd65dc4d2fdb2625e288939a3566e36127a84811a4913f02f3d5931da52d", size = 123121, upload-time = "2026-03-05T15:55:50.61Z" }, + { url = "https://files.pythonhosted.org/packages/af/4d/3c820c6f4897afd25905270a9f2330a23f77a207ea7356f7aadace7273c0/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:623f938f6a039536cc02b7582a07a080f13fdfd48f87e63201d92d7e34d09a18", size = 110187, upload-time = "2026-03-05T15:55:52.143Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/54/1d71cd143752361c0aebef16ad3f55926a6faf7b112d355745c1f8a25f7f/mmh3-5.2.1-cp314-cp314t-win32.whl", hash = "sha256:29bc3973676ae334412efdd367fcd11d036b7be3efc1ce2407ef8676dabfeb82", size = 41934, upload-time = "2026-03-05T15:55:53.564Z" }, + { url = "https://files.pythonhosted.org/packages/9d/e4/63a2a88f31d93dea03947cccc2a076946857e799ea4f7acdecbf43b324aa/mmh3-5.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:28cfab66577000b9505a0d068c731aee7ca85cd26d4d63881fab17857e0fe1fb", size = 43036, upload-time = "2026-03-05T15:55:55.252Z" }, + { url = "https://files.pythonhosted.org/packages/a0/0f/59204bf136d1201f8d7884cfbaf7498c5b4674e87a4c693f9bde63741ce1/mmh3-5.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:dfd51b4c56b673dfbc43d7d27ef857dd91124801e2806c69bb45585ce0fa019b", size = 40391, upload-time = "2026-03-05T15:55:56.697Z" }, +] + +[[package]] +name = "multidict" +version = "6.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/9c/f20e0e2cf80e4b2e4b1c365bf5fe104ee633c751a724246262db8f1a0b13/multidict-6.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a90f75c956e32891a4eda3639ce6dd86e87105271f43d43442a3aedf3cddf172", size = 76893, upload-time = "2026-01-26T02:43:52.754Z" }, + { url = "https://files.pythonhosted.org/packages/fe/cf/18ef143a81610136d3da8193da9d80bfe1cb548a1e2d1c775f26b23d024a/multidict-6.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fccb473e87eaa1382689053e4a4618e7ba7b9b9b8d6adf2027ee474597128cd", size = 45456, upload-time = "2026-01-26T02:43:53.893Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/65/1caac9d4cd32e8433908683446eebc953e82d22b03d10d41a5f0fefe991b/multidict-6.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0fa96985700739c4c7853a43c0b3e169360d6855780021bfc6d0f1ce7c123e7", size = 43872, upload-time = "2026-01-26T02:43:55.041Z" }, + { url = "https://files.pythonhosted.org/packages/cf/3b/d6bd75dc4f3ff7c73766e04e705b00ed6dbbaccf670d9e05a12b006f5a21/multidict-6.7.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cb2a55f408c3043e42b40cc8eecd575afa27b7e0b956dfb190de0f8499a57a53", size = 251018, upload-time = "2026-01-26T02:43:56.198Z" }, + { url = "https://files.pythonhosted.org/packages/fd/80/c959c5933adedb9ac15152e4067c702a808ea183a8b64cf8f31af8ad3155/multidict-6.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb0ce7b2a32d09892b3dd6cc44877a0d02a33241fafca5f25c8b6b62374f8b75", size = 258883, upload-time = "2026-01-26T02:43:57.499Z" }, + { url = "https://files.pythonhosted.org/packages/86/85/7ed40adafea3d4f1c8b916e3b5cc3a8e07dfcdcb9cd72800f4ed3ca1b387/multidict-6.7.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c3a32d23520ee37bf327d1e1a656fec76a2edd5c038bf43eddfa0572ec49c60b", size = 242413, upload-time = "2026-01-26T02:43:58.755Z" }, + { url = "https://files.pythonhosted.org/packages/d2/57/b8565ff533e48595503c785f8361ff9a4fde4d67de25c207cd0ba3befd03/multidict-6.7.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9c90fed18bffc0189ba814749fdcc102b536e83a9f738a9003e569acd540a733", size = 268404, upload-time = "2026-01-26T02:44:00.216Z" }, + { url = "https://files.pythonhosted.org/packages/e0/50/9810c5c29350f7258180dfdcb2e52783a0632862eb334c4896ac717cebcb/multidict-6.7.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:da62917e6076f512daccfbbde27f46fed1c98fee202f0559adec8ee0de67f71a", size = 269456, upload-time = "2026-01-26T02:44:02.202Z" }, + { url = "https://files.pythonhosted.org/packages/f3/8d/5e5be3ced1d12966fefb5c4ea3b2a5b480afcea36406559442c6e31d4a48/multidict-6.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfde23ef6ed9db7eaee6c37dcec08524cb43903c60b285b172b6c094711b3961", size = 256322, upload-time = "2026-01-26T02:44:03.56Z" }, + { url = "https://files.pythonhosted.org/packages/31/6e/d8a26d81ac166a5592782d208dd90dfdc0a7a218adaa52b45a672b46c122/multidict-6.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3758692429e4e32f1ba0df23219cd0b4fc0a52f476726fff9337d1a57676a582", size = 253955, upload-time = "2026-01-26T02:44:04.845Z" }, + { url = "https://files.pythonhosted.org/packages/59/4c/7c672c8aad41534ba619bcd4ade7a0dc87ed6b8b5c06149b85d3dd03f0cd/multidict-6.7.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:398c1478926eca669f2fd6a5856b6de9c0acf23a2cb59a14c0ba5844fa38077e", size = 251254, upload-time = "2026-01-26T02:44:06.133Z" }, + { url = "https://files.pythonhosted.org/packages/7b/bd/84c24de512cbafbdbc39439f74e967f19570ce7924e3007174a29c348916/multidict-6.7.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c102791b1c4f3ab36ce4101154549105a53dc828f016356b3e3bcae2e3a039d3", size = 252059, upload-time = "2026-01-26T02:44:07.518Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ba/f5449385510825b73d01c2d4087bf6d2fccc20a2d42ac34df93191d3dd03/multidict-6.7.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a088b62bd733e2ad12c50dad01b7d0166c30287c166e137433d3b410add807a6", size = 263588, upload-time = "2026-01-26T02:44:09.382Z" }, + { url = "https://files.pythonhosted.org/packages/d7/11/afc7c677f68f75c84a69fe37184f0f82fce13ce4b92f49f3db280b7e92b3/multidict-6.7.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:3d51ff4785d58d3f6c91bdbffcb5e1f7ddfda557727043aa20d20ec4f65e324a", size = 259642, upload-time = "2026-01-26T02:44:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/2b/17/ebb9644da78c4ab36403739e0e6e0e30ebb135b9caf3440825001a0bddcb/multidict-6.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc5907494fccf3e7d3f94f95c91d6336b092b5fc83811720fae5e2765890dfba", size = 251377, upload-time = "2026-01-26T02:44:12.042Z" }, + { url = "https://files.pythonhosted.org/packages/ca/a4/840f5b97339e27846c46307f2530a2805d9d537d8b8bd416af031cad7fa0/multidict-6.7.1-cp312-cp312-win32.whl", hash = "sha256:28ca5ce2fd9716631133d0e9a9b9a745ad7f60bac2bccafb56aa380fc0b6c511", size = 41887, upload-time = "2026-01-26T02:44:14.245Z" }, + { url = "https://files.pythonhosted.org/packages/80/31/0b2517913687895f5904325c2069d6a3b78f66cc641a86a2baf75a05dcbb/multidict-6.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcee94dfbd638784645b066074b338bc9cc155d4b4bffa4adce1615c5a426c19", size = 46053, upload-time = "2026-01-26T02:44:15.371Z" }, + { url = "https://files.pythonhosted.org/packages/0c/5b/aba28e4ee4006ae4c7df8d327d31025d760ffa992ea23812a601d226e682/multidict-6.7.1-cp312-cp312-win_arm64.whl", hash = "sha256:ba0a9fb644d0c1a2194cf7ffb043bd852cea63a57f66fbd33959f7dae18517bf", size = 43307, upload-time = "2026-01-26T02:44:16.852Z" }, + { url = "https://files.pythonhosted.org/packages/f2/22/929c141d6c0dba87d3e1d38fbdf1ba8baba86b7776469f2bc2d3227a1e67/multidict-6.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2b41f5fed0ed563624f1c17630cb9941cf2309d4df00e494b551b5f3e3d67a23", size = 76174, upload-time = "2026-01-26T02:44:18.509Z" }, + { url = "https://files.pythonhosted.org/packages/c7/75/bc704ae15fee974f8fccd871305e254754167dce5f9e42d88a2def741a1d/multidict-6.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84e61e3af5463c19b67ced91f6c634effb89ef8bfc5ca0267f954451ed4bb6a2", size = 45116, upload-time = "2026-01-26T02:44:19.745Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/76/55cd7186f498ed080a18440c9013011eb548f77ae1b297206d030eb1180a/multidict-6.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:935434b9853c7c112eee7ac891bc4cb86455aa631269ae35442cb316790c1445", size = 43524, upload-time = "2026-01-26T02:44:21.571Z" }, + { url = "https://files.pythonhosted.org/packages/e9/3c/414842ef8d5a1628d68edee29ba0e5bcf235dbfb3ccd3ea303a7fe8c72ff/multidict-6.7.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:432feb25a1cb67fe82a9680b4d65fb542e4635cb3166cd9c01560651ad60f177", size = 249368, upload-time = "2026-01-26T02:44:22.803Z" }, + { url = "https://files.pythonhosted.org/packages/f6/32/befed7f74c458b4a525e60519fe8d87eef72bb1e99924fa2b0f9d97a221e/multidict-6.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e82d14e3c948952a1a85503817e038cba5905a3352de76b9a465075d072fba23", size = 256952, upload-time = "2026-01-26T02:44:24.306Z" }, + { url = "https://files.pythonhosted.org/packages/03/d6/c878a44ba877f366630c860fdf74bfb203c33778f12b6ac274936853c451/multidict-6.7.1-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4cfb48c6ea66c83bcaaf7e4dfa7ec1b6bbcf751b7db85a328902796dfde4c060", size = 240317, upload-time = "2026-01-26T02:44:25.772Z" }, + { url = "https://files.pythonhosted.org/packages/68/49/57421b4d7ad2e9e60e25922b08ceb37e077b90444bde6ead629095327a6f/multidict-6.7.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1d540e51b7e8e170174555edecddbd5538105443754539193e3e1061864d444d", size = 267132, upload-time = "2026-01-26T02:44:27.648Z" }, + { url = "https://files.pythonhosted.org/packages/b7/fe/ec0edd52ddbcea2a2e89e174f0206444a61440b40f39704e64dc807a70bd/multidict-6.7.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:273d23f4b40f3dce4d6c8a821c741a86dec62cded82e1175ba3d99be128147ed", size = 268140, upload-time = "2026-01-26T02:44:29.588Z" }, + { url = "https://files.pythonhosted.org/packages/b0/73/6e1b01cbeb458807aa0831742232dbdd1fa92bfa33f52a3f176b4ff3dc11/multidict-6.7.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d624335fd4fa1c08a53f8b4be7676ebde19cd092b3895c421045ca87895b429", size = 254277, upload-time = "2026-01-26T02:44:30.902Z" }, + { url = "https://files.pythonhosted.org/packages/6a/b2/5fb8c124d7561a4974c342bc8c778b471ebbeb3cc17df696f034a7e9afe7/multidict-6.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:12fad252f8b267cc75b66e8fc51b3079604e8d43a75428ffe193cd9e2195dfd6", size = 252291, upload-time = "2026-01-26T02:44:32.31Z" }, + { url = "https://files.pythonhosted.org/packages/5a/96/51d4e4e06bcce92577fcd488e22600bd38e4fd59c20cb49434d054903bd2/multidict-6.7.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:03ede2a6ffbe8ef936b92cb4529f27f42be7f56afcdab5ab739cd5f27fb1cbf9", size = 250156, upload-time = "2026-01-26T02:44:33.734Z" }, + { url = "https://files.pythonhosted.org/packages/db/6b/420e173eec5fba721a50e2a9f89eda89d9c98fded1124f8d5c675f7a0c0f/multidict-6.7.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:90efbcf47dbe33dcf643a1e400d67d59abeac5db07dc3f27d6bdeae497a2198c", size = 249742, upload-time = "2026-01-26T02:44:35.222Z" }, + { url = "https://files.pythonhosted.org/packages/44/a3/ec5b5bd98f306bc2aa297b8c6f11a46714a56b1e6ef5ebda50a4f5d7c5fb/multidict-6.7.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:5c4b9bfc148f5a91be9244d6264c53035c8a0dcd2f51f1c3c6e30e30ebaa1c84", size = 262221, upload-time = "2026-01-26T02:44:36.604Z" }, + { url = "https://files.pythonhosted.org/packages/cd/f7/e8c0d0da0cd1e28d10e624604e1a36bcc3353aaebdfdc3a43c72bc683a12/multidict-6.7.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:401c5a650f3add2472d1d288c26deebc540f99e2fb83e9525007a74cd2116f1d", size = 258664, upload-time = "2026-01-26T02:44:38.008Z" }, + { url = "https://files.pythonhosted.org/packages/52/da/151a44e8016dd33feed44f730bd856a66257c1ee7aed4f44b649fb7edeb3/multidict-6.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:97891f3b1b3ffbded884e2916cacf3c6fc87b66bb0dde46f7357404750559f33", size = 249490, upload-time = "2026-01-26T02:44:39.386Z" }, + { url = "https://files.pythonhosted.org/packages/87/af/a3b86bf9630b732897f6fc3f4c4714b90aa4361983ccbdcd6c0339b21b0c/multidict-6.7.1-cp313-cp313-win32.whl", hash = "sha256:e1c5988359516095535c4301af38d8a8838534158f649c05dd1050222321bcb3", size = 41695, upload-time = "2026-01-26T02:44:41.318Z" }, + { url = "https://files.pythonhosted.org/packages/b2/35/e994121b0e90e46134673422dd564623f93304614f5d11886b1b3e06f503/multidict-6.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:960c83bf01a95b12b08fd54324a4eb1d5b52c88932b5cba5d6e712bb3ed12eb5", size = 45884, upload-time = "2026-01-26T02:44:42.488Z" }, + { url = "https://files.pythonhosted.org/packages/ca/61/42d3e5dbf661242a69c97ea363f2d7b46c567da8eadef8890022be6e2ab0/multidict-6.7.1-cp313-cp313-win_arm64.whl", hash = "sha256:563fe25c678aaba333d5399408f5ec3c383ca5b663e7f774dd179a520b8144df", size = 43122, upload-time = "2026-01-26T02:44:43.664Z" }, + { url = "https://files.pythonhosted.org/packages/6d/b3/e6b21c6c4f314bb956016b0b3ef2162590a529b84cb831c257519e7fde44/multidict-6.7.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c76c4bec1538375dad9d452d246ca5368ad6e1c9039dadcf007ae59c70619ea1", size = 83175, upload-time = "2026-01-26T02:44:44.894Z" }, + { url = "https://files.pythonhosted.org/packages/fb/76/23ecd2abfe0957b234f6c960f4ade497f55f2c16aeb684d4ecdbf1c95791/multidict-6.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:57b46b24b5d5ebcc978da4ec23a819a9402b4228b8a90d9c656422b4bdd8a963", size = 48460, upload-time = "2026-01-26T02:44:46.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/57/a0ed92b23f3a042c36bc4227b72b97eca803f5f1801c1ab77c8a212d455e/multidict-6.7.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e954b24433c768ce78ab7929e84ccf3422e46deb45a4dc9f93438f8217fa2d34", size = 46930, upload-time = "2026-01-26T02:44:47.278Z" }, + { url = "https://files.pythonhosted.org/packages/b5/66/02ec7ace29162e447f6382c495dc95826bf931d3818799bbef11e8f7df1a/multidict-6.7.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3bd231490fa7217cc832528e1cd8752a96f0125ddd2b5749390f7c3ec8721b65", size = 242582, upload-time = "2026-01-26T02:44:48.604Z" }, + { url = "https://files.pythonhosted.org/packages/58/18/64f5a795e7677670e872673aca234162514696274597b3708b2c0d276cce/multidict-6.7.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:253282d70d67885a15c8a7716f3a73edf2d635793ceda8173b9ecc21f2fb8292", size = 250031, upload-time = "2026-01-26T02:44:50.544Z" }, + { url = "https://files.pythonhosted.org/packages/c8/ed/e192291dbbe51a8290c5686f482084d31bcd9d09af24f63358c3d42fd284/multidict-6.7.1-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0b4c48648d7649c9335cf1927a8b87fa692de3dcb15faa676c6a6f1f1aabda43", size = 228596, upload-time = "2026-01-26T02:44:51.951Z" }, + { url = "https://files.pythonhosted.org/packages/1e/7e/3562a15a60cf747397e7f2180b0a11dc0c38d9175a650e75fa1b4d325e15/multidict-6.7.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:98bc624954ec4d2c7cb074b8eefc2b5d0ce7d482e410df446414355d158fe4ca", size = 257492, upload-time = "2026-01-26T02:44:53.902Z" }, + { url = "https://files.pythonhosted.org/packages/24/02/7d0f9eae92b5249bb50ac1595b295f10e263dd0078ebb55115c31e0eaccd/multidict-6.7.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:1b99af4d9eec0b49927b4402bcbb58dea89d3e0db8806a4086117019939ad3dd", size = 255899, upload-time = "2026-01-26T02:44:55.316Z" }, + { url = "https://files.pythonhosted.org/packages/00/e3/9b60ed9e23e64c73a5cde95269ef1330678e9c6e34dd4eb6b431b85b5a10/multidict-6.7.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6aac4f16b472d5b7dc6f66a0d49dd57b0e0902090be16594dc9ebfd3d17c47e7", size = 247970, upload-time = "2026-01-26T02:44:56.783Z" }, + { url = "https://files.pythonhosted.org/packages/3e/06/538e58a63ed5cfb0bd4517e346b91da32fde409d839720f664e9a4ae4f9d/multidict-6.7.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:21f830fe223215dffd51f538e78c172ed7c7f60c9b96a2bf05c4848ad49921c3", size = 245060, upload-time = "2026-01-26T02:44:58.195Z" }, + { url = "https://files.pythonhosted.org/packages/b2/2f/d743a3045a97c895d401e9bd29aaa09b94f5cbdf1bd561609e5a6c431c70/multidict-6.7.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f5dd81c45b05518b9aa4da4aa74e1c93d715efa234fd3e8a179df611cc85e5f4", size = 235888, upload-time = "2026-01-26T02:44:59.57Z" }, + { url = "https://files.pythonhosted.org/packages/38/83/5a325cac191ab28b63c52f14f1131f3b0a55ba3b9aa65a6d0bf2a9b921a0/multidict-6.7.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:eb304767bca2bb92fb9c5bd33cedc95baee5bb5f6c88e63706533a1c06ad08c8", size = 243554, upload-time = "2026-01-26T02:45:01.054Z" }, + { url = "https://files.pythonhosted.org/packages/20/1f/9d2327086bd15da2725ef6aae624208e2ef828ed99892b17f60c344e57ed/multidict-6.7.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:c9035dde0f916702850ef66460bc4239d89d08df4d02023a5926e7446724212c", size = 252341, upload-time = "2026-01-26T02:45:02.484Z" }, + { url = "https://files.pythonhosted.org/packages/e8/2c/2a1aa0280cf579d0f6eed8ee5211c4f1730bd7e06c636ba2ee6aafda302e/multidict-6.7.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = 
"sha256:af959b9beeb66c822380f222f0e0a1889331597e81f1ded7f374f3ecb0fd6c52", size = 246391, upload-time = "2026-01-26T02:45:03.862Z" }, + { url = "https://files.pythonhosted.org/packages/e5/03/7ca022ffc36c5a3f6e03b179a5ceb829be9da5783e6fe395f347c0794680/multidict-6.7.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:41f2952231456154ee479651491e94118229844dd7226541788be783be2b5108", size = 243422, upload-time = "2026-01-26T02:45:05.296Z" }, + { url = "https://files.pythonhosted.org/packages/dc/1d/b31650eab6c5778aceed46ba735bd97f7c7d2f54b319fa916c0f96e7805b/multidict-6.7.1-cp313-cp313t-win32.whl", hash = "sha256:df9f19c28adcb40b6aae30bbaa1478c389efd50c28d541d76760199fc1037c32", size = 47770, upload-time = "2026-01-26T02:45:06.754Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/2d2d1d522e51285bd61b1e20df8f47ae1a9d80839db0b24ea783b3832832/multidict-6.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:d54ecf9f301853f2c5e802da559604b3e95bb7a3b01a9c295c6ee591b9882de8", size = 53109, upload-time = "2026-01-26T02:45:08.044Z" }, + { url = "https://files.pythonhosted.org/packages/3d/a3/cc409ba012c83ca024a308516703cf339bdc4b696195644a7215a5164a24/multidict-6.7.1-cp313-cp313t-win_arm64.whl", hash = "sha256:5a37ca18e360377cfda1d62f5f382ff41f2b8c4ccb329ed974cc2e1643440118", size = 45573, upload-time = "2026-01-26T02:45:09.349Z" }, + { url = "https://files.pythonhosted.org/packages/91/cc/db74228a8be41884a567e88a62fd589a913708fcf180d029898c17a9a371/multidict-6.7.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8f333ec9c5eb1b7105e3b84b53141e66ca05a19a605368c55450b6ba208cb9ee", size = 75190, upload-time = "2026-01-26T02:45:10.651Z" }, + { url = "https://files.pythonhosted.org/packages/d5/22/492f2246bb5b534abd44804292e81eeaf835388901f0c574bac4eeec73c5/multidict-6.7.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:a407f13c188f804c759fc6a9f88286a565c242a76b27626594c133b82883b5c2", size = 44486, upload-time = "2026-01-26T02:45:11.938Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/4f/733c48f270565d78b4544f2baddc2fb2a245e5a8640254b12c36ac7ac68e/multidict-6.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0e161ddf326db5577c3a4cc2d8648f81456e8a20d40415541587a71620d7a7d1", size = 43219, upload-time = "2026-01-26T02:45:14.346Z" }, + { url = "https://files.pythonhosted.org/packages/24/bb/2c0c2287963f4259c85e8bcbba9182ced8d7fca65c780c38e99e61629d11/multidict-6.7.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1e3a8bb24342a8201d178c3b4984c26ba81a577c80d4d525727427460a50c22d", size = 245132, upload-time = "2026-01-26T02:45:15.712Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f9/44d4b3064c65079d2467888794dea218d1601898ac50222ab8a9a8094460/multidict-6.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97231140a50f5d447d3164f994b86a0bed7cd016e2682f8650d6a9158e14fd31", size = 252420, upload-time = "2026-01-26T02:45:17.293Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/78f7275e73fa17b24c9a51b0bd9d73ba64bb32d0ed51b02a746eb876abe7/multidict-6.7.1-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6b10359683bd8806a200fd2909e7c8ca3a7b24ec1d8132e483d58e791d881048", size = 233510, upload-time = "2026-01-26T02:45:19.356Z" }, + { url = "https://files.pythonhosted.org/packages/4b/25/8167187f62ae3cbd52da7893f58cb036b47ea3fb67138787c76800158982/multidict-6.7.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:283ddac99f7ac25a4acadbf004cb5ae34480bbeb063520f70ce397b281859362", size = 264094, upload-time = "2026-01-26T02:45:20.834Z" }, + { url = "https://files.pythonhosted.org/packages/a1/e7/69a3a83b7b030cf283fb06ce074a05a02322359783424d7edf0f15fe5022/multidict-6.7.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:538cec1e18c067d0e6103aa9a74f9e832904c957adc260e61cd9d8cf0c3b3d37", size = 260786, upload-time = "2026-01-26T02:45:22.818Z" }, + { url = "https://files.pythonhosted.org/packages/fe/3b/8ec5074bcfc450fe84273713b4b0a0dd47c0249358f5d82eb8104ffe2520/multidict-6.7.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7eee46ccb30ff48a1e35bb818cc90846c6be2b68240e42a78599166722cea709", size = 248483, upload-time = "2026-01-26T02:45:24.368Z" }, + { url = "https://files.pythonhosted.org/packages/48/5a/d5a99e3acbca0e29c5d9cba8f92ceb15dce78bab963b308ae692981e3a5d/multidict-6.7.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa263a02f4f2dd2d11a7b1bb4362aa7cb1049f84a9235d31adf63f30143469a0", size = 248403, upload-time = "2026-01-26T02:45:25.982Z" }, + { url = "https://files.pythonhosted.org/packages/35/48/e58cd31f6c7d5102f2a4bf89f96b9cf7e00b6c6f3d04ecc44417c00a5a3c/multidict-6.7.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:2e1425e2f99ec5bd36c15a01b690a1a2456209c5deed58f95469ffb46039ccbb", size = 240315, upload-time = "2026-01-26T02:45:27.487Z" }, + { url = "https://files.pythonhosted.org/packages/94/33/1cd210229559cb90b6786c30676bb0c58249ff42f942765f88793b41fdce/multidict-6.7.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:497394b3239fc6f0e13a78a3e1b61296e72bf1c5f94b4c4eb80b265c37a131cd", size = 245528, upload-time = "2026-01-26T02:45:28.991Z" }, + { url = "https://files.pythonhosted.org/packages/64/f2/6e1107d226278c876c783056b7db43d800bb64c6131cec9c8dfb6903698e/multidict-6.7.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:233b398c29d3f1b9676b4b6f75c518a06fcb2ea0b925119fb2c1bc35c05e1601", size = 258784, upload-time = "2026-01-26T02:45:30.503Z" }, + { url = "https://files.pythonhosted.org/packages/4d/c1/11f664f14d525e4a1b5327a82d4de61a1db604ab34c6603bb3c2cc63ad34/multidict-6.7.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = 
"sha256:93b1818e4a6e0930454f0f2af7dfce69307ca03cdcfb3739bf4d91241967b6c1", size = 251980, upload-time = "2026-01-26T02:45:32.603Z" }, + { url = "https://files.pythonhosted.org/packages/e1/9f/75a9ac888121d0c5bbd4ecf4eead45668b1766f6baabfb3b7f66a410e231/multidict-6.7.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f33dc2a3abe9249ea5d8360f969ec7f4142e7ac45ee7014d8f8d5acddf178b7b", size = 243602, upload-time = "2026-01-26T02:45:34.043Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e7/50bf7b004cc8525d80dbbbedfdc7aed3e4c323810890be4413e589074032/multidict-6.7.1-cp314-cp314-win32.whl", hash = "sha256:3ab8b9d8b75aef9df299595d5388b14530839f6422333357af1339443cff777d", size = 40930, upload-time = "2026-01-26T02:45:36.278Z" }, + { url = "https://files.pythonhosted.org/packages/e0/bf/52f25716bbe93745595800f36fb17b73711f14da59ed0bb2eba141bc9f0f/multidict-6.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:5e01429a929600e7dab7b166062d9bb54a5eed752384c7384c968c2afab8f50f", size = 45074, upload-time = "2026-01-26T02:45:37.546Z" }, + { url = "https://files.pythonhosted.org/packages/97/ab/22803b03285fa3a525f48217963da3a65ae40f6a1b6f6cf2768879e208f9/multidict-6.7.1-cp314-cp314-win_arm64.whl", hash = "sha256:4885cb0e817aef5d00a2e8451d4665c1808378dc27c2705f1bf4ef8505c0d2e5", size = 42471, upload-time = "2026-01-26T02:45:38.889Z" }, + { url = "https://files.pythonhosted.org/packages/e0/6d/f9293baa6146ba9507e360ea0292b6422b016907c393e2f63fc40ab7b7b5/multidict-6.7.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:0458c978acd8e6ea53c81eefaddbbee9c6c5e591f41b3f5e8e194780fe026581", size = 82401, upload-time = "2026-01-26T02:45:40.254Z" }, + { url = "https://files.pythonhosted.org/packages/7a/68/53b5494738d83558d87c3c71a486504d8373421c3e0dbb6d0db48ad42ee0/multidict-6.7.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:c0abd12629b0af3cf590982c0b413b1e7395cd4ec026f30986818ab95bfaa94a", size = 48143, upload-time = "2026-01-26T02:45:41.635Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/e8/5284c53310dcdc99ce5d66563f6e5773531a9b9fe9ec7a615e9bc306b05f/multidict-6.7.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:14525a5f61d7d0c94b368a42cff4c9a4e7ba2d52e2672a7b23d84dc86fb02b0c", size = 46507, upload-time = "2026-01-26T02:45:42.99Z" }, + { url = "https://files.pythonhosted.org/packages/e4/fc/6800d0e5b3875568b4083ecf5f310dcf91d86d52573160834fb4bfcf5e4f/multidict-6.7.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:17307b22c217b4cf05033dabefe68255a534d637c6c9b0cc8382718f87be4262", size = 239358, upload-time = "2026-01-26T02:45:44.376Z" }, + { url = "https://files.pythonhosted.org/packages/41/75/4ad0973179361cdf3a113905e6e088173198349131be2b390f9fa4da5fc6/multidict-6.7.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a7e590ff876a3eaf1c02a4dfe0724b6e69a9e9de6d8f556816f29c496046e59", size = 246884, upload-time = "2026-01-26T02:45:47.167Z" }, + { url = "https://files.pythonhosted.org/packages/c3/9c/095bb28b5da139bd41fb9a5d5caff412584f377914bd8787c2aa98717130/multidict-6.7.1-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:5fa6a95dfee63893d80a34758cd0e0c118a30b8dcb46372bf75106c591b77889", size = 225878, upload-time = "2026-01-26T02:45:48.698Z" }, + { url = "https://files.pythonhosted.org/packages/07/d0/c0a72000243756e8f5a277b6b514fa005f2c73d481b7d9e47cd4568aa2e4/multidict-6.7.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a0543217a6a017692aa6ae5cc39adb75e587af0f3a82288b1492eb73dd6cc2a4", size = 253542, upload-time = "2026-01-26T02:45:50.164Z" }, + { url = "https://files.pythonhosted.org/packages/c0/6b/f69da15289e384ecf2a68837ec8b5ad8c33e973aa18b266f50fe55f24b8c/multidict-6.7.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:f99fe611c312b3c1c0ace793f92464d8cd263cc3b26b5721950d977b006b6c4d", size = 252403, upload-time = "2026-01-26T02:45:51.779Z" }, + { url = "https://files.pythonhosted.org/packages/a2/76/b9669547afa5a1a25cd93eaca91c0da1c095b06b6d2d8ec25b713588d3a1/multidict-6.7.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9004d8386d133b7e6135679424c91b0b854d2d164af6ea3f289f8f2761064609", size = 244889, upload-time = "2026-01-26T02:45:53.27Z" }, + { url = "https://files.pythonhosted.org/packages/7e/a9/a50d2669e506dad33cfc45b5d574a205587b7b8a5f426f2fbb2e90882588/multidict-6.7.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e628ef0e6859ffd8273c69412a2465c4be4a9517d07261b33334b5ec6f3c7489", size = 241982, upload-time = "2026-01-26T02:45:54.919Z" }, + { url = "https://files.pythonhosted.org/packages/c5/bb/1609558ad8b456b4827d3c5a5b775c93b87878fd3117ed3db3423dfbce1b/multidict-6.7.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:841189848ba629c3552035a6a7f5bf3b02eb304e9fea7492ca220a8eda6b0e5c", size = 232415, upload-time = "2026-01-26T02:45:56.981Z" }, + { url = "https://files.pythonhosted.org/packages/d8/59/6f61039d2aa9261871e03ab9dc058a550d240f25859b05b67fd70f80d4b3/multidict-6.7.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ce1bbd7d780bb5a0da032e095c951f7014d6b0a205f8318308140f1a6aba159e", size = 240337, upload-time = "2026-01-26T02:45:58.698Z" }, + { url = "https://files.pythonhosted.org/packages/a1/29/fdc6a43c203890dc2ae9249971ecd0c41deaedfe00d25cb6564b2edd99eb/multidict-6.7.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b26684587228afed0d50cf804cc71062cc9c1cdf55051c4c6345d372947b268c", size = 248788, upload-time = "2026-01-26T02:46:00.862Z" }, + { url = "https://files.pythonhosted.org/packages/a9/14/a153a06101323e4cf086ecee3faadba52ff71633d471f9685c42e3736163/multidict-6.7.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = 
"sha256:9f9af11306994335398293f9958071019e3ab95e9a707dc1383a35613f6abcb9", size = 242842, upload-time = "2026-01-26T02:46:02.824Z" }, + { url = "https://files.pythonhosted.org/packages/41/5f/604ae839e64a4a6efc80db94465348d3b328ee955e37acb24badbcd24d83/multidict-6.7.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b4938326284c4f1224178a560987b6cf8b4d38458b113d9b8c1db1a836e640a2", size = 240237, upload-time = "2026-01-26T02:46:05.898Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/c3a5187bf66f6fb546ff4ab8fb5a077cbdd832d7b1908d4365c7f74a1917/multidict-6.7.1-cp314-cp314t-win32.whl", hash = "sha256:98655c737850c064a65e006a3df7c997cd3b220be4ec8fe26215760b9697d4d7", size = 48008, upload-time = "2026-01-26T02:46:07.468Z" }, + { url = "https://files.pythonhosted.org/packages/0c/f7/addf1087b860ac60e6f382240f64fb99f8bfb532bb06f7c542b83c29ca61/multidict-6.7.1-cp314-cp314t-win_amd64.whl", hash = "sha256:497bde6223c212ba11d462853cfa4f0ae6ef97465033e7dc9940cdb3ab5b48e5", size = 53542, upload-time = "2026-01-26T02:46:08.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/81/4629d0aa32302ef7b2ec65c75a728cc5ff4fa410c50096174c1632e70b3e/multidict-6.7.1-cp314-cp314t-win_arm64.whl", hash = "sha256:2bbd113e0d4af5db41d5ebfe9ccaff89de2120578164f86a5d17d5a576d1e5b2", size = 44719, upload-time = "2026-01-26T02:46:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" }, +] + +[[package]] +name = "packaging" +version = "26.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134, 
upload-time = "2026-04-24T20:15:23.917Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = "2026-04-24T20:15:22.081Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "postgrest" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecation" }, + { name = "httpx", extra = ["http2"] }, + { name = "pydantic" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/7c/54e7be05adc9fd6fd98dc572ddfc8982d45bec314a55711e37277d440698/postgrest-2.30.0.tar.gz", hash = "sha256:4f89eec56ce605ab6fbddd9b96d526a9bb44962796d44a5d85cb77640eb766c3", size = 14430, upload-time = "2026-05-06T17:35:21.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/aa/ff2e09f99f95ea96fddeb373646bf907dd89a24fc00b5d38e5674ca7c9ca/postgrest-2.30.0-py3-none-any.whl", hash = "sha256:30631e7993da542419f4217cf3b60aa641084731ea15e66a18526a3a52e40a7d", size = 23108, upload-time = "2026-05-06T17:35:20.531Z" }, +] + +[[package]] +name = "prometheus-client" +version = "0.21.1" +source = { registry = "https://pypi.org/simple" } 
+sdist = { url = "https://files.pythonhosted.org/packages/62/14/7d0f567991f3a9af8d1cd4f619040c93b68f09a02b6d0b6ab1b2d1ded5fe/prometheus_client-0.21.1.tar.gz", hash = "sha256:252505a722ac04b0456be05c05f75f45d760c2911ffc45f2a06bcaed9f3ae3fb", size = 78551, upload-time = "2024-12-03T14:59:12.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/c2/ab7d37426c179ceb9aeb109a85cda8948bb269b7561a0be870cc656eefe4/prometheus_client-0.21.1-py3-none-any.whl", hash = "sha256:594b45c410d6f4f8888940fe80b5cc2521b305a1fafe1c58609ef715a001f301", size = 54682, upload-time = "2024-12-03T14:59:10.935Z" }, +] + +[[package]] +name = "propcache" +version = "0.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/44/c87281c333769159c50594f22610f77398a47ccbfbbf23074e744e86f87c/propcache-0.5.2.tar.gz", hash = "sha256:01c4fc7480cd0598bb4b57022df55b9ca296da7fc5a8760bd8451a7e63a7d427", size = 50208, upload-time = "2026-05-08T21:02:12.199Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/cb/e27bc2b2737a0bb49962b275efa051e8f1c35a936df7d5139b6b658b7dc9/propcache-0.5.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:806719138ecd720339a12410fb9614ac9b2b2d3a5fdf8235d56981c36f4039ba", size = 95887, upload-time = "2026-05-08T21:00:11.277Z" }, + { url = "https://files.pythonhosted.org/packages/e6/13/b8ae04c59392f8d11c6cd9fb4011d1dc7c86b81225c770280300e259ffe1/propcache-0.5.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:db2b80ea58eab4f86b2beec3cc8b39e8ff9276ac20e96b7cce43c8ae84cd6b5a", size = 54654, upload-time = "2026-05-08T21:00:12.604Z" }, + { url = "https://files.pythonhosted.org/packages/2c/7d/49777a3e20b55863d4794384a38acd460c04157b0a00f8602b0d508b8431/propcache-0.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e5cbfac9f61484f7e9f3597775500cd3ebe8274e9b050c38f9525c77c97520bf", size = 55190, upload-time = "2026-05-08T21:00:13.935Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/c7/085d0cd63062e84044e3f05797749c3f8e3938ff3aeb0eb2f69d43fafc91/propcache-0.5.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5dbc581d2814337da56222fab8dc5f161cd798a434e49bac27930aaef798e144", size = 59995, upload-time = "2026-05-08T21:00:15.526Z" }, + { url = "https://files.pythonhosted.org/packages/9c/42/32cf8e3009e92b2645cf1e944f701e8ea4e924dffde1ee26db860bcbf7e4/propcache-0.5.2-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:857187f381f88c8e2fa2fe56ab94879d011b883d5a2ee5a1b60a8cd2a06846d9", size = 63422, upload-time = "2026-05-08T21:00:16.824Z" }, + { url = "https://files.pythonhosted.org/packages/9e/1b/f112433f99fc979431b87a39ef169e3f8df070d99a72792c56d6937ac48b/propcache-0.5.2-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:178b4a2cdaac1818e2bf1c5a99b94383fa73ea5382e032a48dec07dc5668dc42", size = 64342, upload-time = "2026-05-08T21:00:18.362Z" }, + { url = "https://files.pythonhosted.org/packages/14/15/5574111ae50dd6e879456888c0eadd4c5a869959775854e18e18a6b345f3/propcache-0.5.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f328175a2cde1f0ff2c4ed8ce968b9dcfb55f3a7153f39e2957ed994da13476", size = 61639, upload-time = "2026-05-08T21:00:19.692Z" }, + { url = "https://files.pythonhosted.org/packages/cc/da/4d775080b1490c0ae604acda868bd71aabe3a89ed16f2aa4339eb8a283e7/propcache-0.5.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5671d09a36b06d0fd4a3da0fccbcae360e9b1570924171a15e9e0997f0249fba", size = 61588, upload-time = "2026-05-08T21:00:21.155Z" }, + { url = "https://files.pythonhosted.org/packages/04/ac/f076982cbe2195ee9cf32de5a1e46951d9fb399fc207f390562dd0fd8fb2/propcache-0.5.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:80168e2ebe4d3ec6599d10ad8f520304ae1cad9b6c5a95372aef1b66b7bfb53a", size = 60029, upload-time = "2026-05-08T21:00:22.713Z" }, + { url = "https://files.pythonhosted.org/packages/70/60/189be62e0dd898dce3b331e1b8c7a543cd3a405ac0c81fe8ee8a9d5d77e1/propcache-0.5.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:45f11346f884bc47444f6e6647131055844134c3175b629f84952e2b5cd62b64", size = 56774, upload-time = "2026-05-08T21:00:24.001Z" }, + { url = "https://files.pythonhosted.org/packages/ea/9e/93377b9c7939c1ffae98f878dee955efadfd638078bc86dbc21f9d52f651/propcache-0.5.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8e778ebd44ef4f66ed60a0416b06b489687db264a9c0b3620362f26489492913", size = 63532, upload-time = "2026-05-08T21:00:25.545Z" }, + { url = "https://files.pythonhosted.org/packages/14/f9/590ef6cfb9b8028d516d287812ece32bb0bc5f11fbb9c8bf6b2e6313fec8/propcache-0.5.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:c0cb9ed24c8964e172768d455a38254c2dd8a552905729ce006cad3d3dda59b1", size = 61592, upload-time = "2026-05-08T21:00:27.186Z" }, + { url = "https://files.pythonhosted.org/packages/b4/5e/70958b3034c297a630bba2f17ca7abc2d5f39a803ad7e370ab79d1ecd022/propcache-0.5.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:1d1ad32d9d4355e2be65574fd0bfd3677e7066b009cd5b9b2dee8aa6a6393b33", size = 64788, upload-time = "2026-05-08T21:00:28.8Z" }, + { url = "https://files.pythonhosted.org/packages/12/fd/77fe5936d8c3086ca9048f7f415f122ed82e53884a9ec193646b42deef06/propcache-0.5.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c80f4ba3e8f00189165999a742ee526ebeccedf6c3f7beb0c7df821e9772435a", size = 62514, upload-time = "2026-05-08T21:00:30.098Z" }, + { url = "https://files.pythonhosted.org/packages/cf/74/66bd798b5b3be70aa1b391f5cc9d6a0a5532d7fd3b19ec0b213e72e6ad9d/propcache-0.5.2-cp312-cp312-win32.whl", hash = "sha256:8c7972d8f193740d9175f0998ab38717e6cd322d5935c5b0fef8c0d323fd9031", size = 39018, upload-time = "2026-05-08T21:00:31.622Z" }, + { 
url = "https://files.pythonhosted.org/packages/61/7c/5c0d34aa3024694d6dcb9271cdbdd08c4e47c1c0ad95ec7e7bc74cdea145/propcache-0.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:d9ee8826a7d47863a08ac44e1a5f611a462eefc3a194b492da242128bec75b42", size = 42322, upload-time = "2026-05-08T21:00:32.918Z" }, + { url = "https://files.pythonhosted.org/packages/4d/91/875812f1a3feb20ceba818ef39fbe4d92f1081e04ac815c822496d0d038b/propcache-0.5.2-cp312-cp312-win_arm64.whl", hash = "sha256:2800a4a8ead6b28cccd1ec54b59346f0def7922ee1c7598e8499c733cfbb7c84", size = 38172, upload-time = "2026-05-08T21:00:35.124Z" }, + { url = "https://files.pythonhosted.org/packages/c5/09/f049e45385503fe67db75a6b6186a7b9f0c3930366dc960522c312a825b1/propcache-0.5.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:099aaf4b4d1a02265b92a977edf00b5c4f63b3b17ac6de39b0d637c9cac0188a", size = 94457, upload-time = "2026-05-08T21:00:36.355Z" }, + { url = "https://files.pythonhosted.org/packages/6b/65/83d1d05655baf63113731bd5a1008435e14f8d1e5a06cbe4ec5b23ad7a31/propcache-0.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:68ce1c44c7a813a7f71ea04315a8c7b330b63db99d059a797a4651bb6f69f117", size = 53835, upload-time = "2026-05-08T21:00:38.072Z" }, + { url = "https://files.pythonhosted.org/packages/a9/12/a6ba6482bb5ea3260c000c9b20881c95fa11c6b30173715668259f844ed7/propcache-0.5.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fc299c129490f55f254cd90be0deca4764e36e9a7c08b4aa588479a3bbed3098", size = 54545, upload-time = "2026-05-08T21:00:39.319Z" }, + { url = "https://files.pythonhosted.org/packages/a9/19/7fa086f5764c59ec8a8e157cd93aa8497acc00aba9dcdec56bfffb32602d/propcache-0.5.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a6ae2198be502c10f09b2516e7b5d019816924bc3183a43ce792a7bd6625e6f4", size = 59886, upload-time = "2026-05-08T21:00:40.621Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/e4/5d7663dc8235956c8f5281698a3af1d351d8820341ddd890f59d9a9127f2/propcache-0.5.2-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6041d31504dc1779d700e1edcfb08eea334b357620b06681a4eabb57a74e574e", size = 63261, upload-time = "2026-05-08T21:00:41.775Z" }, + { url = "https://files.pythonhosted.org/packages/4a/4a/15a03adee24d6350da4292caeac44c34c033d2afe5e87eb370f38854560f/propcache-0.5.2-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7eabc04151c78a9f4d5bbb5f1faf571e4defeb4b585e0fe95b60ff2dbe4d3d7", size = 64184, upload-time = "2026-05-08T21:00:43.018Z" }, + { url = "https://files.pythonhosted.org/packages/8b/c6/979176efdaa3d239e36d503d5af63a0a773b36662ed8f52e5b6a6d9fd40e/propcache-0.5.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4db0ba63d693afd40d249bd93f842b5f144f8fcbb83de05660373bcf30517b1d", size = 61534, upload-time = "2026-05-08T21:00:44.507Z" }, + { url = "https://files.pythonhosted.org/packages/c8/22/63e8cd1bae4c2d2be6493b6b7d10566ddafad88137cfbc99964a1119853c/propcache-0.5.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1dbcf7675229b35d31abb6547d8ebc8c27a830ac3f9a794edff6254873ec7c0a", size = 61500, upload-time = "2026-05-08T21:00:45.796Z" }, + { url = "https://files.pythonhosted.org/packages/60/5a/28e5d9acbac1cc9ccb67045e8c1b943aa8d79fdf39c93bd73cacd68008ea/propcache-0.5.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d310c013aad2c72f1c3f2f8dd3279d460a858c551f97aeb8c63e4693cca7b4d2", size = 59994, upload-time = "2026-05-08T21:00:47.093Z" }, + { url = "https://files.pythonhosted.org/packages/f3/40/db650677f554a95b9c01a7c9d93d629e93a15562f5deb4573c9ee136fed2/propcache-0.5.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:06187263ddad280d05b4d8a8b3bb7d164cbebd469236544a42e6d9b28ac6a4fa", size = 56884, upload-time = 
"2026-05-08T21:00:48.376Z" }, + { url = "https://files.pythonhosted.org/packages/80/45/70b39b89516ff8b96bf732fa6fded8cef20f293cb1508690101c3c07ec51/propcache-0.5.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3115559b8effafd63b142ea5ed53d63a16ea6469cbc63dce4ee194b42db5d853", size = 63464, upload-time = "2026-05-08T21:00:49.954Z" }, + { url = "https://files.pythonhosted.org/packages/f9/e2/fa59d3a89eac5534293124af4f1d0d0ada091ce4a0ab4610ce03fd2bdd8d/propcache-0.5.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c60462af8e6dc30c35407c7237ea908d777b22862bbee27bc4699c0d8bcdc45a", size = 61588, upload-time = "2026-05-08T21:00:51.281Z" }, + { url = "https://files.pythonhosted.org/packages/0b/97/efb547a55c4bc7381cfb202d6a2239ac621045277bc1ea5dfd3a7f0516c0/propcache-0.5.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40314bca9ac559716fe374094fc81c11dcc34b64fd6c585360f5775690505704", size = 64667, upload-time = "2026-05-08T21:00:52.602Z" }, + { url = "https://files.pythonhosted.org/packages/92/56/f5c7d9b4b7595d5127da38974d791b2153f3d1eae6c674af3583ace92ad3/propcache-0.5.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cfa21e036ce1e1db2be04ba3b85d2df1bb1702fa01932d984c5464c665228ff4", size = 62463, upload-time = "2026-05-08T21:00:54.303Z" }, + { url = "https://files.pythonhosted.org/packages/bd/3b/484a3a65fc9f9f60c41dcd17b428bace5389544e2c680994534a20755066/propcache-0.5.2-cp313-cp313-win32.whl", hash = "sha256:f156a3529f38063b6dbaf356e15602a7f95f8055b1295a438433a6386f10463d", size = 38621, upload-time = "2026-05-08T21:00:55.808Z" }, + { url = "https://files.pythonhosted.org/packages/1c/fd/3f0f10dba4dabad3bf53102be007abf55481067952bde0fdddff439e7c61/propcache-0.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:dfed59d0a5aeb01e242e66ff0300bc4a265a7c05f612d30016f0b60b1017d757", size = 41649, upload-time = "2026-05-08T21:00:57.061Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/ec/6ce619cc32bb500a482f811f9cd509368b4e58e638d13f2c68f370d6b475/propcache-0.5.2-cp313-cp313-win_arm64.whl", hash = "sha256:ba338430e87ceb9c8f0cf754de38a9860560261e56c00376debd628698a7364f", size = 37636, upload-time = "2026-05-08T21:00:58.646Z" }, + { url = "https://files.pythonhosted.org/packages/1b/82/c1d268bbbf2ef981c5bf0fbbe746db617c66e3bcefe431a1aa8943fbe23a/propcache-0.5.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:a592f5f3da71c8691c788c13cb6734b6d17663d2e1cb8caddf0673d01ef8847d", size = 98872, upload-time = "2026-05-08T21:00:59.889Z" }, + { url = "https://files.pythonhosted.org/packages/f4/d4/52c871e73e864e6b34c0e2d58ac1ec5ccd149497ddc7ad2137ae98323a35/propcache-0.5.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6a997d0489e9668a384fcfd5061b857aa5361de73191cac204d04b889cfbbafa", size = 56257, upload-time = "2026-05-08T21:01:01.195Z" }, + { url = "https://files.pythonhosted.org/packages/67/f0/9b90ca2a210b3d09bcfcd96ecd0f55545c091535abce2a45de2775cfd357/propcache-0.5.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:10734b5484ea113152ee25a91dccedf81631791805d2c9ccb054958e51842c94", size = 56696, upload-time = "2026-05-08T21:01:02.941Z" }, + { url = "https://files.pythonhosted.org/packages/9d/0e/6e9d4ba07c8e56e21ddec1e75f12148142b21ca83a51871babce095334f4/propcache-0.5.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cafca7e56c12bb02ae16d283742bef25a61122e9dab2b5b3f2ccbe589ce32164", size = 62378, upload-time = "2026-05-08T21:01:04.475Z" }, + { url = "https://files.pythonhosted.org/packages/65/19/c10badaa463dde8a27ce884f8ee2ec37e6035b7c9f5ff0c8f74f06f08dac/propcache-0.5.2-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f064f8d2b59177878b7615df1735cd8fe3462ed6be8c7b217d17a276489c2b7f", size = 65283, upload-time = "2026-05-08T21:01:05.959Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/b6/93bea99ca80e19cef6512a8580e5b7857bbe09422d9daa7fd4ef5723306c/propcache-0.5.2-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f78abfa8dfc32376fd1aacf597b2f2fbbe0ea751419aee718af5d4f82537ef8c", size = 66616, upload-time = "2026-05-08T21:01:07.228Z" }, + { url = "https://files.pythonhosted.org/packages/83/e4/5c7462e50625f051f37fb38b8224f7639f667184bbd34424ec83819bb1b7/propcache-0.5.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7467da8a9822bf1a55336f877340c5bcbd3c482afc43a99771169f74a26dedc", size = 63773, upload-time = "2026-05-08T21:01:08.514Z" }, + { url = "https://files.pythonhosted.org/packages/ca/b6/99238894047b13c823be25027e736626cd414a52a5e30d2c3347c2733529/propcache-0.5.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a6ddc6ac9e25de626c1f129c1b467d7ecd33ce2237d3fd0c4e429feef0a7ee1f", size = 63664, upload-time = "2026-05-08T21:01:09.874Z" }, + { url = "https://files.pythonhosted.org/packages/85/1e/a3a1a63116a2b8edb415a8bb9a6f0c34bd03830b1e18e8ce2904e1dc1cf4/propcache-0.5.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2f22cbbac9e26a8e864c0985ff1268d5d939d53d9d9411a9824279097e03a2cb", size = 62643, upload-time = "2026-05-08T21:01:11.132Z" }, + { url = "https://files.pythonhosted.org/packages/e4/03/893cf147de2fc6543c5eaa07ad833170e7e2a2385725bbebe8c0503723bb/propcache-0.5.2-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:fc76378c62a0f04d0cd82fbb1a2cd2d7e28fcb40d5873f28a6c44e388aaa2751", size = 59595, upload-time = "2026-05-08T21:01:12.387Z" }, + { url = "https://files.pythonhosted.org/packages/86/3b/04c1a2e12c57766568ba75ba72b3bf2042818d4c1425fab6fc07155c7cff/propcache-0.5.2-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:acd2c8edba48e31e58a363b8cf4e5c7db3b04b3f9e371f601df30d9b0d244836", size = 65711, upload-time = "2026-05-08T21:01:13.676Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/34/80f8d0099f8d6bacc4de1624c85672681c8cd1149ca2da0e38fd120b817f/propcache-0.5.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:452b5065457eb9991ec5eb38ff41d6cd4c991c9ac7c531c4d5849ae473a9a13f", size = 64247, upload-time = "2026-05-08T21:01:14.936Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1a/8b08f3a5f1037e9e370c55883ceeeee0f6dd0416fb2d2d67b8bfc91f2a79/propcache-0.5.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:3430bb2bfe1331885c427745a751e774ee679fd4344f80b97bf879815fe8fa55", size = 67102, upload-time = "2026-05-08T21:01:16.281Z" }, + { url = "https://files.pythonhosted.org/packages/34/68/8bdb7bb7756d76e005490649d10e4a8369e610c74d619f71e1aedf889e9c/propcache-0.5.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cef6cea3922890dd6c9654971001fa797b526c16ab5e1e46c05fd6f877be7568", size = 64964, upload-time = "2026-05-08T21:01:17.57Z" }, + { url = "https://files.pythonhosted.org/packages/0a/aa/50fb0b5d3968b61a510926ff8b8465f1d6e976b3ab74496d7a4b9fc42515/propcache-0.5.2-cp313-cp313t-win32.whl", hash = "sha256:72d61e16dd78228b58c5d47be830ff3da7e5f139abdf0aef9d86cde1c5cf2191", size = 42546, upload-time = "2026-05-08T21:01:18.946Z" }, + { url = "https://files.pythonhosted.org/packages/ae/4c/0ddbae64321bd4a95bcbfc19307238016b5b1fee645c84626c8d539e5b74/propcache-0.5.2-cp313-cp313t-win_amd64.whl", hash = "sha256:0958834041a0166d343b8d2cedcd8bcbaeb4fdbe0cf08320c5379f143c3be6e7", size = 46330, upload-time = "2026-05-08T21:01:20.162Z" }, + { url = "https://files.pythonhosted.org/packages/00/d9/9cddc8efb78d8af264c5ec9f6d10b62f57c515feda8d321595f56010fb23/propcache-0.5.2-cp313-cp313t-win_arm64.whl", hash = "sha256:6de8bd93ddde9b992cf2b2e0d796d501a19026b5b9fd87356d7d0779531a8d96", size = 40521, upload-time = "2026-05-08T21:01:21.399Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/ea/23ee535d90ce8bcc465a3028eb3cc0ce3bd1005f4bb27710b30587de798d/propcache-0.5.2-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:46088abff4cba581dea21ae0467a480526cb25aa5f3c269e909f800328bc3999", size = 94662, upload-time = "2026-05-08T21:01:22.683Z" }, + { url = "https://files.pythonhosted.org/packages/b5/06/c5a52f419b5d8972f8d46a7577476090d8e3263ff589ce40b5ca4968d5be/propcache-0.5.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fc88b26f08d634f7bc819a7852e5214f5802641ab8d9fd5326892292eee1993e", size = 53928, upload-time = "2026-05-08T21:01:23.986Z" }, + { url = "https://files.pythonhosted.org/packages/63/b1/4260d67d6bd85e58a66b72d54ce15d5de789b6f3870cc6bedf8ff9667401/propcache-0.5.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:97797ebb098e670a2f92dd66f32897e30d7615b14e7f59711de23e30a9072539", size = 54650, upload-time = "2026-05-08T21:01:25.305Z" }, + { url = "https://files.pythonhosted.org/packages/70/06/2f46c318e3307cd7a6a7481def374ce838c0fe20084b39dd54b0879d0e99/propcache-0.5.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba57fffe4ac99c5d30076161b5866336d97600769bad35cc68f7774b15298a4e", size = 59912, upload-time = "2026-05-08T21:01:26.545Z" }, + { url = "https://files.pythonhosted.org/packages/4c/29/fe1aebec2ce57ab985a9c382bded1124431f85078113aa222c5d278430d4/propcache-0.5.2-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:583c19759d9eec1e5b69e2fbef36a7d9c326041be9746cb822d335c8cedc2979", size = 63300, upload-time = "2026-05-08T21:01:27.937Z" }, + { url = "https://files.pythonhosted.org/packages/b4/18/2334b26768b6c82be8c69e83671b767d5ef426aa09b0cba6c2ea47816774/propcache-0.5.2-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d0326e2e5e1f3163fa306c834e48e8d490e5fae607a097a40c0648109b47ba80", size = 64208, upload-time = "2026-05-08T21:01:29.484Z" 
}, + { url = "https://files.pythonhosted.org/packages/2b/76/7f1bfd6afff4c5e38e36a3c6d68eb5f4b7311ea80baf693db78d95b603c4/propcache-0.5.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e00820e192c8dbebcafb383ebbf99030895f09905e7a0eb2e0340a0bcc2bc825", size = 61633, upload-time = "2026-05-08T21:01:31.068Z" }, + { url = "https://files.pythonhosted.org/packages/c4/46/b3ff8aba2b4953a3e50de2cf72f1b5748b8eca93b15f3dc2c84339084c09/propcache-0.5.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c66afea89b1e43725731d2004732a046fe6fe955d51f952c3e95a7314a284a39", size = 61724, upload-time = "2026-05-08T21:01:32.374Z" }, + { url = "https://files.pythonhosted.org/packages/c5/01/814cfcafbcff954f94c01cf30e097ddc88a076b5440fbcf4570753437d40/propcache-0.5.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d4dc37dec6c6cdad0b57881a5658fd14fbf53e333b1a86cf86559f190e1d9ec4", size = 60069, upload-time = "2026-05-08T21:01:33.67Z" }, + { url = "https://files.pythonhosted.org/packages/da/68/5c6f7622d510cc666a300687e06fd060c1a43361c0c9b20d284f06d8096a/propcache-0.5.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:5570dbcc97571c15f68068e529c92715a12f8d54030e272d264b377e22bd17a5", size = 57099, upload-time = "2026-05-08T21:01:34.915Z" }, + { url = "https://files.pythonhosted.org/packages/55/27/9cb0b4c679124085327957d42521c99dba04c88c90c3e55a6f0b633ebccc/propcache-0.5.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f814362777a9f841adddb200ecdf8f5cb1e5a3c4b7a86378edbd6ccb26edd702", size = 63391, upload-time = "2026-05-08T21:01:36.231Z" }, + { url = "https://files.pythonhosted.org/packages/f0/9d/7258aaa5bdf60fc6f27591eef6fe52768cb0beda7140be477c8b12c9794a/propcache-0.5.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:196913dea116aeb5a2ba95af4ddcb7ea85559ae07d8eee8751688310d09168c3", size = 61626, upload-time = "2026-05-08T21:01:37.545Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/0d/41c602003e8a9b16fe1e7eadf62c7bfba9d5474370b24200bf48b315f45f/propcache-0.5.2-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:6e7b8719005dd1175be4ab1cd25e9b98659a5e0347331506ec6760d2773a7fb5", size = 64781, upload-time = "2026-05-08T21:01:38.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f3/38e66b1856e9bd079deea015bc4a55f7767c0e4db2f7dcf69e7e680ba4ce/propcache-0.5.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:51f96d685ab16e88cab128cd37a52c5da540809c8b879fa047731bfcb4ad35a4", size = 62570, upload-time = "2026-05-08T21:01:40.415Z" }, + { url = "https://files.pythonhosted.org/packages/95/ca/bbfe9b910ce57dde8bb4876b4520fc02a4e89497c10de26be936758a3aaa/propcache-0.5.2-cp314-cp314-win32.whl", hash = "sha256:cc6fc3cc62e8501d3ed62894425040d2728ecddb1ed072737a5c70bd537aa9f0", size = 39436, upload-time = "2026-05-08T21:01:41.654Z" }, + { url = "https://files.pythonhosted.org/packages/61/d2/45c9defbaa1ea297035d9d4cce9e8f80daafbf19319c6007f157c6256ea9/propcache-0.5.2-cp314-cp314-win_amd64.whl", hash = "sha256:81e3a30b0bb60caa22033dd0f8a3618d1d67356212514f62c57db75cb0ef410c", size = 42373, upload-time = "2026-05-08T21:01:43.041Z" }, + { url = "https://files.pythonhosted.org/packages/44/68/9ea5103f41d5217d7d6ec24db90018e23aebec070c3f9a6e54d12b841fd8/propcache-0.5.2-cp314-cp314-win_arm64.whl", hash = "sha256:0d2c9bf8528f135dbb805ce027567e09164f7efa51a2be07458a2c0420f292d0", size = 38554, upload-time = "2026-05-08T21:01:44.336Z" }, + { url = "https://files.pythonhosted.org/packages/8a/81/fadf555f42d3b762eea8a53950b0489fdc0aa9da5f8ed9e10ce0a4e01b48/propcache-0.5.2-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:4bc8ff1feffc6a61c7002ffe84634c41b822e104990ae009f44a0834430070bb", size = 99395, upload-time = "2026-05-08T21:01:45.883Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/c9/c61e134a686949cf7971af3a390148b1156f7be81c73bc0cd12c873e2d48/propcache-0.5.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:79aa3ff0a9b566633b642fa9caf7e21ed1c13d6feca718187873f199e1514078", size = 56653, upload-time = "2026-05-08T21:01:47.307Z" }, + { url = "https://files.pythonhosted.org/packages/cb/73/daf935ea7048ddd7ec8eec5345b4a40b619d2d178b3c0a0900796bc3c794/propcache-0.5.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1b31822f4474c4036bae62de9402710051d431a606d6a0f907fec79935a071aa", size = 56914, upload-time = "2026-05-08T21:01:48.573Z" }, + { url = "https://files.pythonhosted.org/packages/79/9f/aba959b435ea18617edd7cf0a7ad0b9c574b8fc7e3d2cd55fb59cb255d33/propcache-0.5.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:13fef48778b5a2a756523fdb781326b028ca75e32858b04f2cdd19f394564917", size = 62567, upload-time = "2026-05-08T21:01:49.903Z" }, + { url = "https://files.pythonhosted.org/packages/6c/a1/859942de9a791ff42f6141736f5b37749b8f53e65edfa49638c67dd67e6a/propcache-0.5.2-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8b73ab70f1a3351fbc71f663b3e645af6dd0329100c353081cf69c37433fc6fe", size = 65542, upload-time = "2026-05-08T21:01:51.204Z" }, + { url = "https://files.pythonhosted.org/packages/b5/61/315bc0fd6c0fc7f80a528b8afd209e5fc4a875ea79571b91b8f50f442907/propcache-0.5.2-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5538d2c13d93e4698af7e092b57bc7298fd35d1d58e656ae18f23ee0d0378e03", size = 66845, upload-time = "2026-05-08T21:01:52.539Z" }, + { url = "https://files.pythonhosted.org/packages/47/f7/9f8122e3132e8e354ac41975ef8f1099be7d5a16bc7ae562734e993665c0/propcache-0.5.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cd645f03898405cabe694fb8bc35241e3a9c332ec85627584fe3de201452b335", size = 
63985, upload-time = "2026-05-08T21:01:53.847Z" }, + { url = "https://files.pythonhosted.org/packages/c8/54/c317819ec157cbf6f35df9df9657a6f82daf34d5faf15948b2f639c2192e/propcache-0.5.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a473b3440261e0c60706e732b2ed2f517857344fc21bf48fdfe211e2d98eb285", size = 63999, upload-time = "2026-05-08T21:01:55.179Z" }, + { url = "https://files.pythonhosted.org/packages/5a/56/387e3f7dfce0a9233df41fb888aa1c30222cb4bbbf09537c02dd9bd85fe2/propcache-0.5.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7afa37062e6650640e932e4cc9297d81f9f42d9944029cc386b8247dea4da837", size = 62779, upload-time = "2026-05-08T21:01:57.489Z" }, + { url = "https://files.pythonhosted.org/packages/a1/9c/596784cb5824ed61ee960d3f8655a3f0993e107c6e98ab6c818b7fb92ccb/propcache-0.5.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:8a90efd5777e996e42d568db9ac740b944d691e565cbfd31b2f7832f9184b2b8", size = 59796, upload-time = "2026-05-08T21:01:58.736Z" }, + { url = "https://files.pythonhosted.org/packages/c2/3d/1a6cfa1726a48542c1e8784a0761421476a5b68e09b7f36bf95eb954aaba/propcache-0.5.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:f19bb891234d72535764d703bfed1153cc34f4214d5bd7150aee1eec9e8f4366", size = 66023, upload-time = "2026-05-08T21:02:00.228Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0e/05fd6990369477076e4e280bcb970de760fddf0161a46e988bc95f7940ec/propcache-0.5.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:32775082acd2d807ee3db715c7770d38767b817870acfa08c29e057f3c4d5b56", size = 64448, upload-time = "2026-05-08T21:02:01.888Z" }, + { url = "https://files.pythonhosted.org/packages/cd/86/5f8da315a4309c62c10c0b2516b17492d5d3bbe1bb862b96604db67e2a37/propcache-0.5.2-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9282fb1a3bccd038da9f768b927b24a0c753e466c086b7c4f3c6982851eefb2d", size = 67329, upload-time = "2026-05-08T21:02:03.484Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/d3/3368efe79ab21f0cdf86ef49895811c9cc933131d4cde1f28a624e22e712/propcache-0.5.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cc49723e2f60d6b32a0f0b08a3fd6d13203c07f1cd9566cfce0f12a917c967a2", size = 65172, upload-time = "2026-05-08T21:02:04.745Z" }, + { url = "https://files.pythonhosted.org/packages/d5/07/127e8b0bacfb325396196f9d976a22453049b89b9b2b08477cc3145faa44/propcache-0.5.2-cp314-cp314t-win32.whl", hash = "sha256:2d7aa89ebca5acc98cba9d1472d976e394782f587bad6661003602a619fd1821", size = 43813, upload-time = "2026-05-08T21:02:06.025Z" }, + { url = "https://files.pythonhosted.org/packages/88/fb/46dad6c0ae49ed230ab1b16c890c2b6314e2403e6c412976f4a72d64a527/propcache-0.5.2-cp314-cp314t-win_amd64.whl", hash = "sha256:d447bb0b3054be5818458fbb171208b1d9ff11eba14e18ca18b90cbb45767370", size = 47764, upload-time = "2026-05-08T21:02:07.353Z" }, + { url = "https://files.pythonhosted.org/packages/e7/c4/a47d0a63aa309d10d59ede6e9d4cff03a344a79d1f0f4cd0cd74997b53e0/propcache-0.5.2-cp314-cp314t-win_arm64.whl", hash = "sha256:fe67a3d11cd9b4efabfa45c3d00ffba2b26811442a73a581a94b67c2b5faccf6", size = 41140, upload-time = "2026-05-08T21:02:09.065Z" }, + { url = "https://files.pythonhosted.org/packages/3a/ed/1cdcab6ba3d6ab7feca11fc14f0eeea80755bb53ef4e892079f31b10a25f/propcache-0.5.2-py3-none-any.whl", hash = "sha256:be1ddfcbb376e3de5d2e2db1d58d6d67463e6b4f9f040c000de8e300295465fe", size = 14036, upload-time = "2026-05-08T21:02:10.673Z" }, +] + +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, +] + +[[package]] +name = "pydantic" +version = "2.13.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/18/a5/b60d21ac674192f8ab0ba4e9fd860690f9b4a6e51ca5df118733b487d8d6/pydantic-2.13.4.tar.gz", hash = "sha256:c40756b57adaa8b1efeeced5c196f3f3b7c435f90e84ea7f443901bec8099ef6", size = 844775, upload-time = "2026-05-06T13:43:05.343Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/7b/122376b1fd3c62c1ed9dc80c931ace4844b3c55407b6fb2d199377c9736f/pydantic-2.13.4-py3-none-any.whl", hash = "sha256:45a282cde31d808236fd7ea9d919b128653c8b38b393d1c4ab335c62924d9aba", size = 472262, upload-time = "2026-05-06T13:43:02.641Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.46.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/56/921726b776ace8d8f5db44c4ef961006580d91dc52b803c489fafd1aa249/pydantic_core-2.46.4.tar.gz", hash = "sha256:62f875393d7f270851f20523dd2e29f082bcc82292d66db2b64ea71f64b6e1c1", size = 471464, upload-time = "2026-05-06T13:37:06.98Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/8c/af022f0af448d7747c5154288d46b5f2bc5f17366eaa0e23e9aa04d59f3b/pydantic_core-2.46.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3245406455a5d98187ec35530fd772b1d799b26667980872c8d4614991e2c4a2", size = 2106158, upload-time = "2026-05-06T13:38:57.215Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/95/6195171e385007300f0f5574592e467c568becce2d937a0b6804f218bc49/pydantic_core-2.46.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:962ccbab7b642487b1d8b7df90ef677e03134cf1fd8880bf698649b22a69371f", size = 1951724, upload-time = "2026-05-06T13:37:02.697Z" }, + { url = "https://files.pythonhosted.org/packages/8e/bc/f47d1ff9cbb1620e1b5b697eef06010035735f07820180e74178226b27b3/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8233f2947cf85404441fd7e0085f53b10c93e0ee78611099b5c7237e36aacbf7", size = 1975742, upload-time = "2026-05-06T13:37:09.448Z" }, + { url = "https://files.pythonhosted.org/packages/5b/11/9b9a5b0306345664a2da6410877af6e8082481b5884b3ddd78d47c6013ce/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a233125ac121aa3ffba9a2b59edfc4a985a76092dc8279586ab4b71390875e7", size = 2052418, upload-time = "2026-05-06T13:37:38.234Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b7/a65fec226f5d78fc39f4a13c4cc0c768c22b113438f60c14adc9d2865038/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b712b53160b79a5850310b912a5ef8e57e56947c8ad690c227f5c9d7e561712", size = 2232274, upload-time = "2026-05-06T13:38:27.753Z" }, + { url = "https://files.pythonhosted.org/packages/68/f0/92039db98b907ef49269a8271f67db9cb78ae2fc68062ef7e4e77adb5f61/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9401557acd873c3a7f3eb9383edef8ac4968f9510e340f4808d427e75667e7b4", size = 2309940, upload-time = "2026-05-06T13:38:05.353Z" }, + { url = "https://files.pythonhosted.org/packages/5f/97/2aab507d3d00ca626e8e57c1eac6a79e4e5fbcc63eb99733ff55d1717f65/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:926c9541b14b12b1681dca8a0b75feb510b06c6341b70a8e500c2fdcff837cce", size = 2094516, upload-time = 
"2026-05-06T13:39:10.577Z" }, + { url = "https://files.pythonhosted.org/packages/22/37/a8aca44d40d737dde2bc05b3c6c07dff0de07ce6f82e9f3167aeaf4d5dea/pydantic_core-2.46.4-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:56cb4851bcaf3d117eddcef4fe66afd750a50274b0da8e22be256d10e5611987", size = 2136854, upload-time = "2026-05-06T13:40:22.59Z" }, + { url = "https://files.pythonhosted.org/packages/24/99/fcef1b79238c06a8cbec70819ac722ba76e02bc8ada9b0fd66eba40da01b/pydantic_core-2.46.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c68fcd102d71ea85c5b2dfac3f4f8476eff42a9e078fd5faefff6d145063536b", size = 2180306, upload-time = "2026-05-06T13:40:10.666Z" }, + { url = "https://files.pythonhosted.org/packages/ae/6c/fc44000918855b42779d007ae63b0532794739027b2f417321cddbc44f6a/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b2f69dec1725e79a012d920df1707de5caf7ed5e08f3be4435e25803efc47458", size = 2190044, upload-time = "2026-05-06T13:40:43.231Z" }, + { url = "https://files.pythonhosted.org/packages/6b/65/d9cadc9f1920d7a127ad2edba16c1db7916e59719285cd6c94600b0080ba/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:8d0820e8192167f80d88d64038e609c31452eeca865b4e1d9950a27a4609b00b", size = 2329133, upload-time = "2026-05-06T13:39:57.365Z" }, + { url = "https://files.pythonhosted.org/packages/d0/cf/c873d91679f3a30bcf5e7ac280ce5573483e72295307685120d0d5ad3416/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fbdb89b3e1c94a30cc5edfce477c6e6a5dc4d8f84665b455c27582f211a1c72c", size = 2374464, upload-time = "2026-05-06T13:38:06.976Z" }, + { url = "https://files.pythonhosted.org/packages/47/bd/6f2fc8188f31bf10590f1e98e7b306336161fac930a8c514cd7bd828c7dc/pydantic_core-2.46.4-cp312-cp312-win32.whl", hash = "sha256:9aa768456404a8bf48a4406685ac2bec8e72b62c69313734fa3b73cf33b3a894", size = 1974823, upload-time = "2026-05-06T13:40:47.985Z" }, + { url = 
"https://files.pythonhosted.org/packages/40/8c/985c1d41ea1107c2534abd9870e4ed5c8e7669b5c308297835c001e7a1c4/pydantic_core-2.46.4-cp312-cp312-win_amd64.whl", hash = "sha256:e9c26f834c65f5752f3f06cb08cb86a913ceb7274d0db6e267808a708b46bc89", size = 2072919, upload-time = "2026-05-06T13:39:21.153Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ba/f463d006e0c47373ca7ec5e1a261c59dc01ef4d62b2657af925fb0deee3a/pydantic_core-2.46.4-cp312-cp312-win_arm64.whl", hash = "sha256:4fc73cb559bdb54b1134a706a2802a4cddd27a0633f5abb7e53056268751ac6a", size = 2027604, upload-time = "2026-05-06T13:39:03.753Z" }, + { url = "https://files.pythonhosted.org/packages/51/a2/5d30b469c5267a17b39dec53208222f76a8d351dfac4af661888c5aee77d/pydantic_core-2.46.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:5d5902252db0d3cedf8d4a1bc68f70eeb430f7e4c7104c8c476753519b423008", size = 2106306, upload-time = "2026-05-06T13:37:48.029Z" }, + { url = "https://files.pythonhosted.org/packages/c1/81/4fa520eaffa8bd7d1525e644cd6d39e7d60b1592bc5b516693c7340b50f1/pydantic_core-2.46.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94f0688e7b8d0a67abf40e57a7eaaecd17cc9586706a31b76c031f63df052b4", size = 1951906, upload-time = "2026-05-06T13:37:17.012Z" }, + { url = "https://files.pythonhosted.org/packages/03/d5/fd02da45b659668b05923b17ba3a0100a0a3d5541e3bd8fcc4ecb711309e/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f027324c56cd5406ca49c124b0db10e56c69064fec039acc571c29020cc87c76", size = 1976802, upload-time = "2026-05-06T13:37:35.113Z" }, + { url = "https://files.pythonhosted.org/packages/21/f2/95727e1368be3d3ed485eaab7adbd7dda408f33f7a36e8b48e0144002b91/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e739fee756ba1010f8bcccb534252e85a35fe45ae92c295a06059ce58b74ccd3", size = 2052446, upload-time = "2026-05-06T13:37:12.313Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/86/5d99feea3f77c7234b8718075b23db11532773c1a0dbd9b9490215dc2eeb/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d56801be94b86a9da183e5f3766e6310752b99ff647e38b09a9500d88e46e76", size = 2232757, upload-time = "2026-05-06T13:39:01.149Z" }, + { url = "https://files.pythonhosted.org/packages/d2/3a/508ac615935ef7588cf6d9e9b91309fdc2da751af865e02a9098de88258c/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2412e734dcb48da14d4e4006b82b46b74f2518b8a26ee7e58c6844a6cd6d03c4", size = 2309275, upload-time = "2026-05-06T13:37:41.406Z" }, + { url = "https://files.pythonhosted.org/packages/07/f8/41db9de19d7987d6b04715a02b3b40aea467000275d9d758ffaa31af7d50/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9551187363ffc0de2a00b2e47c25aeaeb1020b69b668762966df15fc5659dd5a", size = 2094467, upload-time = "2026-05-06T13:39:18.847Z" }, + { url = "https://files.pythonhosted.org/packages/2c/e2/f35033184cb11d0052daf4416e8e10a502ea2ac006fc4f459aee872727d1/pydantic_core-2.46.4-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0186750b482eefa11d7f435892b09c5c606193ef3375bcf94aa00ae6bfb66262", size = 2134417, upload-time = "2026-05-06T13:40:17.944Z" }, + { url = "https://files.pythonhosted.org/packages/7e/7b/6ceeb1cc90e193862f444ebe373d8fdf613f0a82572dde03fb10734c6c71/pydantic_core-2.46.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5855698a4856556d86e8e6cd8434bc3ac0314ee8e12089ae0e143f64c6256e4e", size = 2179782, upload-time = "2026-05-06T13:40:32.618Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f2/c8d7773ede6af08036423a00ae0ceffce266c3c52a096c435d68c896083f/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:cbaf13819775b7f769bf4a1f066cb6df7a28d4480081a589828ef190226881cd", size = 2188782, upload-time = "2026-05-06T13:36:51.018Z" }, + { 
url = "https://files.pythonhosted.org/packages/59/31/0c864784e31f09f05cdd87606f08923b9c9e7f6e51dd27f20f62f975ce9f/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:633147d34cf4550417f12e2b1a0383973bdf5cdfde212cb09e9a581cf10820be", size = 2328334, upload-time = "2026-05-06T13:40:37.764Z" }, + { url = "https://files.pythonhosted.org/packages/c2/eb/4f6c8a41efa30baa755590f4141abf3a8c370fab610915733e74134a7270/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:82cf5301172168103724d49a1444d3378cb20cdee30b116a1bd6031236298a5d", size = 2372986, upload-time = "2026-05-06T13:39:34.152Z" }, + { url = "https://files.pythonhosted.org/packages/5b/24/b375a480d53113860c299764bfe9f349a3dc9108b3adc0d7f0d786492ebf/pydantic_core-2.46.4-cp313-cp313-win32.whl", hash = "sha256:9fa8ae11da9e2b3126c6426f147e0fba88d96d65921799bb30c6abd1cb2c97fb", size = 1973693, upload-time = "2026-05-06T13:37:55.072Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e8/cff247591966f2d22ec8c003cd7587e27b7ba7b81ab2fb888e3ab75dc285/pydantic_core-2.46.4-cp313-cp313-win_amd64.whl", hash = "sha256:6b3ace8194b0e5204818c92802dcdca7fc6d88aabbb799d7c795540d9cd6d292", size = 2071819, upload-time = "2026-05-06T13:38:49.139Z" }, + { url = "https://files.pythonhosted.org/packages/c6/1a/f4aee670d5670e9e148e0c82c7db98d780be566c6e6a97ee8035528ca0b3/pydantic_core-2.46.4-cp313-cp313-win_arm64.whl", hash = "sha256:184c081504d17f1c1066e430e117142b2c77d9448a97f7b65c6ac9fd9aee238d", size = 2027411, upload-time = "2026-05-06T13:40:45.796Z" }, + { url = "https://files.pythonhosted.org/packages/8d/74/228a26ddad29c6672b805d9fd78e8d251cd04004fa7eed0e622096cd0250/pydantic_core-2.46.4-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:428e04521a40150c85216fc8b85e8d39fece235a9cf5e383761238c7fa9b96fb", size = 2102079, upload-time = "2026-05-06T13:38:41.019Z" }, + { url = 
"https://files.pythonhosted.org/packages/ad/1f/8970b150a4b4365623ae00fc88603491f763c627311ae8031e3111356d6e/pydantic_core-2.46.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23ace664830ee0bfe014a0c7bc248b1f7f25ed7ad103852c317624a1083af462", size = 1952179, upload-time = "2026-05-06T13:36:59.812Z" }, + { url = "https://files.pythonhosted.org/packages/95/30/5211a831ae054928054b2f79731661087a2bc5c01e825c672b3a4a8f1b3e/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce5c1d2a8b27468f433ca974829c44060b8097eedc39933e3c206a90ee49c4a9", size = 1978926, upload-time = "2026-05-06T13:37:39.933Z" }, + { url = "https://files.pythonhosted.org/packages/57/e9/689668733b1eb67adeef047db3c2e8788fcf65a7fd9c9e2b46b7744fe245/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7283d57845ecf5a163403eb0702dfc220cc4fbdd18919cb5ccea4f95ee1cdab4", size = 2046785, upload-time = "2026-05-06T13:38:01.995Z" }, + { url = "https://files.pythonhosted.org/packages/60/d9/6715260422ff50a2109878fd24d948a6c3446bb2664f34ee78cd972b3acd/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8daafc69c93ee8a0204506a3b6b30f586ef54028f52aeeeb5c4cfc5184fd5914", size = 2228733, upload-time = "2026-05-06T13:40:50.371Z" }, + { url = "https://files.pythonhosted.org/packages/18/ae/fdb2f64316afca925640f8e70bb1a564b0ec2721c1389e25b8eb4bf9a299/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd2213145bcc2ba85884d0ac63d222fece9209678f77b9b4d76f054c561adb28", size = 2307534, upload-time = "2026-05-06T13:37:21.531Z" }, + { url = "https://files.pythonhosted.org/packages/89/1d/8eff589b45bb8190a9d12c49cfad0f176a5cbd1534908a6b5125e2886239/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a5f930472650a82629163023e630d160863fce524c616f4e5186e5de9d9a49b", size = 2099732, upload-time = 
"2026-05-06T13:39:31.942Z" }, + { url = "https://files.pythonhosted.org/packages/06/d5/ee5a3366637fee41dee51a1fc91562dcf12ddbc68fda34e6b253da2324bb/pydantic_core-2.46.4-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:c1b3f518abeca3aa13c712fd202306e145abf59a18b094a6bafb2d2bbf59192c", size = 2129627, upload-time = "2026-05-06T13:37:25.033Z" }, + { url = "https://files.pythonhosted.org/packages/94/33/2414be571d2c6a6c4d08be21f9292b6d3fdb08949a97b6dfe985017821db/pydantic_core-2.46.4-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1a7dd0b3ee80d90150e3495a3a13ac34dbcbfd4f012996a6a1d8900e91b5c0fb", size = 2179141, upload-time = "2026-05-06T13:37:14.046Z" }, + { url = "https://files.pythonhosted.org/packages/7b/79/7daa95be995be0eecc4cf75064cb33f9bbbfe3fe0158caf2f0d4a996a5c7/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:3fb702cd90b0446a3a1c5e470bfa0dd23c0233b676a9099ddcc964fa6ca13898", size = 2184325, upload-time = "2026-05-06T13:36:53.615Z" }, + { url = "https://files.pythonhosted.org/packages/9f/cb/d0a382f5c0de8a222dc61c65348e0ce831b1f68e0a018450d31c2cace3a5/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:b8458003118a712e66286df6a707db01c52c0f52f7db8e4a38f0da1d3b94fc4e", size = 2323990, upload-time = "2026-05-06T13:40:29.971Z" }, + { url = "https://files.pythonhosted.org/packages/05/db/d9ba624cc4a5aced1598e88c04fdbd8310c8a69b9d38b9a3d39ce3a61ed7/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:372429a130e469c9cd698925ce5fc50940b7a1336b0d82038e63d5bbc4edc519", size = 2369978, upload-time = "2026-05-06T13:37:23.027Z" }, + { url = "https://files.pythonhosted.org/packages/f2/20/d15df15ba918c423461905802bfd2981c3af0bfa0e40d05e13edbfa48bc3/pydantic_core-2.46.4-cp314-cp314-win32.whl", hash = "sha256:85bb3611ff1802f3ee7fdd7dbff26b56f343fb432d57a4728fdd49b6ef35e2f4", size = 1966354, upload-time = "2026-05-06T13:38:03.499Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/b6/6b8de4c0a7d7ab3004c439c80c5c1e0a3e8d78bbae19379b01960383d9e5/pydantic_core-2.46.4-cp314-cp314-win_amd64.whl", hash = "sha256:811ff8e9c313ab425368bcbb36e5c4ebd7108c2bbf4e4089cfbb0b01eff63fac", size = 2072238, upload-time = "2026-05-06T13:39:40.807Z" }, + { url = "https://files.pythonhosted.org/packages/32/36/51eb763beec1f4cf59b1db243a7dcc39cbb41230f050a09b9d69faaf0a48/pydantic_core-2.46.4-cp314-cp314-win_arm64.whl", hash = "sha256:bfec22eab3c8cc2ceec0248aec886624116dc079afa027ecc8ad4a7e62010f8a", size = 2018251, upload-time = "2026-05-06T13:37:26.72Z" }, + { url = "https://files.pythonhosted.org/packages/e8/91/855af51d625b23aa987116a19e231d2aaef9c4a415273ddc189b79a45fee/pydantic_core-2.46.4-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:af8244b2bef6aaad6d92cda81372de7f8c8d36c9f0c3ea36e827c60e7d9467a0", size = 2099593, upload-time = "2026-05-06T13:39:47.682Z" }, + { url = "https://files.pythonhosted.org/packages/fb/1b/8784a54c65edb5f49f0a14d6977cf1b209bba85a4c77445b255c2de58ab3/pydantic_core-2.46.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a4330cdbc57162e4b3aa303f588ba752257694c9c9be3e7ebb11b4aca659b5d", size = 1935226, upload-time = "2026-05-06T13:40:40.428Z" }, + { url = "https://files.pythonhosted.org/packages/e8/e7/1955d28d1afc56dd4b3ad7cc0cf39df1b9852964cf16e5d13912756d6d6b/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29c61fc04a3d840155ff08e475a04809278972fe6aef51e2720554e96367e34b", size = 1974605, upload-time = "2026-05-06T13:37:32.029Z" }, + { url = "https://files.pythonhosted.org/packages/93/e2/3fedbf0ba7a22850e6e9fd78117f1c0f10f950182344d8a6c535d468fdd8/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c50f2528cf200c5eed56faf3f4e22fcd5f38c157a8b78576e6ba3168ec35f000", size = 2030777, upload-time = "2026-05-06T13:38:55.239Z" }, + { url = 
"https://files.pythonhosted.org/packages/f8/61/46be275fcaaba0b4f5b9669dd852267ce1ff616592dccf7a7845588df091/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0cbe8b01f948de4286c74cdd6c667aceb38f5c1e26f0693b3983d9d74887c65e", size = 2236641, upload-time = "2026-05-06T13:37:08.096Z" }, + { url = "https://files.pythonhosted.org/packages/60/db/12e93e46a8bac9988be3c016860f83293daea8c716c029c9ace279036f2f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:617d7e2ca7dcb8c5cf6bcb8c59b8832c94b36196bbf1cbd1bfb56ed341905edd", size = 2286404, upload-time = "2026-05-06T13:40:20.221Z" }, + { url = "https://files.pythonhosted.org/packages/e2/4a/4d8b19008f38d31c53b8219cfedc2e3d5de5fe99d90076b7e767de29274f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7027560ee92211647d0d34e3f7cd6f50da56399d26a9c8ad0da286d3869a53f3", size = 2109219, upload-time = "2026-05-06T13:38:12.153Z" }, + { url = "https://files.pythonhosted.org/packages/88/70/3cbc40978fefb7bb09c6708d40d4ad1a5d70fd7213c3d17f971de868ec1f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:f99626688942fb746e545232e7726926f3be91b5975f8b55327665fafda991c7", size = 2110594, upload-time = "2026-05-06T13:40:02.971Z" }, + { url = "https://files.pythonhosted.org/packages/9d/20/b8d36736216e29491125531685b2f9e61aa5b4b2599893f8268551da3338/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fc3e9034a63de20e15e8ade85358bc6efc614008cab72898b4b4952bea0509ff", size = 2159542, upload-time = "2026-05-06T13:39:27.506Z" }, + { url = "https://files.pythonhosted.org/packages/1d/a2/367df868eb584dacf6bf82a389272406d7178e301c4ac82545ab98bc2dd9/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:97e7cf2be5c77b7d1a9713a05605d49460d02c6078d38d8bef3cbe323c548424", size = 2168146, upload-time = "2026-05-06T13:38:31.93Z" }, + 
{ url = "https://files.pythonhosted.org/packages/c1/b8/4460f77f7e201893f649a29ab355dddd3beee8a97bcb1a320db414f9a06e/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:3bf92c5d0e00fefaab325a4d27828fe6b6e2a21848686b5b60d2d9eeb09d76c6", size = 2306309, upload-time = "2026-05-06T13:37:44.717Z" }, + { url = "https://files.pythonhosted.org/packages/64/c4/be2639293acd87dc8ddbcec41a73cee9b2ebf996fe6d892a1a74e88ad3f7/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:3ecbc122d18468d06ca279dc26a8c2e2d5acb10943bb35e36ae92096dc3b5565", size = 2369736, upload-time = "2026-05-06T13:37:05.645Z" }, + { url = "https://files.pythonhosted.org/packages/30/a6/9f9f380dbb301f67023bf8f707aaa75daadf84f7152d95c410fd7e81d994/pydantic_core-2.46.4-cp314-cp314t-win32.whl", hash = "sha256:e846ae7835bf0703ae43f534ab79a867146dadd59dc9ca5c8b53d5c8f7c9ef02", size = 1955575, upload-time = "2026-05-06T13:38:51.116Z" }, + { url = "https://files.pythonhosted.org/packages/40/1f/f1eb9eb350e795d1af8586289746f5c5677d16043040d63710e22abc43c9/pydantic_core-2.46.4-cp314-cp314t-win_amd64.whl", hash = "sha256:2108ba5c1c1eca18030634489dc544844144ee36357f2f9f780b93e7ddbb44b5", size = 2051624, upload-time = "2026-05-06T13:38:21.672Z" }, + { url = "https://files.pythonhosted.org/packages/f6/d2/42dd53d0a85c27606f316d3aa5d2869c4e8470a5ed6dec30e4a1abe19192/pydantic_core-2.46.4-cp314-cp314t-win_arm64.whl", hash = "sha256:4fcbe087dbc2068af7eda3aa87634eba216dbda64d1ae73c8684b621d33f6596", size = 2017325, upload-time = "2026-05-06T13:40:52.723Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1d/8987ad40f65ae1432753072f214fb5c74fe47ffbd0698bb9cbbb585664f8/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:1d8ba486450b14f3b1d63bc521d410ec7565e52f887b9fb671791886436a42f7", size = 2095527, upload-time = "2026-05-06T13:39:52.283Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/d3/84c282a7eee1d3ac4c0377546ef5a1ea436ce26840d9ac3b7ed54a377507/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:3009f12e4e90b7f88b4f9adb1b0c4a3d58fe7820f3238c190047209d148026df", size = 1936024, upload-time = "2026-05-06T13:40:15.671Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ca/eac61596cdeb4d7e174d3dc0bd8a6238f14f75f97a24e7b7db4c7e7340a0/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad785e92e6dc634c21555edc8bd6b64957ab844541bcb96a1366c202951ae526", size = 1990696, upload-time = "2026-05-06T13:38:34.717Z" }, + { url = "https://files.pythonhosted.org/packages/fa/c3/7c8b240552251faf6b3a957db200fcfbbcec36763c050428b601e0c9b83b/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00c603d540afdd6b80eb39f078f33ebd46211f02f33e34a32d9f053bba711de0", size = 2147590, upload-time = "2026-05-06T13:39:29.883Z" }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, +] + +[[package]] +name = "pyiceberg" +version = "0.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "click" }, + { name = "fsspec" }, + { name = "mmh3" }, + { name = 
"pydantic" }, + { name = "pyparsing" }, + { name = "pyroaring" }, + { name = "requests" }, + { name = "rich" }, + { name = "strictyaml" }, + { name = "tenacity" }, + { name = "zstandard" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ce/f0/7616676603fdbd05ab97816337a9b31be08a5f9e1ffd636260812b217e0f/pyiceberg-0.11.1.tar.gz", hash = "sha256:366fe0d5a74e3cf1d4e7cbf3c49e308da60e7835ea268667be9185388f05d7a5", size = 1076075, upload-time = "2026-03-03T00:10:27.61Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/84/a140466b7e0841207e6b77042e03d4ab3a4f9d47e00f0bbbcc5420792bbb/pyiceberg-0.11.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cd423b8ee2f75fc9db09158875abe5e2c952a26ae5e521c3265ab2f9d3511ddf", size = 532981, upload-time = "2026-03-03T00:10:08.906Z" }, + { url = "https://files.pythonhosted.org/packages/17/10/6bedd784010f707680ffd0606d4d11394cf915f4f9f54ae16e8007e00ad4/pyiceberg-0.11.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e273242cdca56029af694d7ce18075d47a74d034326d663ff6dd2655a6f44825", size = 533188, upload-time = "2026-03-03T00:10:10.086Z" }, + { url = "https://files.pythonhosted.org/packages/f1/a3/79db617c3cffc963efa8a332707079d3f22fd58067b31a208d358dd89b39/pyiceberg-0.11.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b347d3cc8510f8fbe191956fcda7da372ebb3302789acefca08e352345959003", size = 729546, upload-time = "2026-03-03T00:10:11.413Z" }, + { url = "https://files.pythonhosted.org/packages/06/64/acc11d230c33817bced80d9d947bb49e7bb3a429d76d906523e3df86faf8/pyiceberg-0.11.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bba3a35b4648694783aeae5b77c235a57191c8b1b375c8602b03ae56a6cf4fe7", size = 730263, upload-time = "2026-03-03T00:10:13.283Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/1a/fb067d5150c7309fbf5dd126c648a6afed6259e7bc924ba3c65d0f87a333/pyiceberg-0.11.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a0f958cbca18d05846e3081dfff8575e73d45595441d659847479656dc76f91d", size = 724064, upload-time = "2026-03-03T00:10:14.55Z" }, + { url = "https://files.pythonhosted.org/packages/c1/71/103fdba5b144d55f3bb07347893737cc1d8fd71308108a77b7817c92c544/pyiceberg-0.11.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8c62636a1e9d8a1fc74ffb70383939b9cd93f2c9ee8e12015a50dd75c98a989e", size = 727239, upload-time = "2026-03-03T00:10:16.204Z" }, + { url = "https://files.pythonhosted.org/packages/18/c3/4db64429304c58c039f8e842cd37a9a1c472f596c2868ed2a5d2907b17ed/pyiceberg-0.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:1d6b6f0c1e7dd8357f1ba56524bfc870d04ad3c00979db291784a7145497ad3b", size = 531309, upload-time = "2026-03-03T00:10:17.561Z" }, + { url = "https://files.pythonhosted.org/packages/35/4c/a122d80d98cb6125d87024681263406433f0c25c699d503f5633521e6809/pyiceberg-0.11.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b7ec5db19feab98a31fcd5caccf4a9a4e83f96933d1ca393ba7aea665710c2bb", size = 532644, upload-time = "2026-03-03T00:10:18.574Z" }, + { url = "https://files.pythonhosted.org/packages/10/94/9a8fa5fc580e6dccd34bbbf51e7658cd7b49540e2458783addeff5e22a91/pyiceberg-0.11.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cec0616d2ba6e7dda6327089a2f34ec723aa9ac2c389857ef0b83f65fb135dd6", size = 532787, upload-time = "2026-03-03T00:10:19.656Z" }, + { url = "https://files.pythonhosted.org/packages/b3/ab/ab7c88828bc17d77dbbc5a765419dfec2135629e1d74cdd0762cd38ad867/pyiceberg-0.11.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ddb360da76c62c7c23ec3da40e1af48e6712a563905fea2d1a8911ff7a3b6c4d", size = 722202, upload-time = "2026-03-03T00:10:21.012Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/38/079cf1c0bf86da315472a926eec0dba10135f43374a2e267336eb98d8c76/pyiceberg-0.11.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d8790f420ebc484236017edba59182cf2a21bd3e4224a0bd0760a9c7268e96a", size = 724037, upload-time = "2026-03-03T00:10:22.176Z" }, + { url = "https://files.pythonhosted.org/packages/08/6b/08eaef477debb110438d943ef3f5985096f660ccb735d6344701cbd075a9/pyiceberg-0.11.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ae27ba4d37925d5b2cff192acaa70c8bb114d632bbc527cc91fea0370702b866", size = 716035, upload-time = "2026-03-03T00:10:23.789Z" }, + { url = "https://files.pythonhosted.org/packages/0b/59/7671d6a630ab1d85c6e7ca8ddf438dc63a0b0dd183bc4be69bf25c0fa5f6/pyiceberg-0.11.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:db66a4e0fdfbf4090631d59c3f65e960d9a5561e9259f6f3993cbe91e396837e", size = 720887, upload-time = "2026-03-03T00:10:24.824Z" }, + { url = "https://files.pythonhosted.org/packages/f0/2b/5c8ad37807efaedb14b20f01f36462684468c80da5b74f4018fb4c1804b5/pyiceberg-0.11.1-cp313-cp313-win_amd64.whl", hash = "sha256:eb3a0a3e630ee89758eb96b39b456f4697732351fb0c080e9498ea578f9b71f9", size = 530923, upload-time = "2026-03-03T00:10:26.196Z" }, +] + +[[package]] +name = "pyjwt" +version = "2.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, +] + 
+[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + +[[package]] +name = "pyparsing" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574, upload-time = "2026-01-21T03:57:59.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, +] + +[[package]] +name = "pyroaring" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/46/a50510d080f8cb089303ec0f7cd80736b2949ca3d148f48f1cc90c49e345/pyroaring-1.1.0.tar.gz", hash = "sha256:f02e4021397ae02a139defdc6813b9942ab163de90affddd4ce4efbac299f619", size = 200298, upload-time = "2026-04-24T21:29:25.212Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/e9/d8dcccfdac1657e6a53b6ade0c0c71d59244316810c82537a5d634f8b7fd/pyroaring-1.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:fdf484d26016e0c016f23f2b635d2899daec034565fdcc062ed6b10f3b26a3f4", size = 334166, upload-time = "2026-04-24T21:28:07.184Z" }, + { url = "https://files.pythonhosted.org/packages/1a/16/70f8268c9bac4f6d91a82df254332edfa07020fe02e97c6d3d0295ee4db3/pyroaring-1.1.0-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:e9c2b9aa8decdcf40ed8f4c887092c20a272f8c32215c3fee65e9db92ecf418e", size = 711970, upload-time = "2026-04-24T21:28:08.929Z" }, + { url = "https://files.pythonhosted.org/packages/6e/a6/0b88a8a8e4ffdd0bf53765c3ea17ce3b747f7de42b1f10d5c50a13ba3ecc/pyroaring-1.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = 
"sha256:5eb031237e9d39cbdfc9276facacdd88e27aefb58940bd8b56b878dfd38d6022", size = 385825, upload-time = "2026-04-24T21:28:10.077Z" }, + { url = "https://files.pythonhosted.org/packages/9f/a7/f06a899b896bb74a031201fbba707abf23e2485f44ead28d2e91977ee204/pyroaring-1.1.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:66aa6a321fbc598f26d5e66050a7f145c2253f3fe5737b589841ff0cbe5cb177", size = 2000799, upload-time = "2026-04-24T21:28:11.276Z" }, + { url = "https://files.pythonhosted.org/packages/eb/16/b13e0727ef5c91c84ac5d9b5c4af43cb3f28d8822d96d44aa4aeefc10b37/pyroaring-1.1.0-cp312-cp312-manylinux_2_24_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:8cff06d18c9a30f8547a92757078aa345db1ba5b22e3082a05f64e50b384e27a", size = 1843405, upload-time = "2026-04-24T21:28:12.394Z" }, + { url = "https://files.pythonhosted.org/packages/c5/4b/ff77d6eb4747c65aba49d345318059f1ccfbd206bbcd34686cea819187e9/pyroaring-1.1.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:043dbbaa905f7288c515ac06a96b67a3763f35e9ae06f0c0278c0d9964d16760", size = 2224307, upload-time = "2026-04-24T21:28:13.569Z" }, + { url = "https://files.pythonhosted.org/packages/50/d1/9b4e2175a9bd07592ef1c6f4692ba0cfe3abd54f33ebd574aa2b2ab88c2c/pyroaring-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:71fec09bd42f8c33ac3b762cd00c5db842eb583ffd0e361739ce1c17ad078a6a", size = 2902586, upload-time = "2026-04-24T21:28:15.485Z" }, + { url = "https://files.pythonhosted.org/packages/67/24/904952d6bfe2e4946f361958157774230cb1ac171d306e2460620274c58a/pyroaring-1.1.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c9f30ca28b991a920b446ed3ee19c7ecafcc49c46db592abf89cf239a7bb45f4", size = 2741581, upload-time = "2026-04-24T21:28:17.029Z" }, + { url = "https://files.pythonhosted.org/packages/1c/f5/8ad5605870fbdcb568f0b847e1fea24adea15e07c90231fb62f339c08b14/pyroaring-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:67650460c65bdd7b4f5078d9c955aa38f64627d02cb48f9cfb24eae84bca2aba", size = 3182906, upload-time = "2026-04-24T21:28:18.435Z" }, + { url = "https://files.pythonhosted.org/packages/cc/5d/264414a1b1bea72b2a7756e2b1dca709e5c695b0cd332a8f90c297ae3b33/pyroaring-1.1.0-cp312-cp312-win32.whl", hash = "sha256:61a8eabee99104ca197b6e7cce05dc4f27f503be52881800cd370eb5a5152d3f", size = 211231, upload-time = "2026-04-24T21:28:20.414Z" }, + { url = "https://files.pythonhosted.org/packages/ea/39/dd3341e235a3794c613ea32bd35618a88ff2ae067dbe9dd7c382c8c146d2/pyroaring-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:51ebe5e6f48e3dc9df91a4cb62137ef72e1469acd6f37479abd9991f6d945cc9", size = 263337, upload-time = "2026-04-24T21:28:21.371Z" }, + { url = "https://files.pythonhosted.org/packages/12/5d/bb8e93dd7412180c621086ed46014a0f09f9a71d9370ce8cf607c5a2cf00/pyroaring-1.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:9882a204178cc8c915e0ce30abb4bdd1668e383c571b06649d5ed272d9625877", size = 216727, upload-time = "2026-04-24T21:28:22.536Z" }, + { url = "https://files.pythonhosted.org/packages/7d/75/1d39ecb04e6cd96d191eb8884864355051df80928dd5096a9dea43fbf63b/pyroaring-1.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:72f68a16b00b35481d9b3bfe897ecd8a1f7da69efd92ba5b17347ca11c21cb0d", size = 333363, upload-time = "2026-04-24T21:28:23.838Z" }, + { url = "https://files.pythonhosted.org/packages/20/3e/65cd0871e86d11c5c5cfd0f5abb0ca80eb2b6b5dbe5a2433f315a9ebd90c/pyroaring-1.1.0-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:4c443e9f942b6089efe8c9b264576e9d116f90be28a315679375bba2d8a915d6", size = 710573, upload-time = "2026-04-24T21:28:24.884Z" }, + { url = "https://files.pythonhosted.org/packages/f9/a2/f8f23515f41414332e60cd86e4957e2a6838070b2ad5fe25e80f136de635/pyroaring-1.1.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:3beb40eb1220d1ce4fb3661bb019e9a21857e5bb294fe8c1c5016aeb6e82318c", size = 384880, upload-time = "2026-04-24T21:28:25.864Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/5b/82dc44b5074a1ff62e702d12611272d1711a60d5518dab23f94e1f7a9b3d/pyroaring-1.1.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f1f56004e8f1c1489bf279c25f1fa4764252cd9af5fb35675774268a4a615ba", size = 1999529, upload-time = "2026-04-24T21:28:26.859Z" }, + { url = "https://files.pythonhosted.org/packages/11/40/b07bac8cdc4b709a05f5c55bb52d4f684e5ea1fadfa0b6d9decf477a9d2a/pyroaring-1.1.0-cp313-cp313-manylinux_2_24_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:13660386ea8905ee4d42c21a6275463e2dc7d31e0b5d65eec210aa7043ad96f4", size = 1842927, upload-time = "2026-04-24T21:28:28.056Z" }, + { url = "https://files.pythonhosted.org/packages/0d/60/c4b511965802dfc77978a9e16f2813f47fb3083db1822019ba1bb169c685/pyroaring-1.1.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0dfb6cf50fd8898179e460e699a6b8326ca508c627d083f7bf62f769fe1717d5", size = 2199538, upload-time = "2026-04-24T21:28:29.425Z" }, + { url = "https://files.pythonhosted.org/packages/e8/12/38f6b50b3f3f41a8b752d3e9efcf105b18eb2c66811831059f25613734ac/pyroaring-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:81ebbc0c880c8a10f13118632e5c0d59159ceada8b651bba18f2e6dc70efdeda", size = 2896904, upload-time = "2026-04-24T21:28:30.67Z" }, + { url = "https://files.pythonhosted.org/packages/5a/b6/b5436e4b93c6bf2bd3dd6ccb88cbdc64b12084151a43e2f5c94be50eb710/pyroaring-1.1.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:370d191b0d1b32bbd99452ef5f0485f22fcc4bf7404d33b821d0ce2459951152", size = 2733819, upload-time = "2026-04-24T21:28:31.882Z" }, + { url = "https://files.pythonhosted.org/packages/ab/8f/f392f268de9607a5c7a95aaed6b9c8a81f00c14d85c33855e9f492095478/pyroaring-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b3bfad0ae3ef0e67b40c193863dce8b7d79de545dadbe53c19acc3ace38f66", size = 3161730, upload-time = "2026-04-24T21:28:33.244Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/a1/03250fd4834b6a5c13e6600bca47ea20fda579f80bce3551d4985185d164/pyroaring-1.1.0-cp313-cp313-win32.whl", hash = "sha256:eead129046822cb0fd47c78740b81bdaffd0515c0bb0306a2318acf0f0540b58", size = 211194, upload-time = "2026-04-24T21:28:35.001Z" }, + { url = "https://files.pythonhosted.org/packages/70/63/d9b307462cddc82fe94a67d6810e5c802818690e131ba690c1de674d8558/pyroaring-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:90ab2f00c09eed5bd986a80c8641e2dc10e7aca1a2d892d89a44b396e39c08ea", size = 263110, upload-time = "2026-04-24T21:28:35.976Z" }, + { url = "https://files.pythonhosted.org/packages/d9/4a/aa6e9833a6ba9a630efdbec8783b63da6602f763b37a5b5fbc01d73a1af1/pyroaring-1.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:51dd2490a64ad4ed53c4fb58ef1ee3f84f6cbd97cdb47abd9065c9f714ab72ef", size = 216546, upload-time = "2026-04-24T21:28:37.065Z" }, + { url = "https://files.pythonhosted.org/packages/93/ab/2260fd567a2d5d957393b932ea940dc31146bd509c88164c1b786eee7836/pyroaring-1.1.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:5e337f8c5b3c2e0c27da83fc2cb702684a47eee907a960cfee964fcb5344515b", size = 335093, upload-time = "2026-04-24T21:28:38.325Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6c/df82a832ff3760c7c7653b80d030fb43b18eb88bfa604e7de3e84457286e/pyroaring-1.1.0-cp314-cp314-macosx_14_0_universal2.whl", hash = "sha256:53acecba8f898e96b84d4139356e30719c70358177e270055901d3ec1cb0e34c", size = 712387, upload-time = "2026-04-24T21:28:39.404Z" }, + { url = "https://files.pythonhosted.org/packages/12/b9/a94d6b2d7a1be2fa5009ecfc345bacb2ee0b536020aeb23e92c6bb7e70f2/pyroaring-1.1.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:986efb3aec7655d69c14db2309a2072dbf181bdb906091fede83ad18e316cdaf", size = 385413, upload-time = "2026-04-24T21:28:40.563Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/6a/3658eadbe28a5a2093c27857dd21441f1ea1cede2ddbe367df76e3018859/pyroaring-1.1.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92643c9dd303de8960c3dbed93a28b8d87da5ed0a7776568979f379d7bc8a885", size = 1995135, upload-time = "2026-04-24T21:28:41.931Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e2/4706ec770790d3433520eb0ea98fc662ccb1533164fd00b01f3413c3425c/pyroaring-1.1.0-cp314-cp314-manylinux_2_24_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6a1d4c59d5b23c01d62f86d57ceefd0c0977de0425aafa7069f2d70563fed3b8", size = 1833652, upload-time = "2026-04-24T21:28:43.381Z" }, + { url = "https://files.pythonhosted.org/packages/2e/38/b8b861738e49fd4c4a54bebe257dced603999365629b4e10cb85fac940b0/pyroaring-1.1.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8ac1bc26223befbca986551521f37f4c1670dfe26fccb2f0fc2775e75be99c1", size = 2188218, upload-time = "2026-04-24T21:28:44.487Z" }, + { url = "https://files.pythonhosted.org/packages/01/8c/96afa9b5f509a5c607deaf30538edb3bdf026447a864cfe3f2c3d7484875/pyroaring-1.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b490f2d22df30affbfdcbe4f7896f321edb72a8dc0cbe5f38adec3de5b947c25", size = 2898243, upload-time = "2026-04-24T21:28:45.9Z" }, + { url = "https://files.pythonhosted.org/packages/f9/1e/86f8720525250fe742fc77ea5c2a2074a1ea830efe84a79112ce6fe113d3/pyroaring-1.1.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:56a67794188275f8897a8f1fa64d6313c48241bebbdef38833063e7281b29ef8", size = 2715091, upload-time = "2026-04-24T21:28:47.721Z" }, + { url = "https://files.pythonhosted.org/packages/9d/21/29e8f58c8af2ce016904a7e6aa61be675945224971cd70f3f698e584a23f/pyroaring-1.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9d9f196007f0b15ea19c21732faacaea83cbf5946b6db4949b3b98cf871c93f0", size = 3149470, upload-time = "2026-04-24T21:28:48.918Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/e1/f67ef1c9de461a80707a2f2981320b1b30632720bac426a9bfd51e4744b6/pyroaring-1.1.0-cp314-cp314-win32.whl", hash = "sha256:abc0f0ce22464864fea208315d25e999e45cb5ee646ac1ca11d314a6a51dbe4a", size = 216552, upload-time = "2026-04-24T21:28:50.652Z" }, + { url = "https://files.pythonhosted.org/packages/e8/83/a8d9fee17e6eedf2a2281b2aabcdde86930408486381ec48d1f7d3404521/pyroaring-1.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:532ae6bb1d3431d9956ef07589dd5c8dd918301a83d937c7dc6e511b1364d76a", size = 270712, upload-time = "2026-04-24T21:28:51.817Z" }, + { url = "https://files.pythonhosted.org/packages/42/50/ab2bf3fe45e4c2952690d657321a8470558f92cf93cb197fe5d31f7110d2/pyroaring-1.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:d2706a89242a347be20805147d58a38f4f4d8f6846228c4ee8dfd3587113719c", size = 224783, upload-time = "2026-04-24T21:28:53.183Z" }, + { url = "https://files.pythonhosted.org/packages/d4/0c/9eb48ac698280170f184045814b7bd44829af37c1c6de79a4d7b5ea0c8b8/pyroaring-1.1.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:39eff7dd06c163c22d0a9f9fd72d27e671457bea8cdb71215382a10512539e1d", size = 345689, upload-time = "2026-04-24T21:28:54.494Z" }, + { url = "https://files.pythonhosted.org/packages/f6/61/66b18f8ed17e70f88a410dcfac21e5964c2ad01bd4d6a25024a87522c8a9/pyroaring-1.1.0-cp314-cp314t-macosx_14_0_universal2.whl", hash = "sha256:562fa04bbfd41144d1276ed79505007557c161371450d68a1d71fc83dc01d083", size = 732373, upload-time = "2026-04-24T21:28:55.525Z" }, + { url = "https://files.pythonhosted.org/packages/04/7a/976482874ea5e4476f9dd84e7d0274e480446b1b6ab45dfe301281814b3b/pyroaring-1.1.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:591e2ed4d60443dafd9075c1f72e9aaf359ccf5120e32a8c340c2b2ae3da45e7", size = 392985, upload-time = "2026-04-24T21:28:56.629Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/22/1eed09ff3aa792865dd52ef447cbe52dbc5901ed88bf1cf4513f7220150e/pyroaring-1.1.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:381eda673442c389993f8b0db2dbf5d02ea8ea9aac6ba736f64cc1ffb6c96885", size = 2045479, upload-time = "2026-04-24T21:28:58.008Z" }, + { url = "https://files.pythonhosted.org/packages/7d/47/de43464f9b28c445868e4bc8f0e6c6dcd51103bd9a757e3dcd9af25a4a69/pyroaring-1.1.0-cp314-cp314t-manylinux_2_24_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d9127feb5356ba3a92bdffa04c1bf6bcbc8d436369f78badf441018c3029dd63", size = 1853013, upload-time = "2026-04-24T21:28:59.265Z" }, + { url = "https://files.pythonhosted.org/packages/03/17/29b128a580ec43905fb766b934e7dcb1095059e99e38e941edf50152207b/pyroaring-1.1.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:650db21c10f42ff2b09ef02c10a779a3d59d0c7512552f3844738b30adbcb8a5", size = 2222628, upload-time = "2026-04-24T21:29:00.792Z" }, + { url = "https://files.pythonhosted.org/packages/bd/d8/bb7d69978a5fcac95da48bedb114554d8345b50b77f042e8cd2a8277bb4b/pyroaring-1.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8a5fbcb86e44f1c0c9c052917eee67a04cbac9de7392fb4bc77c140ff4a7e471", size = 2931231, upload-time = "2026-04-24T21:29:02.275Z" }, + { url = "https://files.pythonhosted.org/packages/0b/12/d44d144352a4586544313a97b2576f8f8673b98c02ae7fed77d38751cc1f/pyroaring-1.1.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:0f1d76ef29034017eb2cceebd5fa0504d6ced218ce6432f99da5adecbe038269", size = 2729546, upload-time = "2026-04-24T21:29:03.414Z" }, + { url = "https://files.pythonhosted.org/packages/0c/f9/0d01c6ba01c0d01609fddb1d46138a7ae95b7db386bac4afb0ff082d5c0e/pyroaring-1.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:19d0c81c865d63791fe20e5b38733b66f4f962e677ae7e8b3d3c4947ac6e752f", size = 3177123, upload-time = "2026-04-24T21:29:04.619Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/6d/f991526fdef3cf7739f6db0cdf12b157e840e0ddd4a7e1c2a477da9072d6/pyroaring-1.1.0-cp314-cp314t-win32.whl", hash = "sha256:1fc112b9a9890f89cc645a16604783ed7fa25299f149b0ef7b45a5e2e3c1f31f", size = 241484, upload-time = "2026-04-24T21:29:06.114Z" }, + { url = "https://files.pythonhosted.org/packages/2e/6e/fb9876940acb50df355a473c087b9924e7b3368070403683941653b6fabc/pyroaring-1.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d92a0f4c7e6bb7deeafac68c79c92ef9340895fe825cf1a31078443753ab6756", size = 304537, upload-time = "2026-04-24T21:29:07.312Z" }, + { url = "https://files.pythonhosted.org/packages/8a/e0/39afe4bddbed6276c54e35e310aa345fbeb00f8890e96e7f48cdc2be9c66/pyroaring-1.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99c42fe1449acfbf130da65e66b4d5b2726aba4497be359bae7672e38a15fc62", size = 234615, upload-time = "2026-04-24T21:29:08.751Z" }, +] + +[[package]] +name = "pytest" +version = "8.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "0.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/8e/c4/453c52c659521066969523e87d85d54139bbd17b78f09532fb8eb8cdb58e/pytest_asyncio-0.26.0.tar.gz", hash = "sha256:c4df2a697648241ff39e7f0e4a73050b03f123f760673956cf0d72a4990e312f", size = 54156, upload-time = "2025-03-25T06:22:28.883Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/7f/338843f449ace853647ace35870874f69a764d251872ed1b4de9f234822c/pytest_asyncio-0.26.0-py3-none-any.whl", hash = "sha256:7b51ed894f4fbea1340262bdae5135797ebbe21d8638978e35d31c6d19f72fb0", size = 19694, upload-time = "2025-03-25T06:22:27.807Z" }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = 
"sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + +[[package]] +name = "python-multipart" +version = "0.0.28" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/54/a85eb421fbdd5007bc5af39d0f4ed9fa609e0fedbfdc2adcf0b34526870e/python_multipart-0.0.28.tar.gz", hash = "sha256:8550da197eac0f7ab748961fc9509b999fa2662ea25cef857f05249f6893c0f8", size = 45314, upload-time = "2026-05-10T11:05:16.596Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/a2/43bbc5860b5034e2af4ef99a0e04d726ff329c43e192ef3abaa8d7ecfce5/python_multipart-0.0.28-py3-none-any.whl", hash = "sha256:10faac07eb966c3f48dc415f9dee46c04cb10d58d30a35677db8027c825ed9b6", size = 29438, upload-time = "2026-05-10T11:05:15.052Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = 
"https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "realtime" +version = "2.30.0" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "typing-extensions" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b2/a2/0328d49d3b5fb427068e9200e7de5b0d708d021a1ad98d004bc685d2529e/realtime-2.30.0.tar.gz", hash = "sha256:7aa593da52ed5f92c34ec4e50e32043afa62f219c94f717ad64a66ab0ef9f1ba", size = 18718, upload-time = "2026-05-06T17:35:23.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/75/1b2cfc949595e22d8c05a2aa2cfc222921f7f94177d7e8a90542f3f73b33/realtime-2.30.0-py3-none-any.whl", hash = "sha256:7c93b63d2cf99aa1da4fa8826b03b00cd32f7b38abb27ff47b19eb5dcb5707c6", size = 22376, upload-time = "2026-05-06T17:35:22.568Z" }, +] + +[[package]] +name = "requests" +version = "2.34.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ac/c3/e2a2b89f2d3e2179abd6d00ebd70bff6273f37fb3e0cc209f48b39d00cbf/requests-2.34.2.tar.gz", hash = "sha256:f288924cae4e29463698d6d60bc6a4da69c89185ad1e0bcc4104f584e960b9ed", size = 142856, upload-time = "2026-05-14T19:25:27.735Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/f4/c67b0b3f1b9245e8d266f0f112c500d50e5b4e83cb6f3b71b6528104182a/requests-2.34.2-py3-none-any.whl", hash = "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0", size = 73075, upload-time = "2026-05-14T19:25:26.443Z" }, +] + +[[package]] +name = "respx" +version = "0.23.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/98/4e55c9c486404ec12373708d015ebce157966965a5ebe7f28ff2c784d41b/respx-0.23.1.tar.gz", hash = "sha256:242dcc6ce6b5b9bf621f5870c82a63997e8e82bc7c947f9ffe272b8f3dd5a780", size = 29243, upload-time = "2026-04-08T14:37:16.008Z" 
} +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/4a/221da6ca167db45693d8d26c7dc79ccfc978a440251bf6721c9aaf251ac0/respx-0.23.1-py2.py3-none-any.whl", hash = "sha256:b18004b029935384bccfa6d7d9d74b4ec9af73a081cc28600fffc0447f4b8c1a", size = 25557, upload-time = "2026-04-08T14:37:14.613Z" }, +] + +[[package]] +name = "rich" +version = "14.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/67/cae617f1351490c25a4b8ac3b8b63a4dda609295d8222bad12242dfdc629/rich-14.3.4.tar.gz", hash = "sha256:817e02727f2b25b40ef56f5aa2217f400c8489f79ca8f46ea2b70dd5e14558a9", size = 230524, upload-time = "2026-04-11T02:57:45.419Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/76/6d163cfac87b632216f71879e6b2cf17163f773ff59c00b5ff4900a80fa3/rich-14.3.4-py3-none-any.whl", hash = "sha256:07e7adb4690f68864777b1450859253bed81a99a31ac321ac1817b2313558952", size = 310480, upload-time = "2026-04-11T02:57:47.484Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "starlette" +version = "0.46.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846, upload-time = "2025-04-13T13:56:17.942Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload-time = "2025-04-13T13:56:16.21Z" }, +] + +[[package]] +name = "storage3" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecation" }, + { name = "httpx", extra = ["http2"] }, + { name = "pydantic" }, + { name = "pyiceberg" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9b/b2/6df208d64630744704d00f2c07197170390d6b4d0098617740f6a7a4fa98/storage3-2.30.0.tar.gz", hash = "sha256:b74e3cac149f2c0553dcb5f4d55d8c35d420d88183a1a2df77727d482665972b", size = 20162, upload-time = "2026-05-06T17:35:25.71Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/5c/bb8c8cc448cfae671c4ffee67f3651892ea59b341f27bed54666190eb8ef/storage3-2.30.0-py3-none-any.whl", hash = "sha256:2bd23a34011c018bd9c130d8a70a09ebd060ae80d946c6204a6fc08161ad728d", size = 28284, upload-time = "2026-05-06T17:35:24.659Z" }, +] + +[[package]] +name = "strenum" +version = "0.4.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/ad/430fb60d90e1d112a62ff57bdd1f286ec73a2a0331272febfddd21f330e1/StrEnum-0.4.15.tar.gz", hash = "sha256:878fb5ab705442070e4dd1929bb5e2249511c0bcf2b0eeacf3bcd80875c82eff", size = 23384, upload-time = "2023-06-29T22:02:58.399Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/81/69/297302c5f5f59c862faa31e6cb9a4cd74721cd1e052b38e464c5b402df8b/StrEnum-0.4.15-py3-none-any.whl", hash = "sha256:a30cda4af7cc6b5bf52c8055bc4bf4b2b6b14a93b574626da33df53cf7740659", size = 8851, upload-time = "2023-06-29T22:02:56.947Z" }, +] + +[[package]] +name = "strictyaml" +version = "1.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/08/efd28d49162ce89c2ad61a88bd80e11fb77bc9f6c145402589112d38f8af/strictyaml-1.7.3.tar.gz", hash = "sha256:22f854a5fcab42b5ddba8030a0e4be51ca89af0267961c8d6cfa86395586c407", size = 115206, upload-time = "2023-03-10T12:50:27.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/7c/a81ef5ef10978dd073a854e0fa93b5d8021d0594b639cc8f6453c3c78a1d/strictyaml-1.7.3-py3-none-any.whl", hash = "sha256:fb5c8a4edb43bebb765959e420f9b3978d7f1af88c80606c03fb420888f5d1c7", size = 123917, upload-time = "2023-03-10T12:50:17.242Z" }, +] + +[[package]] +name = "supabase" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "postgrest" }, + { name = "realtime" }, + { name = "storage3" }, + { name = "supabase-auth" }, + { name = "supabase-functions" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5c/a6/d2b17021c2db1a9d219c383e0762ac03a62b25468e61ab126b6b561c2f21/supabase-2.30.0.tar.gz", hash = "sha256:efdba41d474038ed220736ba4e64946df56043057ad785c4c3499d27e459975c", size = 9689, upload-time = "2026-05-06T17:35:27.781Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/82/d213be7d0ce0bb18018744c0ee38ba0d6648d41dbc46ac8558cffe80541f/supabase-2.30.0-py3-none-any.whl", hash = "sha256:f9b259194554f7bfd2dca6c23261f2df588016ca18b18e774f4d85bc941edb03", size = 16634, upload-time = "2026-05-06T17:35:26.696Z" }, +] + +[[package]] +name = "supabase-auth" +version = 
"2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx", extra = ["http2"] }, + { name = "pydantic" }, + { name = "pyjwt", extra = ["crypto"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8e/8a/48bbbe0b6703d0670b67e45b90d6a791fd01aace67443d286f760bf48895/supabase_auth-2.30.0.tar.gz", hash = "sha256:6138a53a306a95ed59c03d4e4975469dfc3343a0ade33cc4b37e4ef967ad83f8", size = 39135, upload-time = "2026-05-06T17:35:30.371Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/40/a99cb4373353bcbf302d962e51da9eac78b3b0f257eb0362c0852b1667f4/supabase_auth-2.30.0-py3-none-any.whl", hash = "sha256:e85e1f51ec0de2172c3a2a8514205f71731a9914f9a770ed199ac0cf054bc82c", size = 48352, upload-time = "2026-05-06T17:35:28.936Z" }, +] + +[[package]] +name = "supabase-functions" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx", extra = ["http2"] }, + { name = "strenum" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f0/e6/5cd8559ec2bb332e6027840c1be292f9989c2fc7b47bf40800aec5586791/supabase_functions-2.30.0.tar.gz", hash = "sha256:025acfd25f1c000ba43d0f7b8e366b0d2e9dfc784b842528e21973eb33006113", size = 4683, upload-time = "2026-05-06T17:35:32.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/da/9dedab32775df04cc22ca72f194b78e895d940f195bed3e02882a65daa9b/supabase_functions-2.30.0-py3-none-any.whl", hash = "sha256:92419459f102767b954cd034856e4ded8e34c78660b32442d66c8b2899c68011", size = 8803, upload-time = "2026-05-06T17:35:31.342Z" }, +] + +[[package]] +name = "tenacity" +version = "9.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/c6/ee486fd809e357697ee8a44d3d69222b344920433d3b6666ccd9b374630c/tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a", size = 49413, upload-time = 
"2026-02-07T10:45:33.841Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55", size = 28926, upload-time = "2026-02-07T10:45:32.24Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "urllib3" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/53/0c/06f8b233b8fd13b9e5ee11424ef85419ba0d8ba0b3138bf360be2ff56953/urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", size = 433602, upload-time = "2026-05-07T16:13:18.596Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/3e/5db95bcf282c52709639744ca2a8b149baccf648e39c8cc87553df9eae0c/urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897", size = 131087, upload-time = "2026-05-07T16:13:17.151Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.32.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6a/3c/21dba3e7d76138725ef307e3d7ddd29b763119b3aa459d02cc05fefcff75/uvicorn-0.32.1.tar.gz", hash = "sha256:ee9519c246a72b1c084cea8d3b44ed6026e78a4a309cbedae9c37e4cb9fbb175", size = 77630, upload-time = "2024-11-20T19:41:13.341Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/50/c1/2d27b0a15826c2b71dcf6e2f5402181ef85acf439617bb2f1453125ce1f3/uvicorn-0.32.1-py3-none-any.whl", hash = "sha256:82ad92fd58da0d12af7482ecdb5f2470a04c9c9a53ced65b9bbb4a205377602e", size = 63828, upload-time = "2024-11-20T19:41:11.244Z" }, +] + +[package.optional-dependencies] +standard = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "httptools" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, + { name = "watchfiles" }, + { name = "websockets" }, +] + +[[package]] +name = "uvloop" +version = "0.22.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = 
"sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" }, + { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" }, + { url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" }, + { url = "https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" }, + { url = "https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" }, + { url = "https://files.pythonhosted.org/packages/99/39/6b3f7d234ba3964c428a6e40006340f53ba37993f46ed6e111c6e9141d18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0", size = 4296343, upload-time = "2025-10-16T22:16:35.149Z" }, + { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" }, + { url = "https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" }, + { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, + { url = "https://files.pythonhosted.org/packages/90/cd/b62bdeaa429758aee8de8b00ac0dd26593a9de93d302bff3d21439e9791d/uvloop-0.22.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3879b88423ec7e97cd4eba2a443aa26ed4e59b45e6b76aabf13fe2f27023a142", size = 1362067, upload-time = "2025-10-16T22:16:44.503Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f8/a132124dfda0777e489ca86732e85e69afcd1ff7686647000050ba670689/uvloop-0.22.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4baa86acedf1d62115c1dc6ad1e17134476688f08c6efd8a2ab076e815665c74", size = 752423, upload-time = "2025-10-16T22:16:45.968Z" }, + { url = "https://files.pythonhosted.org/packages/a3/94/94af78c156f88da4b3a733773ad5ba0b164393e357cc4bd0ab2e2677a7d6/uvloop-0.22.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:297c27d8003520596236bdb2335e6b3f649480bd09e00d1e3a99144b691d2a35", size = 4272437, upload-time = "2025-10-16T22:16:47.451Z" }, + { url = "https://files.pythonhosted.org/packages/b5/35/60249e9fd07b32c665192cec7af29e06c7cd96fa1d08b84f012a56a0b38e/uvloop-0.22.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1955d5a1dd43198244d47664a5858082a3239766a839b2102a269aaff7a4e25", size = 4292101, upload-time = "2025-10-16T22:16:49.318Z" }, + { url = "https://files.pythonhosted.org/packages/02/62/67d382dfcb25d0a98ce73c11ed1a6fba5037a1a1d533dcbb7cab033a2636/uvloop-0.22.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b31dc2fccbd42adc73bc4e7cdbae4fc5086cf378979e53ca5d0301838c5682c6", size = 4114158, upload-time = "2025-10-16T22:16:50.517Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/f1171b4a882a5d13c8b7576f348acfe6074d72eaf52cccef752f748d4a9f/uvloop-0.22.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:93f617675b2d03af4e72a5333ef89450dfaa5321303ede6e67ba9c9d26878079", size = 4177360, upload-time = "2025-10-16T22:16:52.646Z" }, + { url = "https://files.pythonhosted.org/packages/79/7b/b01414f31546caf0919da80ad57cbfe24c56b151d12af68cee1b04922ca8/uvloop-0.22.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:37554f70528f60cad66945b885eb01f1bb514f132d92b6eeed1c90fd54ed6289", size = 1454790, upload-time = "2025-10-16T22:16:54.355Z" }, + { url = "https://files.pythonhosted.org/packages/d4/31/0bb232318dd838cad3fa8fb0c68c8b40e1145b32025581975e18b11fab40/uvloop-0.22.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b76324e2dc033a0b2f435f33eb88ff9913c156ef78e153fb210e03c13da746b3", size = 796783, upload-time = "2025-10-16T22:16:55.906Z" }, + { url = "https://files.pythonhosted.org/packages/42/38/c9b09f3271a7a723a5de69f8e237ab8e7803183131bc57c890db0b6bb872/uvloop-0.22.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:badb4d8e58ee08dad957002027830d5c3b06aea446a6a3744483c2b3b745345c", size = 4647548, upload-time = "2025-10-16T22:16:57.008Z" }, + { url = "https://files.pythonhosted.org/packages/c1/37/945b4ca0ac27e3dc4952642d4c900edd030b3da6c9634875af6e13ae80e5/uvloop-0.22.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b91328c72635f6f9e0282e4a57da7470c7350ab1c9f48546c0f2866205349d21", size = 4467065, upload-time = "2025-10-16T22:16:58.206Z" }, + { url = "https://files.pythonhosted.org/packages/97/cc/48d232f33d60e2e2e0b42f4e73455b146b76ebe216487e862700457fbf3c/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:daf620c2995d193449393d6c62131b3fbd40a63bf7b307a1527856ace637fe88", size = 4328384, upload-time = "2025-10-16T22:16:59.36Z" }, + { url = "https://files.pythonhosted.org/packages/e4/16/c1fd27e9549f3c4baf1dc9c20c456cd2f822dbf8de9f463824b0c0357e06/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:6cde23eeda1a25c75b2e07d39970f3374105d5eafbaab2a4482be82f272d5a5e", size = 4296730, upload-time = "2025-10-16T22:17:00.744Z" }, +] + +[[package]] +name = "watchfiles" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745, upload-time = "2025-10-14T15:04:46.731Z" }, + { url = "https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769, upload-time = "2025-10-14T15:04:48.003Z" }, + { url = "https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374, upload-time = "2025-10-14T15:04:49.179Z" }, + { url = "https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485, upload-time = "2025-10-14T15:04:50.155Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813, upload-time = "2025-10-14T15:04:51.059Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2b/8530ed41112dd4a22f4dcfdb5ccf6a1baad1ff6eed8dc5a5f09e7e8c41c7/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa", size = 594816, upload-time = "2025-10-14T15:04:52.031Z" }, + { url = "https://files.pythonhosted.org/packages/ce/d2/f5f9fb49489f184f18470d4f99f4e862a4b3e9ac2865688eb2099e3d837a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb", size = 475186, upload-time = "2025-10-14T15:04:53.064Z" }, + { url = "https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812, upload-time = "2025-10-14T15:04:55.174Z" }, + { url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196, upload-time = "2025-10-14T15:04:56.22Z" }, + { url = "https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657, upload-time = "2025-10-14T15:04:57.521Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042, upload-time = "2025-10-14T15:04:59.046Z" }, + { url = "https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410, upload-time = "2025-10-14T15:05:00.081Z" }, + { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209, upload-time = "2025-10-14T15:05:01.168Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321, upload-time = "2025-10-14T15:05:02.063Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783, upload-time = "2025-10-14T15:05:03.052Z" }, + { url = "https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279, upload-time = "2025-10-14T15:05:04.004Z" }, + { url = 
"https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405, upload-time = "2025-10-14T15:05:04.942Z" }, + { url = "https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976, upload-time = "2025-10-14T15:05:05.905Z" }, + { url = "https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506, upload-time = "2025-10-14T15:05:06.906Z" }, + { url = "https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936, upload-time = "2025-10-14T15:05:07.906Z" }, + { url = "https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147, upload-time = "2025-10-14T15:05:09.138Z" }, + { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007, upload-time = "2025-10-14T15:05:10.117Z" }, + { url = 
"https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280, upload-time = "2025-10-14T15:05:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056, upload-time = "2025-10-14T15:05:12.156Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162, upload-time = "2025-10-14T15:05:13.208Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909, upload-time = "2025-10-14T15:05:14.49Z" }, + { url = "https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389, upload-time = "2025-10-14T15:05:15.777Z" }, + { url = "https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964, upload-time = "2025-10-14T15:05:16.85Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114, upload-time = "2025-10-14T15:05:17.876Z" }, + { url = "https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264, upload-time = "2025-10-14T15:05:18.962Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877, upload-time = "2025-10-14T15:05:20.094Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176, upload-time = "2025-10-14T15:05:21.134Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577, upload-time = "2025-10-14T15:05:22.306Z" }, + { url = "https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425, upload-time = 
"2025-10-14T15:05:23.348Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826, upload-time = "2025-10-14T15:05:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208, upload-time = "2025-10-14T15:05:25.45Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315, upload-time = "2025-10-14T15:05:26.501Z" }, + { url = "https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869, upload-time = "2025-10-14T15:05:27.649Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919, upload-time = "2025-10-14T15:05:28.701Z" }, + { url = "https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845, upload-time = "2025-10-14T15:05:30.064Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027, upload-time = "2025-10-14T15:05:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615, upload-time = "2025-10-14T15:05:32.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836, upload-time = "2025-10-14T15:05:33.209Z" }, + { url = "https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099, upload-time = "2025-10-14T15:05:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626, upload-time = "2025-10-14T15:05:35.216Z" }, + { url = "https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519, upload-time = "2025-10-14T15:05:36.259Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078, upload-time = "2025-10-14T15:05:37.63Z" }, + { url = "https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664, upload-time = "2025-10-14T15:05:38.95Z" }, + { url = "https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154, upload-time = "2025-10-14T15:05:39.954Z" }, + { url = "https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820, upload-time = "2025-10-14T15:05:40.932Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510, upload-time = "2025-10-14T15:05:41.945Z" }, + { url = "https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408, upload-time = "2025-10-14T15:05:43.385Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968, upload-time = "2025-10-14T15:05:44.404Z" }, + { url = "https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096, upload-time = "2025-10-14T15:05:45.398Z" }, + { url = "https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040, upload-time = "2025-10-14T15:05:46.502Z" }, + { url = "https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847, upload-time = "2025-10-14T15:05:47.484Z" }, + { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072, upload-time = "2025-10-14T15:05:48.928Z" }, + { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104, upload-time = "2025-10-14T15:05:49.908Z" }, + { 
url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, +] + +[[package]] +name = "websockets" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437, upload-time = "2025-03-05T20:02:16.706Z" }, + { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096, upload-time = "2025-03-05T20:02:18.832Z" }, + { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332, upload-time = "2025-03-05T20:02:20.187Z" }, + { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152, upload-time = 
"2025-03-05T20:02:22.286Z" }, + { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096, upload-time = "2025-03-05T20:02:24.368Z" }, + { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523, upload-time = "2025-03-05T20:02:25.669Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790, upload-time = "2025-03-05T20:02:26.99Z" }, + { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165, upload-time = "2025-03-05T20:02:30.291Z" }, + { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160, upload-time = "2025-03-05T20:02:31.634Z" }, + { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395, upload-time = 
"2025-03-05T20:02:33.017Z" }, + { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841, upload-time = "2025-03-05T20:02:34.498Z" }, + { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload-time = "2025-03-05T20:02:36.695Z" }, + { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098, upload-time = "2025-03-05T20:02:37.985Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329, upload-time = "2025-03-05T20:02:39.298Z" }, + { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111, upload-time = "2025-03-05T20:02:40.595Z" }, + { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054, upload-time = "2025-03-05T20:02:41.926Z" }, + { url 
= "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496, upload-time = "2025-03-05T20:02:43.304Z" }, + { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829, upload-time = "2025-03-05T20:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217, upload-time = "2025-03-05T20:02:50.14Z" }, + { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" }, + { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" }, + { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, +] + +[[package]] +name = "yarl" +version = "1.23.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "multidict" }, + { name = "propcache" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/23/6e/beb1beec874a72f23815c1434518bfc4ed2175065173fb138c3705f658d4/yarl-1.23.0.tar.gz", hash = "sha256:53b1ea6ca88ebd4420379c330aea57e258408dd0df9af0992e5de2078dc9f5d5", size = 194676, upload-time = "2026-03-01T22:07:53.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/8a/94615bc31022f711add374097ad4144d569e95ff3c38d39215d07ac153a0/yarl-1.23.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1932b6b8bba8d0160a9d1078aae5838a66039e8832d41d2992daa9a3a08f7860", size = 124737, upload-time = "2026-03-01T22:05:12.897Z" }, + { url = "https://files.pythonhosted.org/packages/e3/6f/c6554045d59d64052698add01226bc867b52fe4a12373415d7991fdca95d/yarl-1.23.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:411225bae281f114067578891bc75534cfb3d92a3b4dfef7a6ca78ba354e6069", size = 87029, upload-time = "2026-03-01T22:05:14.376Z" }, + { url = "https://files.pythonhosted.org/packages/19/2a/725ecc166d53438bc88f76822ed4b1e3b10756e790bafd7b523fe97c322d/yarl-1.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13a563739ae600a631c36ce096615fe307f131344588b0bc0daec108cdb47b25", size = 86310, upload-time = "2026-03-01T22:05:15.71Z" }, + { url = "https://files.pythonhosted.org/packages/99/30/58260ed98e6ff7f90ba84442c1ddd758c9170d70327394a6227b310cd60f/yarl-1.23.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:9cbf44c5cb4a7633d078788e1b56387e3d3cf2b8139a3be38040b22d6c3221c8", size = 97587, upload-time = "2026-03-01T22:05:17.384Z" }, + { url = "https://files.pythonhosted.org/packages/76/0a/8b08aac08b50682e65759f7f8dde98ae8168f72487e7357a5d684c581ef9/yarl-1.23.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53ad387048f6f09a8969631e4de3f1bf70c50e93545d64af4f751b2498755072", size = 92528, upload-time = "2026-03-01T22:05:18.804Z" }, + { url = "https://files.pythonhosted.org/packages/52/07/0b7179101fe5f8385ec6c6bb5d0cb9f76bd9fb4a769591ab6fb5cdbfc69a/yarl-1.23.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4a59ba56f340334766f3a4442e0efd0af895fae9e2b204741ef885c446b3a1a8", size = 105339, upload-time = "2026-03-01T22:05:20.235Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8a/36d82869ab5ec829ca8574dfcb92b51286fcfb1e9c7a73659616362dc880/yarl-1.23.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:803a3c3ce4acc62eaf01eaca1208dcf0783025ef27572c3336502b9c232005e7", size = 105061, upload-time = "2026-03-01T22:05:22.268Z" }, + { url = "https://files.pythonhosted.org/packages/66/3e/868e5c3364b6cee19ff3e1a122194fa4ce51def02c61023970442162859e/yarl-1.23.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3d2bff8f37f8d0f96c7ec554d16945050d54462d6e95414babaa18bfafc7f51", size = 100132, upload-time = "2026-03-01T22:05:23.638Z" }, + { url = "https://files.pythonhosted.org/packages/cf/26/9c89acf82f08a52cb52d6d39454f8d18af15f9d386a23795389d1d423823/yarl-1.23.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c75eb09e8d55bceb4367e83496ff8ef2bc7ea6960efb38e978e8073ea59ecb67", size = 99289, upload-time = "2026-03-01T22:05:25.749Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/54/5b0db00d2cb056922356104468019c0a132e89c8d3ab67d8ede9f4483d2a/yarl-1.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:877b0738624280e34c55680d6054a307aa94f7d52fa0e3034a9cc6e790871da7", size = 96950, upload-time = "2026-03-01T22:05:27.318Z" }, + { url = "https://files.pythonhosted.org/packages/f6/40/10fa93811fd439341fad7e0718a86aca0de9548023bbb403668d6555acab/yarl-1.23.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b5405bb8f0e783a988172993cfc627e4d9d00432d6bbac65a923041edacf997d", size = 93960, upload-time = "2026-03-01T22:05:28.738Z" }, + { url = "https://files.pythonhosted.org/packages/bc/d2/8ae2e6cd77d0805f4526e30ec43b6f9a3dfc542d401ac4990d178e4bf0cf/yarl-1.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c3a3598a832590c5a3ce56ab5576361b5688c12cb1d39429cf5dba30b510760", size = 104703, upload-time = "2026-03-01T22:05:30.438Z" }, + { url = "https://files.pythonhosted.org/packages/2f/0c/b3ceacf82c3fe21183ce35fa2acf5320af003d52bc1fcf5915077681142e/yarl-1.23.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8419ebd326430d1cbb7efb5292330a2cf39114e82df5cc3d83c9a0d5ebeaf2f2", size = 98325, upload-time = "2026-03-01T22:05:31.835Z" }, + { url = "https://files.pythonhosted.org/packages/9d/e0/12900edd28bdab91a69bd2554b85ad7b151f64e8b521fe16f9ad2f56477a/yarl-1.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:be61f6fff406ca40e3b1d84716fde398fc08bc63dd96d15f3a14230a0973ed86", size = 105067, upload-time = "2026-03-01T22:05:33.358Z" }, + { url = "https://files.pythonhosted.org/packages/15/61/74bb1182cf79c9bbe4eb6b1f14a57a22d7a0be5e9cedf8e2d5c2086474c3/yarl-1.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ceb13c5c858d01321b5d9bb65e4cf37a92169ea470b70fec6f236b2c9dd7e34", size = 100285, upload-time = "2026-03-01T22:05:35.4Z" }, + { url = 
"https://files.pythonhosted.org/packages/69/7f/cd5ef733f2550de6241bd8bd8c3febc78158b9d75f197d9c7baa113436af/yarl-1.23.0-cp312-cp312-win32.whl", hash = "sha256:fffc45637bcd6538de8b85f51e3df3223e4ad89bccbfca0481c08c7fc8b7ed7d", size = 82359, upload-time = "2026-03-01T22:05:36.811Z" }, + { url = "https://files.pythonhosted.org/packages/f5/be/25216a49daeeb7af2bec0db22d5e7df08ed1d7c9f65d78b14f3b74fd72fc/yarl-1.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:f69f57305656a4852f2a7203efc661d8c042e6cc67f7acd97d8667fb448a426e", size = 87674, upload-time = "2026-03-01T22:05:38.171Z" }, + { url = "https://files.pythonhosted.org/packages/d2/35/aeab955d6c425b227d5b7247eafb24f2653fedc32f95373a001af5dfeb9e/yarl-1.23.0-cp312-cp312-win_arm64.whl", hash = "sha256:6e87a6e8735b44816e7db0b2fbc9686932df473c826b0d9743148432e10bb9b9", size = 81879, upload-time = "2026-03-01T22:05:40.006Z" }, + { url = "https://files.pythonhosted.org/packages/9a/4b/a0a6e5d0ee8a2f3a373ddef8a4097d74ac901ac363eea1440464ccbe0898/yarl-1.23.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:16c6994ac35c3e74fb0ae93323bf8b9c2a9088d55946109489667c510a7d010e", size = 123796, upload-time = "2026-03-01T22:05:41.412Z" }, + { url = "https://files.pythonhosted.org/packages/67/b6/8925d68af039b835ae876db5838e82e76ec87b9782ecc97e192b809c4831/yarl-1.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4a42e651629dafb64fd5b0286a3580613702b5809ad3f24934ea87595804f2c5", size = 86547, upload-time = "2026-03-01T22:05:42.841Z" }, + { url = "https://files.pythonhosted.org/packages/ae/50/06d511cc4b8e0360d3c94af051a768e84b755c5eb031b12adaaab6dec6e5/yarl-1.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c6b9461a2a8b47c65eef63bb1c76a4f1c119618ffa99ea79bc5bb1e46c5821b", size = 85854, upload-time = "2026-03-01T22:05:44.85Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/f4/4e30b250927ffdab4db70da08b9b8d2194d7c7b400167b8fbeca1e4701ca/yarl-1.23.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2569b67d616eab450d262ca7cb9f9e19d2f718c70a8b88712859359d0ab17035", size = 98351, upload-time = "2026-03-01T22:05:46.836Z" }, + { url = "https://files.pythonhosted.org/packages/86/fc/4118c5671ea948208bdb1492d8b76bdf1453d3e73df051f939f563e7dcc5/yarl-1.23.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e9d9a4d06d3481eab79803beb4d9bd6f6a8e781ec078ac70d7ef2dcc29d1bea5", size = 92711, upload-time = "2026-03-01T22:05:48.316Z" }, + { url = "https://files.pythonhosted.org/packages/56/11/1ed91d42bd9e73c13dc9e7eb0dd92298d75e7ac4dd7f046ad0c472e231cd/yarl-1.23.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f514f6474e04179d3d33175ed3f3e31434d3130d42ec153540d5b157deefd735", size = 106014, upload-time = "2026-03-01T22:05:50.028Z" }, + { url = "https://files.pythonhosted.org/packages/ce/c9/74e44e056a23fbc33aca71779ef450ca648a5bc472bdad7a82339918f818/yarl-1.23.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fda207c815b253e34f7e1909840fd14299567b1c0eb4908f8c2ce01a41265401", size = 105557, upload-time = "2026-03-01T22:05:51.416Z" }, + { url = "https://files.pythonhosted.org/packages/66/fe/b1e10b08d287f518994f1e2ff9b6d26f0adeecd8dd7d533b01bab29a3eda/yarl-1.23.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34b6cf500e61c90f305094911f9acc9c86da1a05a7a3f5be9f68817043f486e4", size = 101559, upload-time = "2026-03-01T22:05:52.872Z" }, + { url = "https://files.pythonhosted.org/packages/72/59/c5b8d94b14e3d3c2a9c20cb100119fd534ab5a14b93673ab4cc4a4141ea5/yarl-1.23.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:d7504f2b476d21653e4d143f44a175f7f751cd41233525312696c76aa3dbb23f", size = 100502, upload-time = "2026-03-01T22:05:54.954Z" }, + { url = "https://files.pythonhosted.org/packages/77/4f/96976cb54cbfc5c9fd73ed4c51804f92f209481d1fb190981c0f8a07a1d7/yarl-1.23.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:578110dd426f0d209d1509244e6d4a3f1a3e9077655d98c5f22583d63252a08a", size = 98027, upload-time = "2026-03-01T22:05:56.409Z" }, + { url = "https://files.pythonhosted.org/packages/63/6e/904c4f476471afdbad6b7e5b70362fb5810e35cd7466529a97322b6f5556/yarl-1.23.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:609d3614d78d74ebe35f54953c5bbd2ac647a7ddb9c30a5d877580f5e86b22f2", size = 95369, upload-time = "2026-03-01T22:05:58.141Z" }, + { url = "https://files.pythonhosted.org/packages/9d/40/acfcdb3b5f9d68ef499e39e04d25e141fe90661f9d54114556cf83be8353/yarl-1.23.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4966242ec68afc74c122f8459abd597afd7d8a60dc93d695c1334c5fd25f762f", size = 105565, upload-time = "2026-03-01T22:06:00.286Z" }, + { url = "https://files.pythonhosted.org/packages/5e/c6/31e28f3a6ba2869c43d124f37ea5260cac9c9281df803c354b31f4dd1f3c/yarl-1.23.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:e0fd068364a6759bc794459f0a735ab151d11304346332489c7972bacbe9e72b", size = 99813, upload-time = "2026-03-01T22:06:01.712Z" }, + { url = "https://files.pythonhosted.org/packages/08/1f/6f65f59e72d54aa467119b63fc0b0b1762eff0232db1f4720cd89e2f4a17/yarl-1.23.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:39004f0ad156da43e86aa71f44e033de68a44e5a31fc53507b36dd253970054a", size = 105632, upload-time = "2026-03-01T22:06:03.188Z" }, + { url = "https://files.pythonhosted.org/packages/a3/c4/18b178a69935f9e7a338127d5b77d868fdc0f0e49becd286d51b3a18c61d/yarl-1.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e5723c01a56c5028c807c701aa66722916d2747ad737a046853f6c46f4875543", size = 101895, upload-time = "2026-03-01T22:06:04.651Z" }, + { 
url = "https://files.pythonhosted.org/packages/8f/54/f5b870b5505663911dba950a8e4776a0dbd51c9c54c0ae88e823e4b874a0/yarl-1.23.0-cp313-cp313-win32.whl", hash = "sha256:1b6b572edd95b4fa8df75de10b04bc81acc87c1c7d16bcdd2035b09d30acc957", size = 82356, upload-time = "2026-03-01T22:06:06.04Z" }, + { url = "https://files.pythonhosted.org/packages/7a/84/266e8da36879c6edcd37b02b547e2d9ecdfea776be49598e75696e3316e1/yarl-1.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:baaf55442359053c7d62f6f8413a62adba3205119bcb6f49594894d8be47e5e3", size = 87515, upload-time = "2026-03-01T22:06:08.107Z" }, + { url = "https://files.pythonhosted.org/packages/00/fd/7e1c66efad35e1649114fa13f17485f62881ad58edeeb7f49f8c5e748bf9/yarl-1.23.0-cp313-cp313-win_arm64.whl", hash = "sha256:fb4948814a2a98e3912505f09c9e7493b1506226afb1f881825368d6fb776ee3", size = 81785, upload-time = "2026-03-01T22:06:10.181Z" }, + { url = "https://files.pythonhosted.org/packages/9c/fc/119dd07004f17ea43bb91e3ece6587759edd7519d6b086d16bfbd3319982/yarl-1.23.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:aecfed0b41aa72b7881712c65cf764e39ce2ec352324f5e0837c7048d9e6daaa", size = 130719, upload-time = "2026-03-01T22:06:11.708Z" }, + { url = "https://files.pythonhosted.org/packages/e6/0d/9f2348502fbb3af409e8f47730282cd6bc80dec6630c1e06374d882d6eb2/yarl-1.23.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a41bcf68efd19073376eb8cf948b8d9be0af26256403e512bb18f3966f1f9120", size = 89690, upload-time = "2026-03-01T22:06:13.429Z" }, + { url = "https://files.pythonhosted.org/packages/50/93/e88f3c80971b42cfc83f50a51b9d165a1dbf154b97005f2994a79f212a07/yarl-1.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cde9a2ecd91668bcb7f077c4966d8ceddb60af01b52e6e3e2680e4cf00ad1a59", size = 89851, upload-time = "2026-03-01T22:06:15.53Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/07/61c9dd8ba8f86473263b4036f70fb594c09e99c0d9737a799dfd8bc85651/yarl-1.23.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5023346c4ee7992febc0068e7593de5fa2bf611848c08404b35ebbb76b1b0512", size = 95874, upload-time = "2026-03-01T22:06:17.553Z" }, + { url = "https://files.pythonhosted.org/packages/9e/e9/f9ff8ceefba599eac6abddcfb0b3bee9b9e636e96dbf54342a8577252379/yarl-1.23.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1009abedb49ae95b136a8904a3f71b342f849ffeced2d3747bf29caeda218c4", size = 88710, upload-time = "2026-03-01T22:06:19.004Z" }, + { url = "https://files.pythonhosted.org/packages/eb/78/0231bfcc5d4c8eec220bc2f9ef82cb4566192ea867a7c5b4148f44f6cbcd/yarl-1.23.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a8d00f29b42f534cc8aa3931cfe773b13b23e561e10d2b26f27a8d309b0e82a1", size = 101033, upload-time = "2026-03-01T22:06:21.203Z" }, + { url = "https://files.pythonhosted.org/packages/cd/9b/30ea5239a61786f18fd25797151a17fbb3be176977187a48d541b5447dd4/yarl-1.23.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:95451e6ce06c3e104556d73b559f5da6c34a069b6b62946d3ad66afcd51642ea", size = 100817, upload-time = "2026-03-01T22:06:22.738Z" }, + { url = "https://files.pythonhosted.org/packages/62/e2/a4980481071791bc83bce2b7a1a1f7adcabfa366007518b4b845e92eeee3/yarl-1.23.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:531ef597132086b6cf96faa7c6c1dcd0361dd5f1694e5cc30375907b9b7d3ea9", size = 97482, upload-time = "2026-03-01T22:06:24.21Z" }, + { url = "https://files.pythonhosted.org/packages/e5/1e/304a00cf5f6100414c4b5a01fc7ff9ee724b62158a08df2f8170dfc72a2d/yarl-1.23.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:88f9fb0116fbfcefcab70f85cf4b74a2b6ce5d199c41345296f49d974ddb4123", size = 95949, upload-time = "2026-03-01T22:06:25.697Z" }, + { url = "https://files.pythonhosted.org/packages/68/03/093f4055ed4cae649ac53bca3d180bd37102e9e11d048588e9ab0c0108d0/yarl-1.23.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e7b0460976dc75cb87ad9cc1f9899a4b97751e7d4e77ab840fc9b6d377b8fd24", size = 95839, upload-time = "2026-03-01T22:06:27.309Z" }, + { url = "https://files.pythonhosted.org/packages/b9/28/4c75ebb108f322aa8f917ae10a8ffa4f07cae10a8a627b64e578617df6a0/yarl-1.23.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:115136c4a426f9da976187d238e84139ff6b51a20839aa6e3720cd1026d768de", size = 90696, upload-time = "2026-03-01T22:06:29.048Z" }, + { url = "https://files.pythonhosted.org/packages/23/9c/42c2e2dd91c1a570402f51bdf066bfdb1241c2240ba001967bad778e77b7/yarl-1.23.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:ead11956716a940c1abc816b7df3fa2b84d06eaed8832ca32f5c5e058c65506b", size = 100865, upload-time = "2026-03-01T22:06:30.525Z" }, + { url = "https://files.pythonhosted.org/packages/74/05/1bcd60a8a0a914d462c305137246b6f9d167628d73568505fce3f1cb2e65/yarl-1.23.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:fe8f8f5e70e6dbdfca9882cd9deaac058729bcf323cf7a58660901e55c9c94f6", size = 96234, upload-time = "2026-03-01T22:06:32.692Z" }, + { url = "https://files.pythonhosted.org/packages/90/b2/f52381aac396d6778ce516b7bc149c79e65bfc068b5de2857ab69eeea3b7/yarl-1.23.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:a0e317df055958a0c1e79e5d2aa5a5eaa4a6d05a20d4b0c9c3f48918139c9fc6", size = 100295, upload-time = "2026-03-01T22:06:34.268Z" }, + { url = "https://files.pythonhosted.org/packages/e5/e8/638bae5bbf1113a659b2435d8895474598afe38b4a837103764f603aba56/yarl-1.23.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f0fd84de0c957b2d280143522c4f91a73aada1923caee763e24a2b3fda9f8a5", size = 97784, upload-time = "2026-03-01T22:06:35.864Z" }, + 
{ url = "https://files.pythonhosted.org/packages/80/25/a3892b46182c586c202629fc2159aa13975d3741d52ebd7347fd501d48d5/yarl-1.23.0-cp313-cp313t-win32.whl", hash = "sha256:93a784271881035ab4406a172edb0faecb6e7d00f4b53dc2f55919d6c9688595", size = 88313, upload-time = "2026-03-01T22:06:37.39Z" }, + { url = "https://files.pythonhosted.org/packages/43/68/8c5b36aa5178900b37387937bc2c2fe0e9505537f713495472dcf6f6fccc/yarl-1.23.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dd00607bffbf30250fe108065f07453ec124dbf223420f57f5e749b04295e090", size = 94932, upload-time = "2026-03-01T22:06:39.579Z" }, + { url = "https://files.pythonhosted.org/packages/c6/cc/d79ba8292f51f81f4dc533a8ccfb9fc6992cabf0998ed3245de7589dc07c/yarl-1.23.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ac09d42f48f80c9ee1635b2fcaa819496a44502737660d3c0f2ade7526d29144", size = 84786, upload-time = "2026-03-01T22:06:41.988Z" }, + { url = "https://files.pythonhosted.org/packages/90/98/b85a038d65d1b92c3903ab89444f48d3cee490a883477b716d7a24b1a78c/yarl-1.23.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:21d1b7305a71a15b4794b5ff22e8eef96ff4a6d7f9657155e5aa419444b28912", size = 124455, upload-time = "2026-03-01T22:06:43.615Z" }, + { url = "https://files.pythonhosted.org/packages/39/54/bc2b45559f86543d163b6e294417a107bb87557609007c007ad889afec18/yarl-1.23.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:85610b4f27f69984932a7abbe52703688de3724d9f72bceb1cca667deff27474", size = 86752, upload-time = "2026-03-01T22:06:45.425Z" }, + { url = "https://files.pythonhosted.org/packages/24/f9/e8242b68362bffe6fb536c8db5076861466fc780f0f1b479fc4ffbebb128/yarl-1.23.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23f371bd662cf44a7630d4d113101eafc0cfa7518a2760d20760b26021454719", size = 86291, upload-time = "2026-03-01T22:06:46.974Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/d8/d1cb2378c81dd729e98c716582b1ccb08357e8488e4c24714658cc6630e8/yarl-1.23.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4a80f77dc1acaaa61f0934176fccca7096d9b1ff08c8ba9cddf5ae034a24319", size = 99026, upload-time = "2026-03-01T22:06:48.459Z" }, + { url = "https://files.pythonhosted.org/packages/0a/ff/7196790538f31debe3341283b5b0707e7feb947620fc5e8236ef28d44f72/yarl-1.23.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:bd654fad46d8d9e823afbb4f87c79160b5a374ed1ff5bde24e542e6ba8f41434", size = 92355, upload-time = "2026-03-01T22:06:50.306Z" }, + { url = "https://files.pythonhosted.org/packages/c1/56/25d58c3eddde825890a5fe6aa1866228377354a3c39262235234ab5f616b/yarl-1.23.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:682bae25f0a0dd23a056739f23a134db9f52a63e2afd6bfb37ddc76292bbd723", size = 106417, upload-time = "2026-03-01T22:06:52.1Z" }, + { url = "https://files.pythonhosted.org/packages/51/8a/882c0e7bc8277eb895b31bce0138f51a1ba551fc2e1ec6753ffc1e7c1377/yarl-1.23.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a82836cab5f197a0514235aaf7ffccdc886ccdaa2324bc0aafdd4ae898103039", size = 106422, upload-time = "2026-03-01T22:06:54.424Z" }, + { url = "https://files.pythonhosted.org/packages/42/2b/fef67d616931055bf3d6764885990a3ac647d68734a2d6a9e1d13de437a2/yarl-1.23.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c57676bdedc94cd3bc37724cf6f8cd2779f02f6aba48de45feca073e714fe52", size = 101915, upload-time = "2026-03-01T22:06:55.895Z" }, + { url = "https://files.pythonhosted.org/packages/18/6a/530e16aebce27c5937920f3431c628a29a4b6b430fab3fd1c117b26ff3f6/yarl-1.23.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:c7f8dc16c498ff06497c015642333219871effba93e4a2e8604a06264aca5c5c", size = 100690, upload-time = "2026-03-01T22:06:58.21Z" }, + { url = "https://files.pythonhosted.org/packages/88/08/93749219179a45e27b036e03260fda05190b911de8e18225c294ac95bbc9/yarl-1.23.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5ee586fb17ff8f90c91cf73c6108a434b02d69925f44f5f8e0d7f2f260607eae", size = 98750, upload-time = "2026-03-01T22:06:59.794Z" }, + { url = "https://files.pythonhosted.org/packages/d9/cf/ea424a004969f5d81a362110a6ac1496d79efdc6d50c2c4b2e3ea0fc2519/yarl-1.23.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:17235362f580149742739cc3828b80e24029d08cbb9c4bda0242c7b5bc610a8e", size = 94685, upload-time = "2026-03-01T22:07:01.375Z" }, + { url = "https://files.pythonhosted.org/packages/e2/b7/14341481fe568e2b0408bcf1484c652accafe06a0ade9387b5d3fd9df446/yarl-1.23.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:0793e2bd0cf14234983bbb371591e6bea9e876ddf6896cdcc93450996b0b5c85", size = 106009, upload-time = "2026-03-01T22:07:03.151Z" }, + { url = "https://files.pythonhosted.org/packages/0a/e6/5c744a9b54f4e8007ad35bce96fbc9218338e84812d36f3390cea616881a/yarl-1.23.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:3650dc2480f94f7116c364096bc84b1d602f44224ef7d5c7208425915c0475dd", size = 100033, upload-time = "2026-03-01T22:07:04.701Z" }, + { url = "https://files.pythonhosted.org/packages/0c/23/e3bfc188d0b400f025bc49d99793d02c9abe15752138dcc27e4eaf0c4a9e/yarl-1.23.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f40e782d49630ad384db66d4d8b73ff4f1b8955dc12e26b09a3e3af064b3b9d6", size = 106483, upload-time = "2026-03-01T22:07:06.231Z" }, + { url = "https://files.pythonhosted.org/packages/72/42/f0505f949a90b3f8b7a363d6cbdf398f6e6c58946d85c6d3a3bc70595b26/yarl-1.23.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94f8575fbdf81749008d980c17796097e645574a3b8c28ee313931068dad14fe", size = 102175, upload-time = "2026-03-01T22:07:08.4Z" }, + { url 
= "https://files.pythonhosted.org/packages/aa/65/b39290f1d892a9dd671d1c722014ca062a9c35d60885d57e5375db0404b5/yarl-1.23.0-cp314-cp314-win32.whl", hash = "sha256:c8aa34a5c864db1087d911a0b902d60d203ea3607d91f615acd3f3108ac32169", size = 83871, upload-time = "2026-03-01T22:07:09.968Z" }, + { url = "https://files.pythonhosted.org/packages/a9/5b/9b92f54c784c26e2a422e55a8d2607ab15b7ea3349e28359282f84f01d43/yarl-1.23.0-cp314-cp314-win_amd64.whl", hash = "sha256:63e92247f383c85ab00dd0091e8c3fa331a96e865459f5ee80353c70a4a42d70", size = 89093, upload-time = "2026-03-01T22:07:11.501Z" }, + { url = "https://files.pythonhosted.org/packages/e0/7d/8a84dc9381fd4412d5e7ff04926f9865f6372b4c2fd91e10092e65d29eb8/yarl-1.23.0-cp314-cp314-win_arm64.whl", hash = "sha256:70efd20be968c76ece7baa8dafe04c5be06abc57f754d6f36f3741f7aa7a208e", size = 83384, upload-time = "2026-03-01T22:07:13.069Z" }, + { url = "https://files.pythonhosted.org/packages/dd/8d/d2fad34b1c08aa161b74394183daa7d800141aaaee207317e82c790b418d/yarl-1.23.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:9a18d6f9359e45722c064c97464ec883eb0e0366d33eda61cb19a244bf222679", size = 131019, upload-time = "2026-03-01T22:07:14.903Z" }, + { url = "https://files.pythonhosted.org/packages/19/ff/33009a39d3ccf4b94d7d7880dfe17fb5816c5a4fe0096d9b56abceea9ac7/yarl-1.23.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2803ed8b21ca47a43da80a6fd1ed3019d30061f7061daa35ac54f63933409412", size = 89894, upload-time = "2026-03-01T22:07:17.372Z" }, + { url = "https://files.pythonhosted.org/packages/0c/f1/dab7ac5e7306fb79c0190766a3c00b4cb8d09a1f390ded68c85a5934faf5/yarl-1.23.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:394906945aa8b19fc14a61cf69743a868bb8c465efe85eee687109cc540b98f4", size = 89979, upload-time = "2026-03-01T22:07:19.361Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/b1/08e95f3caee1fad6e65017b9f26c1d79877b502622d60e517de01e72f95d/yarl-1.23.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:71d006bee8397a4a89f469b8deb22469fe7508132d3c17fa6ed871e79832691c", size = 95943, upload-time = "2026-03-01T22:07:21.266Z" }, + { url = "https://files.pythonhosted.org/packages/c0/cc/6409f9018864a6aa186c61175b977131f373f1988e198e031236916e87e4/yarl-1.23.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:62694e275c93d54f7ccedcfef57d42761b2aad5234b6be1f3e3026cae4001cd4", size = 88786, upload-time = "2026-03-01T22:07:23.129Z" }, + { url = "https://files.pythonhosted.org/packages/76/40/cc22d1d7714b717fde2006fad2ced5efe5580606cb059ae42117542122f3/yarl-1.23.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31de1613658308efdb21ada98cbc86a97c181aa050ba22a808120bb5be3ab94", size = 101307, upload-time = "2026-03-01T22:07:24.689Z" }, + { url = "https://files.pythonhosted.org/packages/8f/0d/476c38e85ddb4c6ec6b20b815bdd779aa386a013f3d8b85516feee55c8dc/yarl-1.23.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fb1e8b8d66c278b21d13b0a7ca22c41dd757a7c209c6b12c313e445c31dd3b28", size = 100904, upload-time = "2026-03-01T22:07:26.287Z" }, + { url = "https://files.pythonhosted.org/packages/72/32/0abe4a76d59adf2081dcb0397168553ece4616ada1c54d1c49d8936c74f8/yarl-1.23.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50f9d8d531dfb767c565f348f33dd5139a6c43f5cbdf3f67da40d54241df93f6", size = 97728, upload-time = "2026-03-01T22:07:27.906Z" }, + { url = "https://files.pythonhosted.org/packages/b7/35/7b30f4810fba112f60f5a43237545867504e15b1c7647a785fbaf588fac2/yarl-1.23.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:575aa4405a656e61a540f4a80eaa5260f2a38fff7bfdc4b5f611840d76e9e277", size = 95964, upload-time = "2026-03-01T22:07:30.198Z" }, + { url = "https://files.pythonhosted.org/packages/2d/86/ed7a73ab85ef00e8bb70b0cb5421d8a2a625b81a333941a469a6f4022828/yarl-1.23.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:041b1a4cefacf65840b4e295c6985f334ba83c30607441ae3cf206a0eed1a2e4", size = 95882, upload-time = "2026-03-01T22:07:32.132Z" }, + { url = "https://files.pythonhosted.org/packages/19/90/d56967f61a29d8498efb7afb651e0b2b422a1e9b47b0ab5f4e40a19b699b/yarl-1.23.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:d38c1e8231722c4ce40d7593f28d92b5fc72f3e9774fe73d7e800ec32299f63a", size = 90797, upload-time = "2026-03-01T22:07:34.404Z" }, + { url = "https://files.pythonhosted.org/packages/72/00/8b8f76909259f56647adb1011d7ed8b321bcf97e464515c65016a47ecdf0/yarl-1.23.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:d53834e23c015ee83a99377db6e5e37d8484f333edb03bd15b4bc312cc7254fb", size = 101023, upload-time = "2026-03-01T22:07:35.953Z" }, + { url = "https://files.pythonhosted.org/packages/ac/e2/cab11b126fb7d440281b7df8e9ddbe4851e70a4dde47a202b6642586b8d9/yarl-1.23.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:2e27c8841126e017dd2a054a95771569e6070b9ee1b133366d8b31beb5018a41", size = 96227, upload-time = "2026-03-01T22:07:37.594Z" }, + { url = "https://files.pythonhosted.org/packages/c2/9b/2c893e16bfc50e6b2edf76c1a9eb6cb0c744346197e74c65e99ad8d634d0/yarl-1.23.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:76855800ac56f878847a09ce6dba727c93ca2d89c9e9d63002d26b916810b0a2", size = 100302, upload-time = "2026-03-01T22:07:39.334Z" }, + { url = "https://files.pythonhosted.org/packages/28/ec/5498c4e3a6d5f1003beb23405671c2eb9cdbf3067d1c80f15eeafe301010/yarl-1.23.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e09fd068c2e169a7070d83d3bde728a4d48de0549f975290be3c108c02e499b4", size = 98202, upload-time = "2026-03-01T22:07:41.717Z" }, + 
{ url = "https://files.pythonhosted.org/packages/fe/c3/cd737e2d45e70717907f83e146f6949f20cc23cd4bf7b2688727763aa458/yarl-1.23.0-cp314-cp314t-win32.whl", hash = "sha256:73309162a6a571d4cbd3b6a1dcc703c7311843ae0d1578df6f09be4e98df38d4", size = 90558, upload-time = "2026-03-01T22:07:43.433Z" }, + { url = "https://files.pythonhosted.org/packages/e1/19/3774d162f6732d1cfb0b47b4140a942a35ca82bb19b6db1f80e9e7bdc8f8/yarl-1.23.0-cp314-cp314t-win_amd64.whl", hash = "sha256:4503053d296bc6e4cbd1fad61cf3b6e33b939886c4f249ba7c78b602214fabe2", size = 97610, upload-time = "2026-03-01T22:07:45.773Z" }, + { url = "https://files.pythonhosted.org/packages/51/47/3fa2286c3cb162c71cdb34c4224d5745a1ceceb391b2bd9b19b668a8d724/yarl-1.23.0-cp314-cp314t-win_arm64.whl", hash = "sha256:44bb7bef4ea409384e3f8bc36c063d77ea1b8d4a5b2706956c0d6695f07dcc25", size = 86041, upload-time = "2026-03-01T22:07:49.026Z" }, + { url = "https://files.pythonhosted.org/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288, upload-time = "2026-03-01T22:07:51.388Z" }, +] + +[[package]] +name = "zstandard" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/fc/f26eb6ef91ae723a03e16eddb198abcfce2bc5a42e224d44cc8b6765e57e/zstandard-0.25.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7b3c3a3ab9daa3eed242d6ecceead93aebbb8f5f84318d82cee643e019c4b73b", size = 795738, upload-time = "2025-09-14T22:16:56.237Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/1c/d920d64b22f8dd028a8b90e2d756e431a5d86194caa78e3819c7bf53b4b3/zstandard-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:913cbd31a400febff93b564a23e17c3ed2d56c064006f54efec210d586171c00", size = 640436, upload-time = "2025-09-14T22:16:57.774Z" }, + { url = "https://files.pythonhosted.org/packages/53/6c/288c3f0bd9fcfe9ca41e2c2fbfd17b2097f6af57b62a81161941f09afa76/zstandard-0.25.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:011d388c76b11a0c165374ce660ce2c8efa8e5d87f34996aa80f9c0816698b64", size = 5343019, upload-time = "2025-09-14T22:16:59.302Z" }, + { url = "https://files.pythonhosted.org/packages/1e/15/efef5a2f204a64bdb5571e6161d49f7ef0fffdbca953a615efbec045f60f/zstandard-0.25.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dffecc361d079bb48d7caef5d673c88c8988d3d33fb74ab95b7ee6da42652ea", size = 5063012, upload-time = "2025-09-14T22:17:01.156Z" }, + { url = "https://files.pythonhosted.org/packages/b7/37/a6ce629ffdb43959e92e87ebdaeebb5ac81c944b6a75c9c47e300f85abdf/zstandard-0.25.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7149623bba7fdf7e7f24312953bcf73cae103db8cae49f8154dd1eadc8a29ecb", size = 5394148, upload-time = "2025-09-14T22:17:03.091Z" }, + { url = "https://files.pythonhosted.org/packages/e3/79/2bf870b3abeb5c070fe2d670a5a8d1057a8270f125ef7676d29ea900f496/zstandard-0.25.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:6a573a35693e03cf1d67799fd01b50ff578515a8aeadd4595d2a7fa9f3ec002a", size = 5451652, upload-time = "2025-09-14T22:17:04.979Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/7be26e610767316c028a2cbedb9a3beabdbe33e2182c373f71a1c0b88f36/zstandard-0.25.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5a56ba0db2d244117ed744dfa8f6f5b366e14148e00de44723413b2f3938a902", size = 5546993, upload-time = 
"2025-09-14T22:17:06.781Z" }, + { url = "https://files.pythonhosted.org/packages/85/c7/3483ad9ff0662623f3648479b0380d2de5510abf00990468c286c6b04017/zstandard-0.25.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:10ef2a79ab8e2974e2075fb984e5b9806c64134810fac21576f0668e7ea19f8f", size = 5046806, upload-time = "2025-09-14T22:17:08.415Z" }, + { url = "https://files.pythonhosted.org/packages/08/b3/206883dd25b8d1591a1caa44b54c2aad84badccf2f1de9e2d60a446f9a25/zstandard-0.25.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aaf21ba8fb76d102b696781bddaa0954b782536446083ae3fdaa6f16b25a1c4b", size = 5576659, upload-time = "2025-09-14T22:17:10.164Z" }, + { url = "https://files.pythonhosted.org/packages/9d/31/76c0779101453e6c117b0ff22565865c54f48f8bd807df2b00c2c404b8e0/zstandard-0.25.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1869da9571d5e94a85a5e8d57e4e8807b175c9e4a6294e3b66fa4efb074d90f6", size = 4953933, upload-time = "2025-09-14T22:17:11.857Z" }, + { url = "https://files.pythonhosted.org/packages/18/e1/97680c664a1bf9a247a280a053d98e251424af51f1b196c6d52f117c9720/zstandard-0.25.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:809c5bcb2c67cd0ed81e9229d227d4ca28f82d0f778fc5fea624a9def3963f91", size = 5268008, upload-time = "2025-09-14T22:17:13.627Z" }, + { url = "https://files.pythonhosted.org/packages/1e/73/316e4010de585ac798e154e88fd81bb16afc5c5cb1a72eeb16dd37e8024a/zstandard-0.25.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f27662e4f7dbf9f9c12391cb37b4c4c3cb90ffbd3b1fb9284dadbbb8935fa708", size = 5433517, upload-time = "2025-09-14T22:17:16.103Z" }, + { url = "https://files.pythonhosted.org/packages/5b/60/dd0f8cfa8129c5a0ce3ea6b7f70be5b33d2618013a161e1ff26c2b39787c/zstandard-0.25.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:99c0c846e6e61718715a3c9437ccc625de26593fea60189567f0118dc9db7512", size = 5814292, upload-time = "2025-09-14T22:17:17.827Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/5f/75aafd4b9d11b5407b641b8e41a57864097663699f23e9ad4dbb91dc6bfe/zstandard-0.25.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:474d2596a2dbc241a556e965fb76002c1ce655445e4e3bf38e5477d413165ffa", size = 5360237, upload-time = "2025-09-14T22:17:19.954Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8d/0309daffea4fcac7981021dbf21cdb2e3427a9e76bafbcdbdf5392ff99a4/zstandard-0.25.0-cp312-cp312-win32.whl", hash = "sha256:23ebc8f17a03133b4426bcc04aabd68f8236eb78c3760f12783385171b0fd8bd", size = 436922, upload-time = "2025-09-14T22:17:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/79/3b/fa54d9015f945330510cb5d0b0501e8253c127cca7ebe8ba46a965df18c5/zstandard-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffef5a74088f1e09947aecf91011136665152e0b4b359c42be3373897fb39b01", size = 506276, upload-time = "2025-09-14T22:17:21.429Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6b/8b51697e5319b1f9ac71087b0af9a40d8a6288ff8025c36486e0c12abcc4/zstandard-0.25.0-cp312-cp312-win_arm64.whl", hash = "sha256:181eb40e0b6a29b3cd2849f825e0fa34397f649170673d385f3598ae17cca2e9", size = 462679, upload-time = "2025-09-14T22:17:23.147Z" }, + { url = "https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" }, + { url = "https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" }, + { url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" }, + { url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" }, + { url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" }, + { url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = "2025-09-14T22:17:36.084Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, 
upload-time = "2025-09-14T22:17:37.891Z" }, + { url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" }, + { url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" }, + { url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 5267889, upload-time = "2025-09-14T22:17:43.577Z" }, + { url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" }, + { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = "2025-09-14T22:17:52.658Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" }, + { url = "https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" }, + { url = "https://files.pythonhosted.org/packages/3d/5c/f8923b595b55fe49e30612987ad8bf053aef555c14f05bb659dd5dbe3e8a/zstandard-0.25.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e29f0cf06974c899b2c188ef7f783607dbef36da4c242eb6c82dcd8b512855e3", size = 795887, upload-time = "2025-09-14T22:17:54.198Z" }, + { url = "https://files.pythonhosted.org/packages/8d/09/d0a2a14fc3439c5f874042dca72a79c70a532090b7ba0003be73fee37ae2/zstandard-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:05df5136bc5a011f33cd25bc9f506e7426c0c9b3f9954f056831ce68f3b6689f", size = 640658, upload-time = "2025-09-14T22:17:55.423Z" }, + { url = "https://files.pythonhosted.org/packages/5d/7c/8b6b71b1ddd517f68ffb55e10834388d4f793c49c6b83effaaa05785b0b4/zstandard-0.25.0-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:f604efd28f239cc21b3adb53eb061e2a205dc164be408e553b41ba2ffe0ca15c", size = 5379849, upload-time = "2025-09-14T22:17:57.372Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/86/a48e56320d0a17189ab7a42645387334fba2200e904ee47fc5a26c1fd8ca/zstandard-0.25.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223415140608d0f0da010499eaa8ccdb9af210a543fac54bce15babbcfc78439", size = 5058095, upload-time = "2025-09-14T22:17:59.498Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ad/eb659984ee2c0a779f9d06dbfe45e2dc39d99ff40a319895df2d3d9a48e5/zstandard-0.25.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e54296a283f3ab5a26fc9b8b5d4978ea0532f37b231644f367aa588930aa043", size = 5551751, upload-time = "2025-09-14T22:18:01.618Z" }, + { url = "https://files.pythonhosted.org/packages/61/b3/b637faea43677eb7bd42ab204dfb7053bd5c4582bfe6b1baefa80ac0c47b/zstandard-0.25.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ca54090275939dc8ec5dea2d2afb400e0f83444b2fc24e07df7fdef677110859", size = 6364818, upload-time = "2025-09-14T22:18:03.769Z" }, + { url = "https://files.pythonhosted.org/packages/31/dc/cc50210e11e465c975462439a492516a73300ab8caa8f5e0902544fd748b/zstandard-0.25.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e09bb6252b6476d8d56100e8147b803befa9a12cea144bbe629dd508800d1ad0", size = 5560402, upload-time = "2025-09-14T22:18:05.954Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ae/56523ae9c142f0c08efd5e868a6da613ae76614eca1305259c3bf6a0ed43/zstandard-0.25.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a9ec8c642d1ec73287ae3e726792dd86c96f5681eb8df274a757bf62b750eae7", size = 4955108, upload-time = "2025-09-14T22:18:07.68Z" }, + { url = "https://files.pythonhosted.org/packages/98/cf/c899f2d6df0840d5e384cf4c4121458c72802e8bda19691f3b16619f51e9/zstandard-0.25.0-cp314-cp314-musllinux_1_2_i686.whl", hash = 
"sha256:a4089a10e598eae6393756b036e0f419e8c1d60f44a831520f9af41c14216cf2", size = 5269248, upload-time = "2025-09-14T22:18:09.753Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c0/59e912a531d91e1c192d3085fc0f6fb2852753c301a812d856d857ea03c6/zstandard-0.25.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f67e8f1a324a900e75b5e28ffb152bcac9fbed1cc7b43f99cd90f395c4375344", size = 5430330, upload-time = "2025-09-14T22:18:11.966Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/7e31db1240de2df22a58e2ea9a93fc6e38cc29353e660c0272b6735d6669/zstandard-0.25.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:9654dbc012d8b06fc3d19cc825af3f7bf8ae242226df5f83936cb39f5fdc846c", size = 5811123, upload-time = "2025-09-14T22:18:13.907Z" }, + { url = "https://files.pythonhosted.org/packages/f6/49/fac46df5ad353d50535e118d6983069df68ca5908d4d65b8c466150a4ff1/zstandard-0.25.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4203ce3b31aec23012d3a4cf4a2ed64d12fea5269c49aed5e4c3611b938e4088", size = 5359591, upload-time = "2025-09-14T22:18:16.465Z" }, + { url = "https://files.pythonhosted.org/packages/c2/38/f249a2050ad1eea0bb364046153942e34abba95dd5520af199aed86fbb49/zstandard-0.25.0-cp314-cp314-win32.whl", hash = "sha256:da469dc041701583e34de852d8634703550348d5822e66a0c827d39b05365b12", size = 444513, upload-time = "2025-09-14T22:18:20.61Z" }, + { url = "https://files.pythonhosted.org/packages/3a/43/241f9615bcf8ba8903b3f0432da069e857fc4fd1783bd26183db53c4804b/zstandard-0.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:c19bcdd826e95671065f8692b5a4aa95c52dc7a02a4c5a0cac46deb879a017a2", size = 516118, upload-time = "2025-09-14T22:18:17.849Z" }, + { url = "https://files.pythonhosted.org/packages/f0/ef/da163ce2450ed4febf6467d77ccb4cd52c4c30ab45624bad26ca0a27260c/zstandard-0.25.0-cp314-cp314-win_arm64.whl", hash = "sha256:d7541afd73985c630bafcd6338d2518ae96060075f9463d7dc14cfb33514383d", size = 476940, upload-time = "2025-09-14T22:18:19.088Z" }, +] diff 
--git a/deploy/daily/cdp-discover.sh b/deploy/daily/cdp-discover.sh new file mode 100755 index 0000000..d2a39b8 --- /dev/null +++ b/deploy/daily/cdp-discover.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Discover the CDP page-target WebSocket URL on autocli-chrome and +# write it to /run/cdp-endpoint.env as AUTOCLI_CDP_ENDPOINT=... +# Runs once at boot (gating supercronic + uvicorn) AND once at the +# start of every run-daily.sh (page id can change between cron ticks). +# See deploy/SPEC.md §5.2 "Discovery cadence" + "CDP page target". +# +# Chrome DevTools DNS-rebinding protection: the /json* and /devtools +# endpoints reject any HTTP Host header that is NOT an IP or "localhost" +# (error: "Host header is specified and is not an IP address or +# localhost."). Reaching Chrome by docker service name therefore fails. +# Fix: resolve CHROME_HOST -> container IP and use the IP for BOTH the +# /json probe AND the rewritten ws:// URL, so every Host header Chrome +# sees is an IP (which it accepts). The IP is re-resolved on every run, +# so a recreated chrome container with a new IP is picked up next tick. + +set -euo pipefail + +CHROME_HOST="${CHROME_HOST:-autocli-chrome}" +CHROME_PORT="${CHROME_PORT:-9222}" +DEADLINE=$(( $(date +%s) + 60 )) # 60 s budget +INTERVAL=2 + +resolve_ip() { + # getent first (glibc NSS, honours docker DNS); fall back to python. + getent hosts "${CHROME_HOST}" 2>/dev/null | awk '{print $1; exit}' && return 0 + python3 -c "import socket,sys; print(socket.gethostbyname(sys.argv[1]))" "${CHROME_HOST}" 2>/dev/null +} + +while (( $(date +%s) < DEADLINE )); do + chrome_ip="$(resolve_ip || true)" + if [[ -n "${chrome_ip}" ]]; then + base="http://${chrome_ip}:${CHROME_PORT}" + if list_json=$(curl -fsS --max-time 3 "${base}/json/list" 2>/dev/null); then + ws=$(jq -r '[.[] | select(.type=="page")][0].webSocketDebuggerUrl // empty' <<<"${list_json}") + if [[ -z "${ws}" || "${ws}" == "null" ]]; then + # No page target yet — create one. 
PUT, not POST/GET (Chrome >= M86). + new_json=$(curl -fsS --max-time 3 -X PUT "${base}/json/new?about:blank" 2>/dev/null || true) + ws=$(jq -r '.webSocketDebuggerUrl // empty' <<<"${new_json}") + fi + if [[ -n "${ws}" && "${ws}" != "null" ]]; then + # Chrome reports its own bind host (localhost:9223). Rewrite the + # host:port to the resolved container IP so the WS upgrade's Host + # header is an IP (passes Chrome's rebind check) and the TCP + # target is reachable from this container's netns. + rewritten=$(sed -E "s|ws://[^/]+|ws://${chrome_ip}:${CHROME_PORT}|" <<<"${ws}") + echo "AUTOCLI_CDP_ENDPOINT=${rewritten}" > /run/cdp-endpoint.env + chmod 0644 /run/cdp-endpoint.env + echo "[cdp-discover] ${rewritten}" + exit 0 + fi + fi + fi + sleep "${INTERVAL}" +done + +echo "[cdp-discover] FATAL: chrome unreachable after 60s (host=${CHROME_HOST} ip=${chrome_ip:-unresolved})" >&2 +exit 1 diff --git a/deploy/daily/crontab b/deploy/daily/crontab new file mode 100644 index 0000000..e8c50cf --- /dev/null +++ b/deploy/daily/crontab @@ -0,0 +1,10 @@ +# supercronic crontab — runs in container TZ=Europe/London. +# +# NOTE: supercronic reads this file verbatim. Shell-style env expansion +# (${CRON_SCHEDULE}, ${OUTPUT_RETENTION_DAYS}) does NOT happen here. +# Earlier comments suggested those env vars were honored — they were not, +# so the env-knobs are dropped from the compose to stop being misleading. +# To change the schedule or retention, edit THIS file (or render it from +# a template in entrypoint.sh before launching supercronic). +0 3 * * * /app/run-daily.sh +0 4 * * * find /data/output -name "*.json" -type f -mtime +30 -delete diff --git a/deploy/daily/entrypoint.sh b/deploy/daily/entrypoint.sh new file mode 100755 index 0000000..7024d66 --- /dev/null +++ b/deploy/daily/entrypoint.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Container PID 2 (tini is PID 1). Sequence: +# 1. boot-time cdp-discover (gates everything else) +# 2. 
start supercronic + uvicorn as background children, wait on either. + +set -euo pipefail + +echo "[entrypoint] boot cdp-discover" +/app/cdp-discover.sh + +echo "[entrypoint] starting supercronic + uvicorn" +supercronic -quiet /etc/cron.d/autocli & +CRON_PID=$! + +cd /app/api +uv run --no-project -- uvicorn main:app --host 0.0.0.0 --port 8080 & +API_PID=$! + +# Forward SIGTERM to children for graceful shutdown via tini. +trap 'kill -TERM "${CRON_PID}" "${API_PID}" 2>/dev/null || true' TERM INT + +# Exit when either child exits (compose/Watchtower can then restart cleanly). +wait -n "${CRON_PID}" "${API_PID}" +exit $? diff --git a/deploy/daily/run-daily.sh b/deploy/daily/run-daily.sh new file mode 100755 index 0000000..99b6e69 --- /dev/null +++ b/deploy/daily/run-daily.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash +# Daily orchestrator. Invoked by: +# * supercronic (cron tick) +# * POST /api/run (FastAPI shells out via trigger.py) +# Retry policy (SPEC §5.2): the loop at the bottom makes up to 4 attempts (the first try plus 3 retries), sleeping 15s/60s/240s between them. +# Uses flock so cron + /api/run can't collide. + +set -euo pipefail + +LOCK=/var/lock/autocli-daily.lock +LAST_RUN_JSON=/data/output/last_run.json +LOG_DIR=/data/logs +OUTPUT_DIR=/data/output +DATE_STAMP=$(date +%Y%m%d) +LOG_FILE="${LOG_DIR}/run-${DATE_STAMP}.log" + +mkdir -p "${LOG_DIR}" "${OUTPUT_DIR}" + +# Single-instance gate. -n = non-blocking; -E 200 = exit 200 if already locked. +exec 9>"${LOCK}" +if ! flock -n -E 200 9; then + echo "[run-daily] another run is in progress; exit 200" >&2 + exit 200 +fi + +run_once() { + local attempt="$1" + local started_at + started_at=$(date +%s) + echo "[run-daily] attempt ${attempt} starting at $(date -Iseconds)" | tee -a "${LOG_FILE}" + + # Refresh CDP endpoint every attempt — Chrome may have restarted. + if ! 
/app/cdp-discover.sh >>"${LOG_FILE}" 2>&1; then + echo "[run-daily] cdp-discover failed" >>"${LOG_FILE}" + return 1 + fi + # Source AND export — bare `source key=value` only sets a shell var, NOT an + # environment variable, so the autocli child process would not see + # AUTOCLI_CDP_ENDPOINT and would fall back to the daemon path + # ("Chrome is not running"). `set -a` makes any assignment auto-export. + set -a + # shellcheck disable=SC1091 + source /run/cdp-endpoint.env + set +a + + # `9>&-` closes FD 9 (our flock holder) on the autocli child. Without this, + # bash's lock FD inherits into autocli; if autocli ever spawns a + # `autocli --daemon` (the daemon-path fallback), that daemon will hold the + # lock for its entire lifetime — making is_running() permanently True + # even after run-daily.sh exits. We hit this exact bug in production. + local out="${OUTPUT_DIR}/${DATE_STAMP}.json" + if ! /app/bin/autocli linkedin recommended --limit 0 --with_jd true -f json > "${out}" 2>>"${LOG_FILE}" 9>&-; then + echo "[run-daily] autocli failed" >>"${LOG_FILE}" + return 2 + fi + + # Sync to Supabase. Capture stdout to its own file so we can parse the + # summary JSON directly (sync_autocli_jobs.py pretty-prints with indent=2, + # which breaks grep+tail line-based parsing). + # Same `9>&-` lock-close discipline (any of sync's children inheriting the + # lock would also pin is_running()). + local sync_out="/tmp/sync-${DATE_STAMP}-${attempt}.json" + if ! uv --project /app/api run --no-project -- python /app/scripts/sync_autocli_jobs.py --input "${out}" > "${sync_out}" 2>>"${LOG_FILE}" 9>&-; then + echo "[run-daily] sync_autocli_jobs.py failed (see ${sync_out})" >>"${LOG_FILE}" + cat "${sync_out}" >>"${LOG_FILE}" 2>/dev/null || true + return 3 + fi + cat "${sync_out}" >>"${LOG_FILE}" + + # Sponsor filter: match new jobs against UK Licensed Sponsors register. + # Non-fatal — sync already succeeded; don't fail the run for this. 
+ # SPONSOR_CSV_DIR → persistent /data/ so CSV cache survives restarts. + local sponsor_ok="true" + echo "[run-daily] running sponsor filter" >>"${LOG_FILE}" + if ! SPONSOR_CSV_DIR=/data uv --project /app/api run --no-project -- python /app/scripts/sponsor_filter.py >>"${LOG_FILE}" 2>&1 9>&-; then + sponsor_ok="false" + echo "[run-daily] sponsor filter failed (non-fatal)" >>"${LOG_FILE}" + fi + + local ended_at + ended_at=$(date +%s) + local duration=$(( ended_at - started_at )) + + # Parse counts from the captured sync output (a complete JSON document). + local upserted scraped skipped + upserted=$(jq -r '.upserted // 0' "${sync_out}" 2>/dev/null || echo 0) + scraped=$(jq -r '.input_rows // 0' "${sync_out}" 2>/dev/null || echo 0) + skipped=$(jq -r '.skipped // 0' "${sync_out}" 2>/dev/null || echo 0) + rm -f "${sync_out}" + + local errors="[]" + if [[ "${sponsor_ok}" != "true" ]]; then + errors='["sponsor_filter_failed"]' + fi + + jq -n \ + --argjson last_run_unixts "${started_at}" \ + --argjson last_duration_seconds "${duration}" \ + --argjson last_exit_code 0 \ + --argjson rows_scraped "${scraped}" \ + --argjson rows_upserted "${upserted}" \ + --argjson rows_skipped "${skipped}" \ + --argjson sponsor_filter_ok "${sponsor_ok}" \ + --arg last_log "$(basename "${LOG_FILE}")" \ + --argjson errors "${errors}" \ + '{last_run_unixts:$last_run_unixts,last_duration_seconds:$last_duration_seconds,last_exit_code:$last_exit_code,rows_scraped:$rows_scraped,rows_upserted:$rows_upserted,rows_skipped:$rows_skipped,sponsor_filter_ok:$sponsor_filter_ok,last_log:$last_log,errors:$errors}' \ + > "${LAST_RUN_JSON}" + echo "[run-daily] attempt ${attempt} succeeded in ${duration}s" | tee -a "${LOG_FILE}" + return 0 +} + +backoffs=(15 60 240) +attempt=1 +final_rc=0 +for sleep_for in "${backoffs[@]}" final; do + if run_once "${attempt}"; then + final_rc=0 + break + else final_rc=$? # capture run_once's status HERE: once `fi` has run, $? is 0 whenever the condition failed, which masked every failure (no failure last_run.json, exit 0) + fi 
+ if [[ "${sleep_for}" == "final" ]]; then + break + fi + echo "[run-daily] sleeping ${sleep_for}s before retry" | tee -a "${LOG_FILE}" + sleep "${sleep_for}" + attempt=$(( attempt + 1 )) +done + +if (( final_rc != 0 )); then + jq -n \ + --argjson last_run_unixts "$(date +%s)" \ + --argjson last_exit_code "${final_rc}" \ + --arg last_log "$(basename "${LOG_FILE}")" \ + '{last_run_unixts:$last_run_unixts,last_exit_code:$last_exit_code,rows_scraped:0,rows_upserted:0,rows_skipped:0,last_log:$last_log,errors:["see log"]}' \ + > "${LAST_RUN_JSON}" +fi +exit "${final_rc}" diff --git a/deploy/docker-compose.local.yml b/deploy/docker-compose.local.yml new file mode 100644 index 0000000..b45e69a --- /dev/null +++ b/deploy/docker-compose.local.yml @@ -0,0 +1,32 @@ +# Local override for Phase 1 testing. +# Run: +# docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.local.yml --env-file deploy/.env.local up -d + +name: autocli-stack-local + +services: + autocli-chrome: + container_name: autocli-chrome-local + image: test-chrome:latest # built locally in Phase 0 + ports: + - "6081:6080" + - "5902:5900" + - "9223:9222" + + autocli-daily: + container_name: autocli-daily-local + image: test-daily:latest # built locally in Phase 0 + ports: + - "8081:8080" + + # No Cloudflare in local mode + cloudflared: + profiles: ["disabled"] + + prometheus: + ports: + - "9091:9090" + + grafana: + ports: + - "3001:3000" diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml new file mode 100644 index 0000000..cba56c3 --- /dev/null +++ b/deploy/docker-compose.yml @@ -0,0 +1,112 @@ +name: autocli-stack + +x-watchtower-label: &watchtower-enable + com.centurylinklabs.watchtower.enable: "true" + +services: + autocli-chrome: + image: ghcr.io/ricksanchez88e/autocli-chrome:main + container_name: autocli-chrome + restart: unless-stopped + shm_size: "2gb" + environment: + VNC_PASSWORD: ${VNC_PASSWORD} + TZ: ${TZ:-Europe/London} + ports: + # Both 6080 (noVNC) and 9222 (CDP) 
bound to loopback ONLY. Public access + # routes through Cloudflare Tunnel + Access (autocli-vnc. for noVNC, + # autocli-cdp. with mTLS for CDP per SPEC §5.3). Without 127.0.0.1 + # binding, anyone with Tailscale or the host IP would have direct + # unauthenticated CDP / VNC access — bypassing every auth layer. + # If you need a backup path, use `ssh -L 6080:localhost:6080` from a + # Tailscale-connected machine. + - "127.0.0.1:6080:6080" + - "127.0.0.1:9222:9222" + volumes: + - chrome-profile:/root/.config/chromium + - chrome-tmp:/tmp + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost:9222/json/version"] + interval: 10s + timeout: 3s + retries: 10 + start_period: 20s + networks: [autocli-net] + labels: *watchtower-enable + + autocli-daily: + image: ghcr.io/ricksanchez88e/autocli-daily:main + container_name: autocli-daily + restart: unless-stopped + depends_on: + autocli-chrome: + condition: service_healthy + environment: + TZ: ${TZ:-Europe/London} + CHROME_HOST: autocli-chrome + CHROME_PORT: "9222" + API_RUN_TOKEN: ${API_RUN_TOKEN} + SUPABASE_URL: ${SUPABASE_URL} + SUPABASE_SERVICE_ROLE_KEY: ${SUPABASE_SERVICE_ROLE_KEY} + SUPABASE_ANON_KEY: ${SUPABASE_ANON_KEY} + volumes: + - daily-data:/data + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost:8080/api/health"] + interval: 15s + timeout: 5s + retries: 6 + start_period: 60s + networks: [autocli-net] + labels: *watchtower-enable + + cloudflared: + image: cloudflare/cloudflared:2026.3.0 + container_name: autocli-cloudflared + restart: unless-stopped + command: tunnel --no-autoupdate run # token comes from TUNNEL_TOKEN below; passing --token on argv would expose the secret in the process list + environment: + TUNNEL_TOKEN: ${CLOUDFLARE_TUNNEL_TOKEN} + depends_on: + autocli-daily: + condition: service_healthy + networks: [autocli-net] + + prometheus: + image: prom/prometheus:v3.5.0 + container_name: autocli-prometheus + restart: unless-stopped + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - 
--storage.tsdb.retention.time=90d + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prom-data:/prometheus + networks: [autocli-net] + + grafana: + image: grafana/grafana:11.6.0 + container_name: autocli-grafana + restart: unless-stopped + environment: + GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD} + GF_USERS_ALLOW_SIGN_UP: "false" + GF_AUTH_ANONYMOUS_ENABLED: "false" + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - grafana-data:/var/lib/grafana + depends_on: + - prometheus + networks: [autocli-net] + +networks: + autocli-net: + driver: bridge + +volumes: + chrome-profile: + chrome-tmp: + daily-data: + prom-data: + grafana-data: diff --git a/deploy/grafana/provisioning/dashboards/autocli.json b/deploy/grafana/provisioning/dashboards/autocli.json new file mode 100644 index 0000000..f25ca8e --- /dev/null +++ b/deploy/grafana/provisioning/dashboards/autocli.json @@ -0,0 +1,55 @@ +{ + "schemaVersion": 39, + "title": "AutoCLI Daily", + "uid": "autocli-daily", + "tags": ["autocli"], + "timezone": "Europe/London", + "refresh": "30s", + "time": {"from": "now-30d", "to": "now"}, + "panels": [ + { + "id": 1, "type": "stat", + "title": "Time since last run", + "gridPos": {"x": 0, "y": 0, "w": 6, "h": 4}, + "targets": [{"datasource": {"uid": "prom-autocli"}, "expr": "time() - autocli_daily_last_run_unixts"}], + "fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "red", "value": 90000}]}}} + }, + { + "id": 2, "type": "stat", + "title": "Last exit code", + "gridPos": {"x": 6, "y": 0, "w": 6, "h": 4}, + "targets": [{"datasource": {"uid": "prom-autocli"}, "expr": "autocli_daily_last_exit_code"}], + "fieldConfig": {"defaults": {"thresholds": {"mode": "absolute", "steps": [{"color": "green"}, {"color": "red", "value": 1}]}}} + }, + { + "id": 3, "type": "stat", + "title": "Rows upserted today", + "gridPos": {"x": 12, "y": 0, "w": 6, "h": 4}, + "targets": 
[{"datasource": {"uid": "prom-autocli"}, "expr": "increase(autocli_daily_rows_upserted_total[24h])"}] + }, + { + "id": 4, "type": "stat", + "title": "Chrome CDP up (24h avg)", + "gridPos": {"x": 18, "y": 0, "w": 6, "h": 4}, + "targets": [{"datasource": {"uid": "prom-autocli"}, "expr": "avg_over_time(autocli_chrome_cdp_up[24h])"}], + "fieldConfig": {"defaults": {"unit": "percentunit", "thresholds": {"mode": "absolute", "steps": [{"color": "red"}, {"color": "yellow", "value": 0.9}, {"color": "green", "value": 0.99}]}}} + }, + { + "id": 5, "type": "timeseries", + "title": "Daily rows (scraped / upserted / skipped)", + "gridPos": {"x": 0, "y": 4, "w": 24, "h": 8}, + "targets": [ + {"datasource": {"uid": "prom-autocli"}, "expr": "increase(autocli_daily_rows_scraped_total[1d])", "legendFormat": "scraped"}, + {"datasource": {"uid": "prom-autocli"}, "expr": "increase(autocli_daily_rows_upserted_total[1d])", "legendFormat": "upserted"}, + {"datasource": {"uid": "prom-autocli"}, "expr": "increase(autocli_daily_rows_skipped_total[1d])", "legendFormat": "skipped"} + ] + }, + { + "id": 6, "type": "timeseries", + "title": "Run duration", + "gridPos": {"x": 0, "y": 12, "w": 24, "h": 8}, + "targets": [{"datasource": {"uid": "prom-autocli"}, "expr": "autocli_daily_last_duration_seconds", "legendFormat": "duration (s)"}], + "fieldConfig": {"defaults": {"unit": "s"}} + } + ] +} diff --git a/deploy/grafana/provisioning/dashboards/dashboards.yml b/deploy/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 0000000..48c5a90 --- /dev/null +++ b/deploy/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,11 @@ +apiVersion: 1 +providers: + - name: autocli + orgId: 1 + folder: AutoCLI + type: file + disableDeletion: true + updateIntervalSeconds: 30 + allowUiUpdates: false + options: + path: /etc/grafana/provisioning/dashboards diff --git a/deploy/grafana/provisioning/datasources/prometheus.yml b/deploy/grafana/provisioning/datasources/prometheus.yml new file mode 
100644 index 0000000..e8bbe9f --- /dev/null +++ b/deploy/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,9 @@ +apiVersion: 1 +datasources: + - name: Prometheus + type: prometheus + uid: prom-autocli + url: http://prometheus:9090 + access: proxy + isDefault: true + editable: false diff --git a/deploy/prometheus/prometheus.yml b/deploy/prometheus/prometheus.yml new file mode 100644 index 0000000..c0c8e8a --- /dev/null +++ b/deploy/prometheus/prometheus.yml @@ -0,0 +1,10 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: autocli-daily + metrics_path: /api/metrics + static_configs: + - targets: + - autocli-daily:8080 diff --git a/docs/supabase/ats_jobs_external_url_contract.sql b/docs/supabase/ats_jobs_external_url_contract.sql new file mode 100644 index 0000000..f6257c1 --- /dev/null +++ b/docs/supabase/ats_jobs_external_url_contract.sql @@ -0,0 +1,23 @@ +-- ATS Form Intelligence job external URL contract. +-- +-- This file documents the Supabase jobs-table fields required by the ATS +-- worker. It is intentionally not wired into an automatic migration runner. +-- Review against the live Supabase schema before applying. 
+ +alter table public.jobs + add column if not exists external_url text, + add column if not exists external_url_hash text, + add column if not exists ats_platform text, + add column if not exists ats_intel_status text, + add column if not exists ats_intel_id uuid, + add column if not exists ats_intel_error text, + add column if not exists ats_intel_requested_at timestamptz, + add column if not exists ats_intel_completed_at timestamptz; + +create index if not exists jobs_external_url_hash_idx + on public.jobs (external_url_hash) + where external_url_hash is not null; + +create index if not exists jobs_ats_intel_status_idx + on public.jobs (ats_intel_status) + where ats_intel_status is not null; diff --git a/docs/superpowers/plans/linkedin-native-recommended.md b/docs/superpowers/plans/linkedin-native-recommended.md new file mode 100644 index 0000000..5b5e304 --- /dev/null +++ b/docs/superpowers/plans/linkedin-native-recommended.md @@ -0,0 +1,217 @@ +# LinkedIn Native Recommended Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a Rust-native `linkedin recommended` command that uses the logged-in browser session's real LinkedIn network responses to extract recommended jobs into the required JSON schema. + +**Architecture:** Keep the public CLI shape as `autocli linkedin recommended --limit 0 -f json`. Register a native `CliCommand.func` for `linkedin recommended` after bundled YAML discovery and before user adapter discovery, so native behavior overrides the bundled YAML while user adapters remain able to override it. 
The native command drives the existing browser page, installs its own in-page response capture after navigation (so the patch survives the page lifecycle), uses captured LinkedIn requests/responses as the source of truth, parses records in Rust, captures both list and detail responses keyed by `job_id`, preserves full request signatures for replay, and only paginates when the observed URL or body can be safely transformed. + +**Tech Stack:** Rust 2021, `autocli-core::CliCommand`, `autocli-core::IPage`, AutoCLI `BrowserBridge`/daemon page, browser `fetch`/XHR capture through `IPage::evaluate`, `tokio`, `serde_json`, current output renderer. + +--- + +## Verified Browser/CDP Support + +Serena was used to verify existing browser support before this revision: + +- `IPage` supports `goto`, `evaluate`, `cookies`, `snapshot`, `auto_scroll`, `intercept_requests`, `get_intercepted_requests`, and `get_network_requests`. +- `DaemonPage::cookies` returns browser cookies through the daemon. +- `DaemonPage::evaluate` executes JavaScript in the logged-in browser page. +- `DaemonPage::intercept_requests` and `CdpPage::intercept_requests` install JS monkey-patches for fetch/XHR. +- `get_network_requests()` is Performance API metadata only; it does not provide response bodies. +- Existing typed `get_intercepted_requests()` can lose arbitrary raw JSON response bodies because it deserializes into `InterceptedRequest`. + +Plan consequence: do not rely on CDP response-body support or `get_network_requests()` for data extraction. Implement a command-local capture script via `page.evaluate()` that stores `{ url, method, status, requestHeaders, requestBody, responseText, responseJson }`, then read that raw capture back with `page.evaluate()`. + +## Corrections From Prior Plan + +- No user-agent rotation on one authenticated LinkedIn session. +- No hard-coded or guessed GraphQL endpoints; derive all URLs from captured requests. 
+- No CDP-level response body extraction (not supported); use in-page JS capture instead. +- Count mismatch is non-fatal by default, strict only with `--strict-count true`. + +## MVP Rework — Gaps Fixed + +1. **Capture survives navigation:** The original plan injected capture before `goto()`, so the JS patch was destroyed when the page navigated. Revised: navigate first, wait for the page to render, then install the in-page capture so it lives for the lifetime of the page. If `intercept_requests` is available as a persistent CDP-level hook, prefer that as an additional safety net. + +2. **Detail responses are captured and merged by `job_id`:** The original plan only parsed list/card GraphQL responses, so `job_description` was always `N/A`. Revised: after extracting job IDs from list responses, trigger per-job detail fetches (by clicking cards or navigating to detail endpoints), capture those detail responses, and merge `job_description` back into each record by `job_id`. + +3. **Full request signature is preserved:** The original capture schema dropped `requestHeaders`, `method`, and `requestBody`. Revised: the capture store includes `{ url, method, status, requestHeaders, requestBody, responseText, responseJson }` so follow-up requests can replay the exact signature observed in the browser session. + +4. **Pagination is gated on safe transformability:** The original plan blindly replaced `start:0` in the URL. LinkedIn may use non-zero start values, base64-encoded variables in the request body, or cursor-based pagination. Revised: inspect the captured list request. If pagination uses a simple integer `start` query parameter, increment it by the page size. If pagination uses a JSON body with a `start` field, transform the body. If the mechanism is opaque (encoded variables, opaque cursors), stop pagination and emit a warning to stderr explaining why. Never fabricate pagination parameters. + +5. 
**Test commands use valid filter syntax:** `cargo test -p autocli-cli linkedin` passes two test name filters, which Cargo rejects. Revised verification steps use `cargo test -p autocli-cli -- linkedin` (module-scoped) or `cargo test -p autocli-cli linkedin::tests` (exact path). + +--- + +## Implementation Steps + +- [ ] **Step 1: Register native command** + + In `crates/autocli-cli/src/commands/linkedin.rs`, implement a `CliCommand` with: + - `name`: `"recommended"` + - `parent`: `"linkedin"` + - Registration after bundled YAML discovery, before user adapter discovery. + - Flags: `--limit <N>` (default 0 = unlimited), `--strict-count <bool>` (default false), `-f json`. + +- [ ] **Step 2: Implement in-page capture script (post-navigation)** + + Design a JS capture script that: + - Is injected via `page.evaluate()` **after** `page.goto()` completes and the job list renders. + - Monkey-patches `fetch` and `XMLHttpRequest` to record the full signature: + `{ url, method, status, requestHeaders, requestBody, responseText, responseJson }` + - Stores captured entries in `window.__autocli_captured__`. + - Captures all `/voyager/api/graphql` (or equivalent) requests — both list queries and detail queries. + - Survives for the lifetime of the page (no navigation after injection). + - If `page.intercept_requests()` provides a persistent CDP-level hook, use it as a secondary capture layer to catch requests that fire before the inline script activates. + +- [ ] **Step 3: Implement page navigation and list capture flow** + + - Navigate to `https://www.linkedin.com/jobs/collections/recommended/` via `page.goto()`. + - Wait for the job list DOM to render (poll for card elements or a known container selector). + - **Then** install the capture script from Step 2. + - Scroll incrementally to trigger lazy-loaded list API calls. + - Read captured list responses back via `page.evaluate("window.__autocli_captured__")`. 
+ - Parse list responses in Rust to extract: `job_id`, `job_title`, `company_name`, `location`, `salary`, `post_time`, `apply url`. + +- [ ] **Step 4: Capture and merge job-detail responses** + + - From the parsed list, collect all unique `job_id` values. + - For each `job_id`, trigger a detail fetch by either: + - Clicking each job card in the list (which LinkedIn's UI translates to a detail API call), or + - Navigating the browser to each job's detail URL and capturing the resulting API responses. + - After each trigger, poll `window.__autocli_captured__` for new entries keyed by `job_id`. + - Parse detail responses to extract `job_description`. + - Merge `job_description` into each output record by matching `job_id`. + - If a detail response is never captured for a given `job_id`, emit `job_description: null` and warn on stderr. + +- [ ] **Step 5: Implement safe pagination** + + - Inspect the captured list request that produced the first page of results. + - Identify the pagination mechanism: + - **URL query parameter `start` (integer):** safe — increment by the page size. + - **JSON request body field `start` (integer):** safe — transform the body and replay with preserved headers. + - **Base64-encoded variables, opaque cursors, or non-integer pagination:** unsafe — stop and warn. + - For safe mechanisms, replay the captured request with the transformed URL/body via `page.evaluate()` (using `fetch` from within the page context to reuse cookies/headers). + - Capture and parse the new responses, merging into the result set. + - Stop when `--limit` is reached, no more results are returned, or pagination is exhausted. + - If pagination is unsafe, emit a stderr warning: `"pagination mechanism not transformable; returning only initial page results"`. + +- [ ] **Step 6: Handle count mismatch** + + - Compare extracted job count against LinkedIn's displayed count (scraped from the page DOM). 
+ - If mismatch: warn on stderr (default) or error out with a non-zero exit code (`--strict-count true`). + - Never include count metadata in JSON output. + +- [ ] **Step 7: Output formatting** + + - Serialize parsed jobs as a JSON array to stdout. + - Support `-f json` explicitly; JSON is the default/only format for this command. + - Ensure the output schema matches: `job_title`, `company_name`, `location`, `salary`, `post_time`, `job_description`, `apply url`. + +--- + +## Verification + +- [ ] **Step 1: Exploration crawl** + + Run: + ```bash + AUTOCLI_BROWSER_COMMAND_TIMEOUT=600 cargo run -p autocli-cli -- linkedin recommended --limit 3 -f json 2>output/linkedin_recommended_exploration.json + ``` + + Expected: + - Browser launches, navigates to LinkedIn recommended jobs. + - Capture script is installed AFTER page render. + - Output is valid JSON with up to 3 job entries, each with a non-null `job_description` (from detail responses). + - If empty or `job_description` is null, inspect `output/linkedin_recommended_exploration.json` for raw capture diagnostics. + +- [ ] **Step 2: Inspect captured responses** + + If output is empty or detail data is missing, inspect the raw captured data: + ```bash + cat output/linkedin_recommended_exploration.json | python3 -m json.tool + ``` + + Expected: enough raw JSON job responses or enough page context to debug capture. + If not, check whether capture was installed after navigation and whether detail triggers fired API calls. + +- [ ] **Step 3: Run full command** + + Run: + ```bash + mkdir -p output + AUTOCLI_BROWSER_COMMAND_TIMEOUT=1200 cargo run -p autocli-cli -- linkedin recommended --limit 0 -f json > output/jd_full.json + ``` + + Expected: + - stdout is valid JSON array. + - Any count mismatch appears on stderr as a warning, not inside JSON. + - If pagination is unsafe, a warning explains why only the initial page was returned. 
+ +- [ ] **Step 4: Validate JSON schema** + + Run: + ```bash + python3 - <<'PY' + import json + from pathlib import Path + + data = json.loads(Path("output/jd_full.json").read_text()) + required = ["job_title", "company_name", "location", "salary", "post_time", "job_description", "apply url"] + assert isinstance(data, list), type(data) + for index, item in enumerate(data): + missing = [key for key in required if key not in item] + assert not missing, (index, missing) + print(len(data)) + PY + ``` + + Expected: + - Prints the output array length. + - No assertion fails. + +- [ ] **Step 5: Verify strict count behavior** + + Run: + ```bash + AUTOCLI_BROWSER_COMMAND_TIMEOUT=1200 cargo run -p autocli-cli -- linkedin recommended --limit 0 --strict-count true -f json > output/jd_full_strict.json + ``` + + Expected: + - Passes if output count equals displayed count. + - Fails with a clear count mismatch error if LinkedIn drifts during crawl. + +- [ ] **Step 6: Final checks** + + Run: + ```bash + cargo test -p autocli-cli -- linkedin + cargo check -q + ``` + + Expected: both pass. + +- [ ] **Step 7: Commit code only** + + Run: + ```bash + git add crates/autocli-cli/src/commands/linkedin.rs crates/autocli-cli/src/commands/mod.rs crates/autocli-cli/src/main.rs + git commit -m "feat(linkedin): crawl recommended jobs natively" + ``` + + Do not add `output/jd_full.json`, `output/jd_full_strict.json`, or `output/linkedin_recommended_exploration.json`. + +## Self-Review + +- The plan uses real network responses from the logged-in browser session as the source of truth. +- Capture is installed **after** navigation so the JS patch survives the page lifecycle. +- Both list and detail responses are captured and merged by `job_id`; `job_description` is populated from real data. +- The full request signature (`url`, `method`, `status`, `requestHeaders`, `requestBody`, `responseText`, `responseJson`) is preserved for replay. 
+- Pagination only proceeds when the observed mechanism is safely transformable; opaque pagination stops with a warning. +- The plan verifies current browser/CDP support and avoids relying on unavailable CDP response-body APIs. +- The plan avoids user-agent rotation on one authenticated session. +- Count mismatch is non-fatal by default and strict only when `--strict-count true`. +- The plan never commits `output/jd_full.json` or live LinkedIn evidence. +- The output remains a JSON array with the requested keys: `job_title`, `company_name`, `location`, `salary`, `post_time`, `job_description`, and `apply url`. +- Test commands use valid Cargo filter syntax (`cargo test -p autocli-cli -- linkedin`). diff --git a/docs/superpowers/specs/2026-05-02-jd-structured-pipeline-design.md b/docs/superpowers/specs/2026-05-02-jd-structured-pipeline-design.md new file mode 100644 index 0000000..19bbaa2 --- /dev/null +++ b/docs/superpowers/specs/2026-05-02-jd-structured-pipeline-design.md @@ -0,0 +1,421 @@ +# JD Structured Extraction Pipeline — Design Spec + +**Status**: approved +**Date**: 2026-05-02 +**Branch**: codex/linkedin-recommended-with-jd + +## Overview + +Pipeline that reads raw JDs from `output/final.json`, sends them to a local +`qwen3-jd-parser` model for structured JSON extraction, and stores results in +Supabase `jobs.jd_structured`. MVP: manual trigger, single Python script. +Future: message-queue trigger. + +## Architecture + +``` +final.json (200 JDs) + │ + ▼ +┌──────────────────────────────────────────────────────┐ +│ jd_pipeline.py │ +│ │ +│ 1. validate input row schema │ +│ 2. store jd_raw + compute raw_hash │ +│ 3. preprocess → jd_cleaned + cleaned_hash │ +│ 4. tokenize stats → adjust server -c │ +│ 5. skip policy (status=ok & version match & hash ok) │ +│ 6. claim (atomic SQL UPDATE RETURNING id) │ +│ 7. async batch → LLM call (temp=0, json_schema) │ +│ 8. parse + jsonschema validate │ +│ 9. retry: validation-error feedback → minimal extract│ +│10. 
atomic upsert (INSERT ON CONFLICT + run_id guard) │ +│11. dead_letter sync (update jobs.status) │ +│12. write extraction_runs summary │ +└──────────────────────────────────────────────────────┘ + │ + ▼ +Supabase jobs.jd_structured (jsonb) +``` + +## Status Machine + +``` + ┌──────────────────────────────────────────┐ + │ │ +pending ──→ processing ──→ ok │ + ↑ │ │ │ + │ │ └── schema/prompt/extractor │ + │ │ version bump → pending │ + │ │ │ + │ ├──→ dead_letter (terminal, log to DLQ) │ + │ │ │ + │ └──→ failed (retryable, will be reclaimed) │ + │ │ │ + │ └──→ pending (on next run) │ + │ │ + └── stale processing (>30min) ──────────────────────┘ +``` + +## Component Details + +### 1. Preprocessing + +Rules (script logic, no model involved): + +- Remove LinkedIn boilerplate snippets (e.g. "Application Process (Takes 20 Min)...") +- Collapse multiple blank lines +- Unicode normalization (fullwidth → ASCII where applicable) +- Strip control characters + +Invariants: + +- `jd_raw` is immutable — always stored as-is +- `jd_cleaned` is model input only +- Both hashes stored for traceability: `raw_hash`, `cleaned_hash` +- `preprocess_version` recorded (e.g. `"linkedin-jd-clean-v1"`) + +### 2. Tokenize Pass + +Before batch processing, run all `jd_cleaned` through llama.cpp `/tokenize` endpoint. +Collect p50, p90, p95, max token counts. Use these to set server `-c`: + +- p95 < 6000 → `-c 8192` +- p95 < 10000 → `-c 12288` +- p95 >= 10000 → `-c 16384` + +Avoid `-c 40960` unless proven necessary — larger context reduces throughput. + +### 3. Skip Policy + +Skip a JD when ALL of the following hold: + +- `jd_structured_status = 'ok'` +- `jd_structured_extractor_version = current_extractor_version` +- `jd_structured_schema_version = current_schema_version` +- `jd_structured_prompt_version = current_prompt_version` +- `jd_structured_raw_hash = current_raw_hash` +- `jd_structured_cleaned_hash = current_cleaned_hash` + +### 4. 
Claim (Atomic) + +```sql +UPDATE jobs SET + jd_structured_status = 'processing', + processing_run_id = :run_id, + processing_started_at = now() +WHERE url_hash = :url_hash + AND ( + jd_structured_status IS NULL + OR jd_structured_status IN ('pending', 'failed') + OR jd_structured_extractor_version IS DISTINCT FROM :extractor_ver + OR jd_structured_schema_version IS DISTINCT FROM :schema_ver + OR jd_structured_prompt_version IS DISTINCT FROM :prompt_ver + OR jd_structured_raw_hash IS DISTINCT FROM :raw_hash + OR jd_structured_cleaned_hash IS DISTINCT FROM :cleaned_hash + ) +RETURNING id; +``` + +No RETURNING row → another worker already claimed, skip. + +### 5. LLM Call + +**Model**: qwen3-jd-parser.gguf via llama.cpp server at `http://127.0.0.1:8091` + +**Request**: + +```json +{ + "messages": [{"role": "user", "content": "<prompt with jd_cleaned>"}], + "temperature": 0, + "max_tokens": 1536, + "response_format": { + "type": "json_schema", + "json_schema": { + "schema": { + "type": "object", + "properties": { ... }, + "required": [...], + "additionalProperties": false + } + } + } +} +``` + +**max_tokens rules** (output-complexity based, NOT input-length based): + +| Condition | max_tokens | +|-----------|-----------| +| Default | 1536 | +| With evidence quotes | 3072 | +| Hard cap | 4096 | + +**Timeout**: `min(300, max(60, p95_latency_seconds * 2))` + +**Concurrency**: client semaphore ≤ server `-np` slots. Recommended: `-np 8` with semaphore 6 (leave scheduling headroom). + +### 6. Server Startup (`run-server.sh`) + +```bash +exec llama-server \ + -m qwen3-jd-parser.gguf \ + -ngl 99 \ + --host 127.0.0.1 --port 8091 \ + -c <ctx_size> \ + -np <parallel_slots> \ + -b 4096 \ + -ub 1024 \ + --cache-type-k f16 --cache-type-v f16 \ + -fa on \ + --jinja \ + --metrics \ + --cont-batching +``` + +### 7. 
JSON Schema (output validation) + +```python +JD_SCHEMA = { + "type": "object", + "additionalProperties": False, + "required": [ + "job_title", + "company_name", + "location", + "skills", + "responsibilities", + "qualifications", + "summary", + "confidence" + ], + "properties": { + "job_title": {"type": "string", "minLength": 1}, + "company_name": {"type": "string", "minLength": 1}, + "location": {"type": "string"}, + "salary_range": {"type": ["string", "null"]}, + + "skills": { + "type": "array", + "items": {"type": "string", "minLength": 1}, + "maxItems": 30 + }, + "responsibilities": { + "type": "array", + "items": {"type": "string", "minLength": 1}, + "maxItems": 12 + }, + "qualifications": { + "type": "array", + "items": {"type": "string", "minLength": 1}, + "maxItems": 12 + }, + + "experience_level": { + "type": ["string", "null"], + "enum": ["intern", "junior", "mid", "senior", "lead", "principal", "unknown", None] + }, + "employment_type": { + "type": ["string", "null"], + "enum": ["full_time", "part_time", "contract", "temporary", "internship", "unknown", None] + }, + "summary": {"type": "string", "minLength": 20, "maxLength": 800}, + + "confidence": { + "type": "object", + "additionalProperties": False, + "required": ["overall", "missing_fields"], + "properties": { + "overall": {"type": "number", "minimum": 0, "maximum": 1}, + "missing_fields": { + "type": "array", + "items": {"type": "string"} + } + } + } + } +} +``` + +### 8. Retry Strategy + +All retries use `temperature = 0`: + +| Attempt | Strategy | +|---------|----------| +| 1 | Standard call with json_schema grammar constraint | +| 2 | Feed validation errors back to model for repair | +| 3 | Minimal extraction: only core fields (title, company, skills, summary) | +| Fail | → dead_letter_records + update jobs.status = 'dead_letter' | + +### 9. 
Atomic Upsert + +```sql +INSERT INTO jobs ( + url, url_hash, source, + jd_raw, + jd_structured, + jd_structured_status, + jd_structured_extractor, + jd_structured_extractor_version, + jd_structured_schema_version, + jd_structured_prompt_version, + jd_structured_raw_hash, + jd_structured_cleaned_hash, + jd_structured_processed_at, + updated_at +) VALUES (...) +ON CONFLICT (url_hash) +DO UPDATE SET + jd_structured = EXCLUDED.jd_structured, + jd_structured_status = 'ok', + jd_structured_extractor = EXCLUDED.jd_structured_extractor, + jd_structured_extractor_version = EXCLUDED.jd_structured_extractor_version, + jd_structured_schema_version = EXCLUDED.jd_structured_schema_version, + jd_structured_prompt_version = EXCLUDED.jd_structured_prompt_version, + jd_structured_raw_hash = EXCLUDED.jd_structured_raw_hash, + jd_structured_cleaned_hash = EXCLUDED.jd_structured_cleaned_hash, + jd_structured_processed_at = now(), + updated_at = now() +WHERE jobs.processing_run_id = :run_id; +``` + +The `WHERE processing_run_id = :run_id` guard prevents stale runs from overwriting newer results. + +### 10. Dead Letter Sync + +```sql +UPDATE jobs SET + jd_structured_status = 'dead_letter', + processing_run_id = NULL +WHERE url_hash = :url_hash + AND processing_run_id = :run_id; + +INSERT INTO dead_letter_records ( + url_hash, url, stage, error_class, error_message, + raw_response, validation_errors, attempt_count, + model, prompt_version, schema_version +) VALUES (...); +``` + +### 11. 
Stale Processing Reaper + +Run at pipeline startup: + +```sql +UPDATE jobs SET + jd_structured_status = 'pending', + processing_run_id = NULL, + processing_started_at = NULL +WHERE jd_structured_status = 'processing' + AND processing_started_at < now() - INTERVAL '30 minutes'; +``` + +## Database Changes + +### New columns on `jobs` + +```sql +ALTER TABLE jobs ADD COLUMN jd_structured_status TEXT + DEFAULT 'pending' + CHECK (jd_structured_status IN ('pending','processing','ok','failed','dead_letter')); + +ALTER TABLE jobs ADD COLUMN jd_structured_extractor TEXT; +ALTER TABLE jobs ADD COLUMN jd_structured_extractor_version TEXT; +ALTER TABLE jobs ADD COLUMN jd_structured_schema_version TEXT; +ALTER TABLE jobs ADD COLUMN jd_structured_prompt_version TEXT; +ALTER TABLE jobs ADD COLUMN jd_structured_raw_hash TEXT; +ALTER TABLE jobs ADD COLUMN jd_structured_cleaned_hash TEXT; +ALTER TABLE jobs ADD COLUMN jd_structured_processed_at TIMESTAMPTZ; + +ALTER TABLE jobs ADD COLUMN processing_run_id TEXT; +ALTER TABLE jobs ADD COLUMN processing_started_at TIMESTAMPTZ; + +CREATE INDEX idx_jobs_jd_structured_status ON jobs (jd_structured_status) + WHERE jd_structured_status IN ('pending','processing'); +CREATE INDEX idx_jobs_extractor_version ON jobs (jd_structured_extractor_version); +``` + +### New table: `extraction_runs` + +```sql +CREATE TABLE extraction_runs ( + id BIGSERIAL PRIMARY KEY, + run_id TEXT UNIQUE NOT NULL, + started_at TIMESTAMPTZ DEFAULT now(), + finished_at TIMESTAMPTZ, + input_file TEXT, + total_count INT, + success_count INT DEFAULT 0, + failed_count INT DEFAULT 0, + skipped_count INT DEFAULT 0, + model TEXT, + model_quant TEXT, + server_params JSONB, + prompt_version TEXT, + schema_version TEXT, + extractor_version TEXT, + avg_latency_ms FLOAT, + p95_latency_ms FLOAT, + avg_prompt_tokens INT, + avg_completion_tokens INT +); +``` + +### New columns on `dead_letter_records` (if not present) + +```sql +ALTER TABLE dead_letter_records ADD COLUMN stage TEXT; 
+ALTER TABLE dead_letter_records ADD COLUMN error_class TEXT; +ALTER TABLE dead_letter_records ADD COLUMN error_message TEXT; +ALTER TABLE dead_letter_records ADD COLUMN raw_response TEXT; +ALTER TABLE dead_letter_records ADD COLUMN validation_errors JSONB; +ALTER TABLE dead_letter_records ADD COLUMN attempt_count INT; +ALTER TABLE dead_letter_records ADD COLUMN model TEXT; +ALTER TABLE dead_letter_records ADD COLUMN prompt_version TEXT; +ALTER TABLE dead_letter_records ADD COLUMN schema_version TEXT; +``` + +## Files + +| File | Purpose | +|------|---------| +| `scripts/jd_pipeline.py` | Main pipeline script | +| `scripts/jd_pipeline_config.py` | Config: versions, timeouts, schema | +| `scripts/requirements.txt` | Python deps: httpx, supabase, jsonschema | +| `home/rick/models/.../run-server.sh` | Updated server startup params | + +## Versions + +``` +extractor: "qwen3-jd-parser" +extractor_version: "v1" +schema_version: "v1" +prompt_version: "linkedin-v1" +preprocess_version: "linkedin-jd-clean-v1" +``` + +## Error Handling + +- Single JD failure does not block the batch +- Network timeout → retry once, then dead_letter +- JSON parse failure → retry with validation feedback, then minimal extract, then dead_letter +- Schema validation failure → same retry ladder +- Stale processing rows → reaped at startup (30min threshold) +- All failures logged to `extraction_runs` summary and `dead_letter_records` + +## Testing + +- Unit: preprocess rules on sample JD texts +- Unit: JSON schema validation with valid/invalid outputs +- Integration: single JD end-to-end (claim → LLM → upsert) +- Integration: idempotency (run twice, second run skips all) +- Manual: benchmark concurrency (find optimal semaphore for -np) + +## Future (out of scope for MVP) + +- Message queue trigger (function signature ready: `process_batch(jobs: List[dict])`) +- Evidence quotes in output (`evidence.skills[].quote`) +- Incremental mode (process only new JDs since last run) +- `-c` dynamic adjustment 
mid-run based on actual token counts diff --git a/docs/superpowers/specs/2026-05-03-ats-form-intelligence-design.md b/docs/superpowers/specs/2026-05-03-ats-form-intelligence-design.md new file mode 100644 index 0000000..f5f2967 --- /dev/null +++ b/docs/superpowers/specs/2026-05-03-ats-form-intelligence-design.md @@ -0,0 +1,237 @@ +# ATS Form Intelligence Design + +## Goal + +Build the first production foundation for ATS form intelligence in AutoCLI. The system reacts to jobs written to Supabase or an existing queue, uses each job's `external_url` as the source of truth, extracts ATS/platform/form evidence, and persists structured intelligence without submitting applications. + +This is not an auto-apply system. It must never submit applications, bypass CAPTCHA, automate Google/SSO/password/MFA/passkey login, collect hidden credentials, or invent form fields without DOM/API/network evidence. + +## Repository Context + +AutoCLI is a Rust workspace with reusable browser and discovery primitives: + +- `autocli-core::IPage` defines browser operations for navigation, JS evaluation, snapshots, screenshots, cookies, tabs, interception, and network requests. +- `autocli-browser` provides `BrowserBridge`, `DaemonPage`, and `CdpPage`. +- `autocli-ai::explore` already performs API surface discovery, JSON suffix probing, `__INITIAL_STATE__` extraction, framework detection, Pinia/Vuex store discovery, and endpoint analysis. +- `autocli-ai::generate` and `cascade` provide useful discovery patterns, but the ATS system must not expose a CLI-first workflow as the production path. +- The repo does not currently contain a durable Supabase/queue layer. The ATS design adds a worker-facing boundary for the already-existing Supabase schema. + +Serena and a bounded read-only explorer were used for the repository inspection. Supabase MCP was requested, but no Supabase MCP namespace was exposed in this session, so live schema inspection is not part of this design pass. 
+## Chosen Approach + +Use Alternative 1: one focused crate plus a minimal worker entrypoint. + +Add: + +- `crates/autocli-ats`: ATS core library with explicit internal modules. +- `crates/autocli-ats-worker`: minimal event-driven worker binary. + +This keeps the first code change smaller than a multi-crate service split while preserving clean module boundaries. The production path is the worker, not a user-facing CLI. + +## Core Modules + +`core` +: Job input/output types, status machine, canonical URL/hash, ATS detection result, required output JSON schema, schema hash helpers, and safety constants. + +`orchestrator` +: Idempotent job flow. It loads the job from the queue payload, canonicalizes `external_url`, checks caches, runs discovery/session/browser steps, persists terminal or blocked states, and acknowledges the queue message only after persistence succeeds. + +`discovery` +: Deterministic ATS detection for Lever, Greenhouse, Ashby, SmartRecruiters, Workday, Google Careers, and Generic. It also adapts `autocli-ai::explore` into structured ATS platform evidence when browser verification is required. + +`browser` +: `BrowserIntelExtractor` over `Arc<dyn IPage>`. The MVP extracts Lever and generic public forms from DOM/accessibility/network evidence. It records fields, labels, required state, file upload requirements, buttons, final submit existence, login walls, CAPTCHA markers, multi-page shape, and observed network evidence. + +`session` +: Metadata-only session gate. It checks whether a valid session exists for user/platform/provider/domain, creates login requests when needed, and never automates login or stores plaintext session material. + +`supabase` +: Repository and queue adapter for the existing Supabase schema. It consumes existing tables/queues, writes statuses/intelligence/login requests, and provides dedupe/lock operations. No live Supabase changes are applied by the worker. 
+ +`worker` +: Small loop that reads a bounded batch from the existing queue, calls the orchestrator, persists success/failure/blocking state, and exits or sleeps based on config. + +## Supabase Schema Contract + +The worker consumes the existing Supabase schema. It expects the existing system to provide tables or compatible views for: + +- `jobs` +- `ats_job_form_intelligence` +- `ats_platform_intelligence` +- `ats_sessions` +- `login_requests` +- queue/dead-letter concepts equivalent to `ats_intel_requested`, `browser_intel_requested`, `alert_requested`, and `dead_letter` + +The current `jobs` table was reported to be missing storage for the stashed external application URL. This branch includes a non-applied Supabase SQL contract at `docs/supabase/ats_jobs_external_url_contract.sql` for review, but the worker must not automatically apply migrations. + +Minimum job fields needed by the ATS worker: + +- `id` +- `company` +- `title` +- `url` or LinkedIn source URL +- `external_url` +- `external_url_hash` +- `ats_platform` +- `ats_intel_status` +- `ats_intel_id` +- `ats_intel_error` +- `ats_intel_requested_at` +- `ats_intel_completed_at` + +If the live schema uses different names, `autocli-ats::supabase` should map those names at the repository boundary instead of leaking schema differences into the orchestrator. + +## Event Flow + +1. Worker receives an `ats_intel_requested` queue message or equivalent existing Supabase queue row. +2. Orchestrator loads the referenced job. +3. Orchestrator rejects or blocks the job if `external_url` is absent. +4. URL canonicalizer removes tracking parameters such as `utm_source`, `utm_medium`, `utm_campaign`, `lever-source`, `gh_src`, `source`, and `ref`, while preserving job-identifying parameters. +5. Detector extracts platform, domain, company slug, posting ID, job ID, or req ID when possible. +6. 
Job-level cache lookup checks `ats_job_form_intelligence` by canonical URL hash, posting identifiers, and schema hash where available. +7. Platform-level cache lookup checks `ats_platform_intelligence`. +8. Session gate checks platform/domain/provider session state before browser extraction when needed. +9. Browser extractor opens public pages through existing `IPage` implementations, initially `BrowserBridge`/CDP-backed. +10. Extractor records deterministic DOM/accessibility/network evidence and stops before final submit. +11. Orchestrator persists `ats_job_form_intelligence` and updates the job status. +12. Login/CAPTCHA states create login requests or alert queue entries and leave jobs requeueable. + +## Status Machine + +Use explicit statuses: + +- `pending` +- `queued` +- `checking_cache` +- `cache_hit` +- `processing` +- `discovery_required` +- `session_checking` +- `login_required` +- `session_ready` +- `browser_extracting` +- `normalizing` +- `ok` +- `captcha_required` +- `expired_job` +- `unsupported` +- `failed` +- `dead_letter` + +Login and CAPTCHA are expected blocked states, not generic failures. + +## Cache And Dedupe + +The orchestrator is cache-first. It opens a browser only after job-level and platform-level cache checks miss or require verification. + +Suggested dedupe keys: + +- `ats_intel:{canonical_apply_url_hash}` +- `ats_discovery:{ats_platform}:{domain}` +- `browser_intel:{canonical_apply_url_hash}:{session_id_or_public}` + +The Supabase adapter owns lock acquisition and duplicate message handling. The orchestrator remains deterministic and idempotent. 
+ +## Browser Extraction Rules + +The MVP extractor supports: + +- public Lever forms +- generic public HTML forms +- login wall detection +- CAPTCHA detection +- final submit detection without clicking submit +- resume/file upload detection +- field label, placeholder, autocomplete, required, visible, disabled, and nearby text extraction +- button extraction +- single-page flow graph +- limited safe reveal actions for non-final `Start application`, `Next`, or `Continue` controls +- timeout and bounded step count + +It does not submit, solve CAPTCHA, enter credentials, or automate login. + +Network evidence can come from existing `IPage::get_network_requests`, `intercept_requests`, and page-level JS capture. Existing `autocli-ai::explore` can enrich platform discovery, but it must not become the source of truth for final form fields. + +## LLM Boundary + +LLM normalization is optional and not part of the first critical path. If added, it may classify field semantic roles or endpoint purpose from deterministic evidence only. All LLM output must be schema-validated and must not invent fields, decide to submit, bypass CAPTCHA, or produce final intelligence without evidence. + +## Error Handling + +Malformed queue messages, missing required job identifiers, Supabase persistence errors, and repeated infrastructure failures can go to `dead_letter` after bounded retries. + +Missing login sessions produce: + +- job status `login_required` +- `login_requests` row with provider/domain/login URL/reason/status +- alert queue row if the existing system supports alerts + +CAPTCHA produces: + +- job status `captcha_required` +- evidence with CAPTCHA type when identifiable +- no bypass attempt + +Browser failures produce structured `failed` status only after capturing the last known page URL, platform, extraction step, and error. + +## Tests + +Add tests close to `crates/autocli-ats`. 
+ +Fixtures: + +- Lever public application HTML/URL +- Greenhouse URL pattern +- Ashby URL pattern +- Workday login-required URL pattern +- Google Careers login-required URL pattern +- unknown generic form HTML +- CAPTCHA marker HTML +- expired job page HTML + +Test coverage: + +- URL canonicalization +- ATS detection +- cache hit path +- cache miss path +- login required path +- CAPTCHA required path +- Lever form extraction +- final submit detection without clicking submit +- output schema validation +- idempotent queue handling + +Supabase tests should use in-memory fakes implementing the same repository/queue traits unless Supabase MCP or a test database is available in the implementation session. + +## Implementation Deliverables + +First implementation pass: + +- Add `crates/autocli-ats` with modules for core, orchestrator, discovery, browser, session, and supabase boundaries. +- Add `crates/autocli-ats-worker` as a minimal worker binary. +- Add Rust tests and ATS HTML/JSON fixtures. +- Keep `docs/supabase/ats_jobs_external_url_contract.sql` as the reviewed, non-applied contract documenting the required `jobs.external_url` storage gap. +- Reuse `autocli-core::IPage`, `autocli-browser::BrowserBridge`, `autocli-browser::CdpPage`, and `autocli-ai::explore`. +- Avoid polished CLI UX. A local replay tool may be added later only for debugging. + +Out of scope for the first pass: + +- applying Supabase migrations automatically +- full Workday extraction +- Google Careers login automation +- CAPTCHA solving +- final submit automation +- Playwright backend +- LLM-based final schema generation + +## Approval State + +Approved design choices from brainstorming: + +- Use one focused ATS crate plus a minimal worker binary. +- Consume the existing Supabase schema, with the latest update that the branch must document the missing job `external_url` storage gap. +- Use existing AutoCLI `IPage` with `BrowserBridge`/CDP for MVP browser extraction. 
+- Keep Playwright as a future backend behind the extractor trait. +- Make the worker path primary and keep CLI/replay tooling secondary. diff --git a/extension/.DS_Store b/extension/.DS_Store deleted file mode 100644 index 3bc3844..0000000 Binary files a/extension/.DS_Store and /dev/null differ diff --git a/extension/package-lock.json b/extension/package-lock.json index a511312..88dfbdd 100644 --- a/extension/package-lock.json +++ b/extension/package-lock.json @@ -1,12 +1,12 @@ { "name": "autocli-extension", - "version": "1.5.5", + "version": "1.5.6", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "autocli-extension", - "version": "1.5.5", + "version": "1.5.6", "dependencies": { "@mozilla/readability": "^0.6.0" }, diff --git a/extension/src/background.ts b/extension/src/background.ts index 978f354..b125b94 100644 --- a/extension/src/background.ts +++ b/extension/src/background.ts @@ -147,16 +147,29 @@ type AutomationSession = { const automationSessions = new Map(); const WINDOW_IDLE_TIMEOUT = 30000; // 30s — quick cleanup after command finishes +// Track active commands per workspace to prevent idle timeout during execution. 
/**
 * Restart the idle countdown for a workspace's automation window.
 *
 * Any pending idle timer for the workspace is cancelled first. A fresh
 * countdown is started only when no commands are currently tracked as active
 * for that workspace; otherwise the session is left without a timer.
 * Does nothing when the workspace has no automation session.
 */
function resetWindowIdleTimer(workspace: string): void {
  const existing = automationSessions.get(workspace);
  if (!existing) {
    return;
  }

  // Always drop the old timer, even if we end up not scheduling a new one.
  clearWindowIdleTimer(existing);

  const inFlight = activeCommands.get(workspace) || 0;
  if (inFlight <= 0) {
    startWindowIdleTimer(workspace);
  }
}
@@ -282,8 +303,10 @@ chrome.runtime.onMessage.addListener((msg, _sender, sendResponse) => { async function handleCommand(cmd: Command): Promise { const workspace = getWorkspaceKey(cmd.workspace); - // Reset idle timer on every command (window stays alive while active) - resetWindowIdleTimer(workspace); + // Clear idle timer and track active command to prevent window closure during execution + const session = automationSessions.get(workspace); + if (session) clearWindowIdleTimer(session); + activeCommands.set(workspace, (activeCommands.get(workspace) || 0) + 1); try { switch (cmd.action) { case 'exec': @@ -315,6 +338,14 @@ async function handleCommand(cmd: Command): Promise { ok: false, error: err instanceof Error ? err.message : String(err), }; + } finally { + const count = (activeCommands.get(workspace) || 0) - 1; + if (count <= 0) { + activeCommands.delete(workspace); + startWindowIdleTimer(workspace); + } else { + activeCommands.set(workspace, count); + } } } diff --git a/extension/src/cdp.ts b/extension/src/cdp.ts index 450c173..44036b8 100644 --- a/extension/src/cdp.ts +++ b/extension/src/cdp.ts @@ -135,7 +135,7 @@ export async function evaluate(tabId: number, expression: string, aggressiveRetr // Only retry on attach/debugger errors, not on JS eval errors const isNavigateError = msg.includes('Inspected target navigated') || msg.includes('Target closed'); const isAttachError = isNavigateError || msg.includes('attach failed') || msg.includes('Debugger is not attached') - || msg.includes('chrome-extension://'); + || msg.includes('chrome-extension://') || msg.includes('Detached while handling command'); if (isAttachError && attempt < MAX_EVAL_RETRIES) { attached.delete(tabId); // Force re-attach on next attempt // SPA navigations recover quickly; debugger detach needs longer diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..3c595f3 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,4 @@ +[toolchain] +channel = "1.94" 
+components = ["rustfmt", "clippy"] +profile = "minimal" diff --git a/scripts/.env.example b/scripts/.env.example new file mode 100644 index 0000000..620753a --- /dev/null +++ b/scripts/.env.example @@ -0,0 +1,4 @@ +# JD Pipeline environment variables +# Copy to .env and fill in values +SUPABASE_URL=https://mivspjqggjiypupwsgqr.supabase.co +SUPABASE_KEY= \ No newline at end of file diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/apply-supabase-migrations.sh b/scripts/apply-supabase-migrations.sh new file mode 100755 index 0000000..d56508d --- /dev/null +++ b/scripts/apply-supabase-migrations.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# apply-supabase-migrations.sh +# Apply Supabase database migrations for JD structured extraction pipeline +# +# Prerequisites: +# 1. Supabase CLI installed (npm install -g supabase) +# 2. Logged in: supabase login +# 3. Linked: supabase link --project-ref mivspjqggjiypupwsgqr +# +# Usage: +# ./scripts/apply-supabase-migrations.sh +# +# The migrations directory is at supabase/migrations/ and contains: +# 20260502203201_add_jd_structured_columns.sql +# 20260502203202_create_jd_structured_indexes.sql +# 20260502203203_create_extraction_runs_table.sql +# 20260502203204_add_dead_letter_columns.sql + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +echo "Applying Supabase migrations from $PROJECT_DIR/supabase/migrations/" +echo "" + +# Check if supabase CLI is available +if ! command -v supabase &>/dev/null; then + echo "ERROR: supabase CLI not found." + echo " Install: npm install -g supabase" + echo " Or use the Management API alternative below." 
+ echo "" + echo "Alternative: Apply via Supabase Management API:" + echo " ACCESS_TOKEN=your_sbp_token" + echo " for f in supabase/migrations/*.sql; do" + echo " SQL=\$(cat \"\$f\")" + echo " curl -s -X POST \"https://api.supabase.com/v1/projects/mivspjqggjiypupwsgqr/sql\" \\" + echo " -H \"Authorization: Bearer \$ACCESS_TOKEN\" \\" + echo " -H \"Content-Type: application/json\" \\" + echo " -d \"{\\\"query\\\": \\\"\$SQL\\\"}\"" + echo " done" + exit 1 +fi + +cd "$PROJECT_DIR" + +# Check if project is linked +if ! supabase status 2>/dev/null | grep -q "Project URL"; then + echo "Linking to Supabase project mivspjqggjiypupwsgqr..." + supabase link --project-ref mivspjqggjiypupwsgqr +fi + +# Apply migrations +echo "Pushing migrations to Supabase..." +supabase db push + +echo "" +echo "Migrations applied successfully." + +echo "" +echo "Verification: checking tables and columns..." +echo " Run: supabase db diff" +echo " Or connect via psql: psql \"postgresql://postgres:YOUR_PASSWORD@db.mivspjqggjiypupwsgqr.supabase.co:5432/postgres\"" diff --git a/scripts/autocli-baseline.sh b/scripts/autocli-baseline.sh new file mode 100755 index 0000000..2eafea6 --- /dev/null +++ b/scripts/autocli-baseline.sh @@ -0,0 +1,514 @@ +#!/usr/bin/env bash +# ============================================================================= +# autocli-baseline.sh — Pre-flight diagnostic checks for autocli browser commands +# ============================================================================= +# Usage: +# scripts/autocli-baseline.sh [--check-only] [--json] [--refresh-extension] [-- ] +# +# --check-only Run checks only, don't execute any command +# --json Output results as JSON (to stderr: human log, to stdout: JSON) +# --refresh-extension Auto-refresh the Chrome extension if dist is newer (requires +# browser-harness and CDP remote debugging access) +# -- After checks pass, execute this command with logging +# +# Examples: +# scripts/autocli-baseline.sh --check-only +# 
scripts/autocli-baseline.sh --refresh-extension --check-only +# scripts/autocli-baseline.sh -- autocli linkedin recommended --limit 0 -f json +# scripts/autocli-baseline.sh --json --check-only +# ============================================================================= + +set -euo pipefail + +# ── Configuration ────────────────────────────────────────────────────────── +DAEMON_PORT="${AUTOCLI_DAEMON_PORT:-19925}" +DAEMON_HOST="${AUTOCLI_DAEMON_HOST:-localhost}" +OUTPUT_DIR="output" +TIMEOUT_SHORT=5 # seconds for quick checks +TIMEOUT_LONG=15 # seconds for network checks +SCRIPT_START=$(date +%s) + +# Extension paths +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +EXT_DIR="${REPO_ROOT}/extension" +EXT_DIST="${EXT_DIR}/dist/background.js" +EXT_SRC="${EXT_DIR}/src/background.ts" +REFRESH_MARKER="${AUTOCLI_REFRESH_MARKER:-${REPO_ROOT}/.baseline-last-refresh}" + +# ── Flags ────────────────────────────────────────────────────────────────── +CHECK_ONLY=false +JSON_OUT=false +REFRESH_EXT=false +COMMAND=() + +# ── Color helpers (auto-detect TTY) ─────────────────────────────────────── +if [ -t 2 ]; then + _BOLD='\033[1m'; _RED='\033[31m'; _GREEN='\033[32m'; _YELLOW='\033[33m'; _CYAN='\033[36m'; _NC='\033[0m' +else + _BOLD=''; _RED=''; _GREEN=''; _YELLOW=''; _CYAN=''; _NC='' +fi + +# ── Logging ──────────────────────────────────────────────────────────────── +TS() { date '+%H:%M:%S'; } + +log_info() { echo -e "[${_CYAN}$(TS)${_NC}] ${_BOLD}INFO${_NC} $*" >&2; } +log_warn() { echo -e "[${_YELLOW}$(TS)${_NC}] ${_BOLD}WARN${_NC} $*" >&2; } +log_error() { echo -e "[${_RED}$(TS)${_NC}] ${_BOLD}ERROR${_NC} $*" >&2; } +log_check() { echo -e "[${_CYAN}$(TS)${_NC}] ${_BOLD}CHECK${_NC} $* ..." 
# Record a passing check.
#   $1     check name (key into CHECK_RESULTS / CHECK_DETAILS)
#   $2...  human-readable detail, joined with spaces
# Stores status "pass" and the detail text, logs the result and bumps
# CHECKS_PASS.
record_pass() {
    local name="$1"; shift
    CHECK_RESULTS["$name"]="pass"
    CHECK_DETAILS["$name"]="$*"
    log_pass "$name — $*"
    # Plain assignment rather than ((CHECKS_PASS++)): the post-increment form
    # evaluates to the old value, so it returns status 1 when the counter is 0
    # and would abort the script under `set -e` if called outside a `||` list.
    CHECKS_PASS=$((CHECKS_PASS + 1))
}

# Record a failing check.
#   $1     check name (key into CHECK_RESULTS / CHECK_DETAILS)
#   $2...  human-readable detail / remediation hint, joined with spaces
# Stores status "fail" and the detail text, logs the result and bumps
# CHECKS_FAIL.
record_fail() {
    local name="$1"; shift
    CHECK_RESULTS["$name"]="fail"
    CHECK_DETAILS["$name"]="$*"
    log_fail "$name — $*"
    # See record_pass for why this is not ((CHECKS_FAIL++)).
    CHECKS_FAIL=$((CHECKS_FAIL + 1))
}
"parse-error") + record_pass "daemon" "listening on :${DAEMON_PORT}, version=$ver" + return 0 + else + record_fail "daemon" "not responding on http://${DAEMON_HOST}:${DAEMON_PORT}/ping — start with: autocli doctor" + return 1 + fi +} + +check_extension_connected() { + log_check "Chrome extension" + local doctor_out + if doctor_out=$(autocli doctor 2>&1); then + if echo "$doctor_out" | grep -q '✓ Chrome extension connected'; then + record_pass "extension" "connected to daemon" + return 0 + elif echo "$doctor_out" | grep -q '✗ Chrome extension connected'; then + record_fail "extension" "NOT connected — refresh extension in chrome://extensions, ensure correct Chrome profile" + return 1 + else + record_fail "extension" "cannot determine status from autocli doctor" + return 1 + fi + else + record_fail "extension" "autocli doctor command failed" + return 1 + fi +} + +check_linkedin_reachable() { + log_check "LinkedIn reachability" + local code + if code=$(curl -s -o /dev/null -w "%{http_code}" --max-time "$TIMEOUT_LONG" \ + -H "Accept-Language: en-US,en;q=0.9" \ + "https://www.linkedin.com/jobs/" 2>/dev/null); then + if [ "$code" -lt 400 ]; then + record_pass "linkedin" "HTTP $code — reachable" + return 0 + elif [ "$code" -eq 403 ] || [ "$code" -eq 429 ]; then + record_pass "linkedin" "HTTP $code — rate-limited but reachable" + return 0 + else + record_fail "linkedin" "HTTP $code — may be blocked or down" + return 1 + fi + else + record_fail "linkedin" "connection timeout — check network" + return 1 + fi +} + +check_network_dns() { + log_check "DNS resolution" + if host linkedin.com > /dev/null 2>&1 || dscacheutil -q host -a name linkedin.com > /dev/null 2>&1 || ping -c 1 -t 3 linkedin.com > /dev/null 2>&1; then + record_pass "dns" "linkedin.com resolves" + return 0 + else + record_warn() { + echo -e "[${_YELLOW}$(TS)${_NC}] ${_BOLD}WARN${_NC} $*" >&2 + CHECK_RESULTS["$1"]="warn" + CHECK_DETAILS["$1"]="$2" + } + record_warn "dns" "linkedin.com DNS lookup failed — may 
still work via cached DNS" + return 0 # non-critical + fi +} + +check_output_dir() { + log_check "output directory" + mkdir -p "$OUTPUT_DIR" 2>/dev/null || true + if [ -d "$OUTPUT_DIR" ] && [ -w "$OUTPUT_DIR" ]; then + local files + files=$(ls "$OUTPUT_DIR"/*.json 2>/dev/null | wc -l | tr -d ' ') + record_pass "output_dir" "$OUTPUT_DIR is writable ($files existing JSON files)" + return 0 + else + record_fail "output_dir" "$OUTPUT_DIR is not writable — check permissions" + return 1 + fi +} + +check_disk_space() { + log_check "disk space" + local avail + if avail=$(df -h . 2>/dev/null | awk 'NR==2 {print $4}'); then + record_pass "disk" "available: $avail" + return 0 + else + record_pass "disk" "could not check (non-critical)" + return 0 + fi +} + +# ── Extension freshness ──────────────────────────────────────────────────── + +check_extension_freshness() { + log_check "extension freshness" + + record_warn() { + echo -e "[${_YELLOW}$(TS)${_NC}] ${_BOLD}WARN${_NC} $*" >&2 + CHECK_RESULTS["$1"]="warn" + CHECK_DETAILS["$1"]="$2" + } + + if [ ! 
-f "$EXT_DIST" ]; then + record_fail "freshness" "extension dist not found at $EXT_DIST — run: cd extension && npm run build" + return 1 + fi + + local dist_mtime + dist_mtime=$(stat -f %m "$EXT_DIST" 2>/dev/null || stat -c %Y "$EXT_DIST" 2>/dev/null || echo 0) + + if [ -f "$REFRESH_MARKER" ]; then + local marker_mtime + marker_mtime=$(stat -f %m "$REFRESH_MARKER" 2>/dev/null || stat -c %Y "$REFRESH_MARKER" 2>/dev/null || echo 0) + + if [ "$dist_mtime" -gt "$marker_mtime" ]; then + local age + age=$(( $(date +%s) - dist_mtime )) + record_fail "freshness" "extension dist is newer than last refresh (built ${age}s ago) — refresh in chrome://extensions or use --refresh-extension" + return 1 + fi + else + # First run without a marker: warn but don't fail + local age + age=$(( $(date +%s) - dist_mtime )) + record_warn "freshness" "no refresh marker yet (dist built ${age}s ago) — use --refresh-extension to create one" + return 0 + fi + + record_pass "freshness" "extension is up to date" + return 0 +} + +refresh_extension() { + log_info "Attempting to auto-refresh Chrome extension..." + + if ! command -v browser-harness &>/dev/null; then + log_error "browser-harness not available — cannot auto-refresh" + log_info "Install: https://github.com/nashsu/browser-harness" + return 1 + fi + + log_info "Navigating to chrome://extensions and clicking refresh..." 
# Print the baseline results as a JSON document on stdout.
#
# Reads the globals SCRIPT_START, CHECKS_PASS, CHECKS_FAIL, CHECK_RESULTS and
# CHECK_DETAILS. Output keys: timestamp, elapsed_sec, passed, failed, checks
# (check name -> {status, detail}).
#
# Every value is handed to Python as a separate argv entry instead of being
# pasted into the Python source text, so quotes, backslashes or newlines in a
# check detail (for example a tool's version string) cannot break the JSON or
# be executed as code.
emit_json() {
    local elapsed name
    local -a check_args=()
    elapsed=$(( $(date +%s) - SCRIPT_START ))

    # Flatten the associative arrays into name/status/detail triples.
    for name in "${!CHECK_RESULTS[@]}"; do
        check_args+=("$name" "${CHECK_RESULTS[$name]}" "${CHECK_DETAILS[$name]:-}")
    done

    # ${arr[@]+"${arr[@]}"} keeps `set -u` quiet on bash versions that treat an
    # empty array expansion as an unbound variable.
    python3 -c '
import json
import sys

timestamp, elapsed, passed, failed = sys.argv[1:5]
rest = sys.argv[5:]
checks = {
    rest[i]: {"status": rest[i + 1], "detail": rest[i + 2]}
    for i in range(0, len(rest) - 2, 3)
}
print(json.dumps({
    "timestamp": timestamp,
    "elapsed_sec": int(elapsed),
    "passed": int(passed),
    "failed": int(failed),
    "checks": checks,
}, indent=2))
' "$(date -Iseconds)" "$elapsed" "$CHECKS_PASS" "$CHECKS_FAIL" ${check_args[@]+"${check_args[@]}"}
}
──────────────────────────────────────────────────── +run_baseline() { + echo -e "[${_CYAN}$(TS)${_NC}] ${_BOLD}══════════════════════════════════════════════${_NC}" >&2 + echo -e "[${_CYAN}$(TS)${_NC}] ${_BOLD}autocli baseline check${_NC}" >&2 + echo -e "[${_CYAN}$(TS)${_NC}] ${_BOLD}══════════════════════════════════════════════${_NC}" >&2 + echo "" >&2 + + # Critical checks — any failure blocks command execution + check_autocli_binary + check_chrome_running + check_daemon_health + check_extension_connected + + # Advisory checks — failures warn but don't block + check_extension_freshness + check_linkedin_reachable + check_network_dns + check_output_dir + check_disk_space + + echo "" >&2 + local elapsed + elapsed=$(( $(date +%s) - SCRIPT_START )) + + if [ "$CHECKS_FAIL" -eq 0 ]; then + echo -e "[${_GREEN}$(TS)${_NC}] ${_BOLD}══════════════════════════════════════════════${_NC}" >&2 + echo -e "[${_GREEN}$(TS)${_NC}] ${_BOLD}All checks passed (${CHECKS_PASS} checks, ${elapsed}s)${_NC}" >&2 + echo -e "[${_GREEN}$(TS)${_NC}] ${_BOLD}══════════════════════════════════════════════${_NC}" >&2 + return 0 + else + echo -e "[${_RED}$(TS)${_NC}] ${_BOLD}══════════════════════════════════════════════${_NC}" >&2 + echo -e "[${_RED}$(TS)${_NC}] ${_BOLD}$CHECKS_FAIL check(s) FAILED (${CHECKS_PASS} passed, ${elapsed}s)${_NC}" >&2 + echo -e "[${_RED}$(TS)${_NC}] ${_BOLD}══════════════════════════════════════════════${_NC}" >&2 + + # Show remediation hints + for name in "${!CHECK_RESULTS[@]}"; do + if [ "${CHECK_RESULTS[$name]}" = "fail" ]; then + echo -e "[${_YELLOW}$(TS)${_NC}] ${_BOLD}HINT${_NC} $name: ${CHECK_DETAILS[$name]}" >&2 + fi + done + return 1 + fi +} + +# ── Execute a command with logging ───────────────────────────────────────── +run_command() { + local start_ts + start_ts=$(date +%s) + log_cmd "Running: $*" + echo -e "[${_CYAN}$(TS)${_NC}] ${_BOLD}───── command output ─────${_NC}" >&2 + + local cmd_exit=0 + "$@" || cmd_exit=$? 
# Script entry point.
#
# 1. Parse flags into CHECK_ONLY / JSON_OUT / REFRESH_EXT / COMMAND.
# 2. With --refresh-extension, reload the Chrome extension when the built
#    dist is newer than the refresh marker (or no marker exists yet).
# 3. Run the baseline checks; with --json also print the JSON report.
# 4. With --check-only, or when no command was given, exit 0/1 on the result.
#    Otherwise run the command unless one of the critical checks
#    (autocli, chrome, daemon, extension) failed, and exit with its status.
main() {
    parse_args "$@"

    # Auto-refresh extension if requested
    if [ "$REFRESH_EXT" = true ]; then
        if [ ! -f "$EXT_DIST" ]; then
            log_error "Cannot refresh — extension dist not found at $EXT_DIST"
            log_info "Run: cd extension && npm run build"
            exit 1
        fi

        # Try the GNU form (-c %Y) before the BSD form (-f %m): GNU stat reads
        # `-f` as "file system mode", prints filesystem info for the file to
        # stdout and only then fails on "%m", which would pollute the value.
        # BSD stat rejects -c without printing anything, so this order is safe
        # on both.
        local dist_mtime marker_mtime=-1
        dist_mtime=$(stat -c %Y "$EXT_DIST" 2>/dev/null || stat -f %m "$EXT_DIST" 2>/dev/null || echo 0)
        if [ -f "$REFRESH_MARKER" ]; then
            marker_mtime=$(stat -c %Y "$REFRESH_MARKER" 2>/dev/null || stat -f %m "$REFRESH_MARKER" 2>/dev/null || echo 0)
        fi

        # marker_mtime stays -1 when there is no marker, which forces a refresh.
        if [ "$dist_mtime" -le "$marker_mtime" ]; then
            log_info "Extension already up to date, skipping refresh"
        else
            refresh_extension || log_warn "Auto-refresh failed — continuing anyway"
        fi
        echo "" >&2
    fi

    local baseline_ok=true
    run_baseline || baseline_ok=false

    if [ "$JSON_OUT" = true ]; then
        emit_json
    fi

    if [ "$CHECK_ONLY" = true ]; then
        if [ "$baseline_ok" = true ]; then
            exit 0
        else
            exit 1
        fi
    fi

    if [ ${#COMMAND[@]} -gt 0 ]; then
        if [ "$baseline_ok" = false ]; then
            local critical_count=0 name
            for name in autocli chrome daemon extension; do
                if [ "${CHECK_RESULTS[$name]:-fail}" = "fail" ]; then
                    # Not ((critical_count++)): that returns status 1 when the
                    # counter is 0, and under `set -e` the script would exit
                    # here silently, before the error below is logged.
                    critical_count=$((critical_count + 1))
                fi
            done
            if [ "$critical_count" -gt 0 ]; then
                log_error "Aborting — $critical_count critical check(s) failed"
                exit 1
            fi
            log_warn "Continuing despite non-critical warnings..."
        fi

        local cmd_status=0
        run_command "${COMMAND[@]}" || cmd_status=$?
        exit "$cmd_status"
    fi

    # No command, not check-only → just ran baseline
    if [ "$baseline_ok" = true ]; then
        exit 0
    else
        exit 1
    fi
}
def _load_dotenv(path: str | os.PathLike[str]) -> None:
    """Load ``KEY=VALUE`` pairs from a dotenv file into ``os.environ``.

    A missing path is ignored. Blank lines, ``#`` comment lines and lines
    without ``=`` are skipped. Keys and values are whitespace-trimmed and the
    value additionally has surrounding single, then double, quotes stripped.
    Variables already present in the environment are never overwritten.
    """
    env_file = pathlib.Path(path)
    if not env_file.is_file():
        return

    for entry in map(str.strip, env_file.read_text(encoding="utf-8").splitlines()):
        if not entry or entry.startswith("#"):
            continue
        name, separator, raw_value = entry.partition("=")
        if not separator:
            continue
        name = name.strip()
        # The process environment wins over the file.
        if name and name not in os.environ:
            os.environ[name] = raw_value.strip().strip("'").strip('"')
def _reconstruct_job_data(row: dict[str, Any]) -> dict[str, Any]:
    """Build the ``job_data`` dict passed to ``score_job()`` from a DB row.

    When the row carries a non-empty ``raw_record`` dict, a shallow copy of it
    is returned with any missing or falsy core field replaced by the matching
    column value (or ``""``). Otherwise the dict is rebuilt purely from the
    columns, including ``apply_type`` and ``source_channel``, with ``""``
    standing in for anything missing or falsy.
    """
    core_fields = (
        "job_title",
        "company_name",
        "location",
        "salary",
        "post_time",
        "apply_url",
        "external_url",
        "job_description",
    )

    raw_record = row.get("raw_record")
    if not (isinstance(raw_record, dict) and raw_record):
        # No usable raw_record -- reconstruct from columns only.
        column_fields = core_fields + ("apply_type", "source_channel")
        return {field: row.get(field) or "" for field in column_fields}

    # raw_record is the primary source; copy so the row itself is not mutated.
    job_data = dict(raw_record)
    for field in core_fields:
        if not job_data.get(field):
            job_data[field] = row.get(field) or ""
    return job_data
def main(argv: list[str] | None = None) -> int:
    """Score jobs stored in Supabase and write the scores back.

    Parses the CLI flags, loads ``.env`` files, selects rows from the
    ``jobs.jobs`` table (only never-scored or stale-version rows unless
    ``--force``), runs ``score_job`` on each row, then either prints a dry-run
    summary or persists every score through the ``update_job_priority_score``
    RPC. The summary is printed to stdout as JSON; warnings go to stderr.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when the run completes (individual rows that fail to score or
        update are reported but do not change the exit code), ``2`` when the
        Supabase client cannot be created or the initial query fails.
    """
    parser = argparse.ArgumentParser(
        description="Backfill priority scores for existing Supabase jobs."
    )
    parser.add_argument("--limit", type=int, default=0, help="Cap rows processed (0 = no limit).")
    parser.add_argument("--force", action="store_true",
                        help="Re-score even already-scored jobs.")
    parser.add_argument("--dry-run", action="store_true",
                        help="Report only, do not write to DB.")
    parser.add_argument("--supabase-url", dest="supabase_url", help="Override SUPABASE_URL.")
    parser.add_argument("--supabase-key", dest="supabase_key", help="Override Supabase key.")
    parser.add_argument("--env-file", help="Optional path to a .env file.")
    args = parser.parse_args(argv)

    _auto_load_env()
    if args.env_file:
        _load_dotenv(args.env_file)

    # ── Build query ──────────────────────────────────────────────────────
    # Missing credentials / missing dependency are reported the same way as a
    # failed query (message on stderr, exit code 2) instead of a traceback.
    try:
        client = _create_supabase_client(args.supabase_url, args.supabase_key)
    except (RuntimeError, ValueError) as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 2

    query = client.schema("jobs").table("jobs").select(
        "id, source, raw_record, "
        "job_title, company_name, location, salary, post_time, "
        "apply_url, external_url, job_description, "
        "apply_type, source_channel, "
        "priority_score, priority_scorer_version, priority_scored_at"
    )

    if not args.force:
        # Only rows that have never been *explicitly* scored OR whose version
        # is stale. priority_score itself can NOT be the filter — the
        # migration's NOT NULL DEFAULT 0 means already-migrated rows look
        # "scored" with priority_score=0 even though they were never actually
        # run through the scorer. priority_scored_at IS NULL is the only
        # honest "never scored" signal.
        query = query.or_(
            f"priority_scored_at.is.null,priority_scorer_version.neq.{SCORER_VERSION}"
        )
    else:
        # Re-score everything (order so newer-first is optional but nice)
        query = query.order("created_at", desc=True)

    if args.limit and args.limit > 0:
        query = query.limit(args.limit)

    # NOTE(review): a single execute() is issued with no pagination; if the
    # API caps rows per response, a full backfill may need several runs —
    # confirm against the project's row-limit setting.
    try:
        resp = query.execute()
    except Exception as exc:
        print(f"ERROR: query failed: {exc}", file=sys.stderr)
        return 2

    # Normalise the payload to a list: a single object is wrapped, an empty
    # payload becomes [].
    if isinstance(resp.data, list):
        rows = resp.data
    elif resp.data:
        rows = [resp.data]
    else:
        rows = []

    if not rows:
        print(json.dumps({"rows_fetched": 0, "message": "No unscored jobs found."}))
        return 0

    # ── Score each row ───────────────────────────────────────────────────
    results: list[dict[str, Any]] = []
    for idx, row in enumerate(rows):
        job_id = str(row.get("id", ""))
        if not job_id:
            print(f"WARN: skipping row {idx}: missing id", file=sys.stderr)
            continue

        try:
            result = score_job(_reconstruct_job_data(row))
        except Exception as exc:
            # One bad row must not abort the whole backfill; record and go on.
            print(f"WARN: scoring failed for job {job_id}: {exc}", file=sys.stderr)
            results.append({
                "job_id": job_id,
                "status": "error",
                "error": str(exc),
            })
            continue

        results.append({
            "job_id": job_id,
            "score": result.score,
            "tier": result.tier,
            "version": result.version,
        })

    scored = [r for r in results if "score" in r]
    errored_count = len([r for r in results if "error" in r])

    # ── Apply ────────────────────────────────────────────────────────────
    if args.dry_run:
        report: dict[str, Any] = {
            "mode": "dry-run",
            "rows_fetched": len(rows),
            "rows_scored": len(scored),
            "rows_errored": errored_count,
            "scorer_version": SCORER_VERSION,
        }
        if scored:
            scores = [r["score"] for r in scored]
            report["priority_scores"] = {
                "min": round(min(scores), 1),
                "max": round(max(scores), 1),
                "avg": round(sum(scores) / len(scores), 1),
            }
            tier_counts: dict[str, int] = {}
            for r in scored:
                tier_counts[r["tier"]] = tier_counts.get(r["tier"], 0) + 1
            report["priority_tier_distribution"] = tier_counts
        print(json.dumps(report, indent=2, ensure_ascii=False))
        return 0

    # ── Write back ───────────────────────────────────────────────────────
    updated = 0
    for r in scored:
        try:
            client.rpc("update_job_priority_score", {
                "p_job_id": r["job_id"],
                "p_priority_score": r["score"],
                "p_priority_tier": r["tier"],
                "p_priority_scorer_version": r["version"],
                "p_priority_signals": {},
            }).execute()
            updated += 1
        except Exception as exc:
            print(
                f"WARN: update failed for job {r['job_id']}: {exc}",
                file=sys.stderr,
            )

    print(
        json.dumps(
            {
                "mode": "live",
                "rows_fetched": len(rows),
                # Successfully scored rows only, matching the dry-run report;
                # errored rows are counted once, under rows_errored.
                "rows_scored": len(scored),
                "rows_updated": updated,
                "rows_errored": errored_count,
                "scorer_version": SCORER_VERSION,
            },
            indent=2,
        )
    )
    return 0
# ---------------------------------------------------------------------------
# HTML / text cleaning
# ---------------------------------------------------------------------------

# Only semicolon-terminated references are decoded, so plain text such as
# "R&D" or "?a=1&copy=2" is never touched.
_ENTITY_RE = re.compile(r"&(#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);")
_TAG_RE = re.compile(r"<[^>]+>")
_SPACE_BEFORE_PUNCT_RE = re.compile(r" ([.,:;])(?= |$)")


def clean_html_text(text: str) -> str:
    """Decode HTML entities, strip tags and collapse whitespace in *text*.

    Entities are decoded in a single pass, so ``&amp;lt;`` becomes the
    literal text ``&lt;`` instead of being decoded twice into ``<``.
    Tags are replaced by a space (not removed outright) so that
    ``<li>Python</li><li>Java</li>`` yields ``Python Java``.

    Returns an empty string for empty / ``None`` input.
    """
    if not text:
        return ""
    # Function-scope import: the file's import block lives outside this section.
    from html import unescape
    from html.entities import html5

    def _decode(match: re.Match[str]) -> str:
        name = match.group(1)
        if name == "hellip":
            return "..."  # keep the ASCII ellipsis the cleaner has always emitted
        if name[0] == "#":
            # html.unescape maps invalid / surrogate code points to U+FFFD
            # instead of producing strings that cannot be encoded as UTF-8.
            return unescape(match.group(0))
        return html5.get(f"{name};", match.group(0))

    text = _ENTITY_RE.sub(_decode, str(text)).replace("\xa0", " ")
    text = _TAG_RE.sub(" ", text)
    text = re.sub(r"[\r\n\t]+", " ", text)
    text = re.sub(r" +", " ", text)
    text = _SPACE_BEFORE_PUNCT_RE.sub(r"\1", text)
    return text.strip()


def clean_jd(jd: str) -> str:
    """Return the job description as plain text without URLs or e-mail addresses."""
    if not jd:
        return ""
    text = clean_html_text(jd)
    text = re.sub(r"https?://\S+", "", text)
    text = re.sub(r"\S+@\S+\.\S+", "", text)
    # Removing URLs / e-mails leaves double spaces behind; newlines were
    # already flattened by clean_html_text, so only spaces need collapsing.
    return re.sub(r" {2,}", " ", text).strip()


_SKILL_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # "C++" ends in a non-word character, so a trailing \b can never match
        # before a space or punctuation; anchor only on the left.
        r"(?<!\w)C\+\+",
        r"\b(?:Python|Java|JavaScript|TypeScript|Go|Rust|Scala|Kotlin|Swift)\b",
        r"\b(?:React|Angular|Vue|Node\.?js|Django|Flask|Spring)\b",
        r"\b(?:AWS|Azure|GCP|Docker|Kubernetes|Terraform)\b",
        r"\b(?:PostgreSQL|MySQL|MongoDB|Redis|Elasticsearch)\b",
        r"\b(?:Git|Linux|Agile|Scrum|JIRA)\b",
        r"\b(?:ML|AI|Machine Learning|Deep Learning|TensorFlow|PyTorch)\b",
        r"\b(?:REST|GraphQL|gRPC|Microservices)\b",
    )
)


def extract_keywords(jd: str) -> list[str]:
    """Return up to 20 lower-cased skill keywords found in *jd*, sorted alphabetically."""
    if not jd:
        return []
    found: set[str] = set()
    for pattern in _SKILL_PATTERNS:
        found.update(hit.lower() for hit in pattern.findall(jd))
    return sorted(found)[:20]


_MEANINGLESS_VALUES = frozenset({"n/a", "na", "null", "none", "-", "--", "...", ""})


def is_meaningful_value(value: str) -> bool:
    """Return False for empty values, placeholders ("n/a", "--", ...) and punctuation-only strings."""
    if not value:
        return False
    text = str(value)
    if text.lower().strip() in _MEANINGLESS_VALUES:
        return False
    return re.match(r"^[\s\.\-\:_]+$", text) is None


_CURRENCY_RE = re.compile(r"£|\$|€|¥|USD|GBP|EUR")
_CURRENCY_CODES = {"£": "GBP", "$": "USD", "€": "EUR", "¥": "CNY"}
# A number with optional thousands separators / decimals and an optional
# "k" multiplier ("120K").
_AMOUNT = r"(\d+(?:,\d{3})*(?:\.\d+)?)\s*([kK]\b)?"
_SALARY_RANGE_RE = re.compile(
    _AMOUNT
    + r"(?:\s*/\s*[A-Za-z]+)?"           # optional "/yr" after the lower bound
    + r"\s*(?:[-–—]+|to)\s*"             # range separator
    + r"(?:£|\$|€|¥|USD|GBP|EUR)?\s*"    # optional currency before the upper bound
    + _AMOUNT
)
_SALARY_SINGLE_RE = re.compile(_AMOUNT)


def _salary_amount(number: str, suffix: str | None) -> float:
    """Convert a matched amount (and optional ``k`` suffix) to a float."""
    amount = float(number.replace(",", ""))
    return amount * 1000 if suffix else amount


def clean_salary(salary: str) -> dict:
    """Parse a free-text salary into ``raw``/``min``/``max``/``currency``/``period``.

    ``currency`` is an ISO code (symbols are mapped, e.g. ``$`` -> ``USD``)
    or ``None`` when no currency marker is present.  ``period`` is
    ``"hour"``, ``"month"`` or (by default) ``"year"``.  Placeholder values
    such as ``"N/A"`` yield an all-empty result.
    """
    if not salary or not is_meaningful_value(salary):
        return {"raw": "", "min": None, "max": None, "currency": None, "period": None}
    text = str(salary)

    # "$" must be escaped: unescaped it is an end-of-string anchor that
    # matches the empty string, so dollar amounts were never recognised.
    currency_match = _CURRENCY_RE.search(text)
    currency = currency_match.group(0) if currency_match else None
    currency = _CURRENCY_CODES.get(currency, currency)

    min_sal: float | None = None
    max_sal: float | None = None
    range_match = _SALARY_RANGE_RE.search(text)
    if range_match:
        min_sal = _salary_amount(range_match.group(1), range_match.group(2))
        max_sal = _salary_amount(range_match.group(3), range_match.group(4))
    else:
        single_match = _SALARY_SINGLE_RE.search(text)
        if single_match:
            min_sal = max_sal = _salary_amount(single_match.group(1), single_match.group(2))

    lowered = text.lower()
    if "/hr" in lowered or "/hour" in lowered:
        period = "hour"
    elif "/month" in lowered:
        period = "month"
    else:
        period = "year"
    return {"raw": text.strip(), "min": min_sal, "max": max_sal, "currency": currency, "period": period}


# ---------------------------------------------------------------------------
# URL normalization
# ---------------------------------------------------------------------------

TRACKING_PARAMS = frozenset({
    "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content",
    "fbclid", "gclid", "gclsrc", "dclid", "gbraid", "wbraid",
    "msclkid", "twclid", "sc_campaign", "sc_channel", "sc_content",
    "sc_medium", "sc_outcome", "sc_geo", "sc_country",
    "ref", "source", "si", "li_fat_id",
    "trk", "trackingId", "tracking_id",
})
# Query keys are compared case-insensitively ("UTM_Source" is still tracking).
_TRACKING_PARAMS_LOWER = frozenset(name.lower() for name in TRACKING_PARAMS)


def normalize_url(raw_url: str) -> str:
    """Remove tracking parameters and normalize a URL for dedup.

    Lower-cases scheme and host, drops a leading ``www.``, removes the
    fragment and every query pair whose key is a known tracking parameter.
    Remaining pairs are kept verbatim and in order (``?flag`` stays
    ``?flag``; it is not rewritten to ``?flag=``).

    Returns the normalized URL, an empty string for empty input, or the
    input unchanged when it cannot be parsed.
    """
    if not raw_url:
        return ""
    try:
        parsed = urlparse(raw_url)
    except ValueError:
        return raw_url
    netloc = parsed.netloc.lower()
    if netloc.startswith("www."):
        netloc = netloc[4:]
    kept_pairs = [
        pair
        for pair in parsed.query.split("&")
        if pair and pair.partition("=")[0].lower() not in _TRACKING_PARAMS_LOWER
    ]
    return urlunparse(
        (parsed.scheme.lower(), netloc, parsed.path, parsed.params, "&".join(kept_pairs), "")
    )


def generate_url_hash(normalized_url: str) -> str:
    """Return the SHA-256 hex digest of *normalized_url* ("" for empty input)."""
    if not normalized_url:
        return ""
    return hashlib.sha256(normalized_url.encode("utf-8")).hexdigest()


# ---------------------------------------------------------------------------
# Dedup helpers
# ---------------------------------------------------------------------------

def dedup_by_url_hash(records: list[dict]) -> tuple[list[dict], list[dict]]:
    """Split *records* into ``(deduped, duplicates)`` by ``url_hash``.

    The first occurrence of each hash wins.  Records without a hash cannot
    be compared and are always kept.
    """
    seen: set[str] = set()
    deduped: list[dict] = []
    duplicates: list[dict] = []
    for rec in records:
        digest = rec.get("url_hash", "") or ""
        if digest and digest in seen:
            duplicates.append(rec)
            continue
        if digest:
            seen.add(digest)
        deduped.append(rec)
    return deduped, duplicates


# ---------------------------------------------------------------------------
# Validation / dead letter
# ---------------------------------------------------------------------------

_TRUTHY_FLAGS = frozenset({"true", "1", "yes"})


def _is_truthy_flag(value: Any) -> bool:
    """Interpret a JSON boolean or a "true"/"1"/"yes" string as True."""
    return value is True or str(value).lower().strip() in _TRUTHY_FLAGS


def validate_record(record: dict) -> tuple[bool, str]:
    """Check whether a cleaned record may be upserted.

    Returns ``(is_valid, reason)``; ``reason`` is empty for valid records.
    """
    if not record.get("title"):
        return False, "empty title"
    if not record.get("company"):
        return False, "empty company"
    url = record.get("url", "") or ""
    external_url = record.get("external_url", "") or ""
    if not url and not external_url:
        return False, "no url and no external_url"
    # LinkedIn records must be applicable somewhere: easy apply or an external URL.
    if record.get("source") == "linkedin" and not external_url:
        easy_apply = record.get("easy_apply")
        if not (easy_apply is True or str(easy_apply).lower().strip() == "true"):
            return False, "linkedin record must have easy_apply=true or external_url"
    return True, ""


# ---------------------------------------------------------------------------
# Cleaning
# ---------------------------------------------------------------------------

def clean_job_record(record: dict, source_prefix: str = "linkedin") -> dict:
    """Clean a single raw job record and return the cleaned dict.

    Args:
        record: Raw input record (kept verbatim under ``raw_record``).
        source_prefix: Source label stored as ``source``.  ``source_channel``
            is ``"recommended"`` for ``"linkedin"`` and ``"unknown"`` otherwise.
    """
    cleaned: dict[str, Any] = {"raw_record": record}

    cleaned["title"] = clean_html_text(record.get("title", ""))
    cleaned["company"] = clean_html_text(record.get("company", ""))
    cleaned["location"] = clean_html_text(record.get("location", ""))
    cleaned["workplace_type"] = record.get("workplace_type", "")

    cleaned["salary"] = clean_salary(record.get("salary", ""))

    cleaned["posted_time"] = record.get("posted_time", "")
    cleaned["applicant_count"] = record.get("applicant_count", "")
    # Accept JSON booleans as well as strings so this flag agrees with
    # apply_type below and with validate_record.
    raw_easy_apply = record.get("easy_apply")
    cleaned["easy_apply"] = _is_truthy_flag(raw_easy_apply)

    # The job URL may arrive under several field names; the first non-empty wins.
    raw_url = ""
    for key in ("source_url", "linkedin_url", "job_url", "url"):
        candidate = str(record.get(key, "") or "")
        if candidate:
            raw_url = candidate
            break
    raw_external_url = record.get("external_url", "") or ""
    cleaned["url"] = raw_url
    cleaned["external_url"] = raw_external_url

    normalized = normalize_url(raw_url or raw_external_url)
    cleaned["url_normalized"] = normalized
    cleaned["url_hash"] = generate_url_hash(normalized)

    raw_jd = record.get("jd", "")
    cleaned["jd"] = clean_jd(raw_jd)
    cleaned["skills"] = extract_keywords(raw_jd)

    workplace = (record.get("workplace_type") or "").lower()
    if "remote" in workplace:
        cleaned["work_type"] = "Remote"
    elif "hybrid" in workplace:
        cleaned["work_type"] = "Hybrid"
    elif "on-site" in workplace or "onsite" in workplace:
        cleaned["work_type"] = "On-site"
    else:
        cleaned["work_type"] = "Unknown"

    cleaned["source"] = source_prefix
    cleaned["source_channel"] = "recommended" if source_prefix == "linkedin" else "unknown"

    if raw_easy_apply is None:
        cleaned["apply_type"] = "unknown"
    elif cleaned["easy_apply"]:
        cleaned["apply_type"] = "easy_apply"
    else:
        cleaned["apply_type"] = "external"

    cleaned["scraped_at"] = None
    return cleaned


def clean_jobs(input_data: list[dict], stats: bool = True) -> list[dict]:
    """Clean a list of job records; optionally print summary stats to stderr."""
    cleaned = [clean_job_record(job) for job in input_data]

    if stats:
        work_types: dict[str, int] = {}
        for job in cleaned:
            label = job.get("work_type", "Unknown")
            work_types[label] = work_types.get(label, 0) + 1
        summary: dict[str, int] = {
            "total": len(cleaned),
            "with_jd": sum(1 for j in cleaned if j.get("jd")),
            "with_salary": sum(1 for j in cleaned if j.get("salary", {}).get("raw")),
            "with_url_hash": sum(1 for j in cleaned if j.get("url_hash")),
            "easy_apply": sum(1 for j in cleaned if j.get("easy_apply")),
        }
        print("=" * 50, file=sys.stderr)
        print("清洗统计:", file=sys.stderr)
        for key, value in summary.items():
            print(f"  {key}: {value}", file=sys.stderr)
        print(f"  工作类型: {work_types}", file=sys.stderr)
        print("=" * 50, file=sys.stderr)

    return cleaned


# ---------------------------------------------------------------------------
# Supabase write via sync_autocli_jobs.py
# ---------------------------------------------------------------------------

def write_to_supabase(cleaned: list[dict], dead_letter_path: str | None = None) -> int:
    """Write cleaned records to Supabase via ``sync_autocli_jobs.py``.

    Invalid records and in-batch duplicates are skipped; when
    *dead_letter_path* is given they are written there (once, replacing
    any previous content) with a ``_skip_reason``.  Input records are not
    mutated.

    Args:
        cleaned: List of cleaned job records.
        dead_letter_path: Optional file path for the dead letter queue.

    Returns:
        0 on success (including "nothing to write"), 1 when the sync
        script fails or cannot be launched.
    """
    valid: list[dict] = []
    dead_letter: list[dict] = []
    for rec in cleaned:
        ok, reason = validate_record(rec)
        if ok:
            valid.append(rec)
        else:
            # Copy so the caller's record is not modified.
            dead_letter.append({**rec, "_skip_reason": reason})

    valid, duplicates = dedup_by_url_hash(valid)

    if dead_letter:
        print(f"Dead letter: {len(dead_letter)} records skipped", file=sys.stderr)
        for dl in dead_letter:
            print(
                f"  SKIP: [{dl.get('_skip_reason', '?')}] {dl.get('title', '?')} @ {dl.get('company', '?')}",
                file=sys.stderr,
            )
    if duplicates:
        print(f"批内去重移除: {len(duplicates)} 条重复记录", file=sys.stderr)

    if dead_letter_path and (dead_letter or duplicates):
        skipped = dead_letter + [
            {**dup, "_skip_reason": "batch_dedup"}
            for dup in sorted(duplicates, key=lambda rec: rec.get("url_hash", ""))
        ]
        with open(dead_letter_path, "w", encoding="utf-8") as f:
            json.dump(skipped, f, ensure_ascii=False, indent=2)
        print(f"Dead letter 写入: {dead_letter_path}", file=sys.stderr)

    if not valid:
        print("没有有效记录可写入 Supabase", file=sys.stderr)
        return 0

    rows: list[dict[str, Any]] = [map_row_for_sync(rec) for rec in valid]

    scripts_dir = os.path.dirname(os.path.abspath(__file__))
    sync_script = os.path.join(scripts_dir, "sync_autocli_jobs.py")
    project_root = os.path.dirname(scripts_dir)

    fd, tmp_path = tempfile.mkstemp(suffix=".json", prefix="linkedin_cleaned_")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(rows, f, ensure_ascii=False, indent=2)

        # Use uv run so we get the project venv where supabase is installed.
        try:
            result = subprocess.run(
                ["uv", "run", "--directory", project_root,
                 sync_script, "--input", tmp_path, "--source", "linkedin"],
                capture_output=True, text=True, check=False,
            )
        except OSError as exc:
            # e.g. uv is not installed / not on PATH
            print(f"ERROR: failed to launch uv: {exc}", file=sys.stderr)
            return 1
        if result.stdout:
            print(result.stdout, file=sys.stderr)
        if result.stderr:
            print(result.stderr, file=sys.stderr)
        if result.returncode != 0:
            print(f"sync_autocli_jobs.py 退出码 {result.returncode}", file=sys.stderr)
            return 1
    finally:
        os.unlink(tmp_path)

    return 0


def map_row_for_sync(cleaned: dict[str, Any]) -> dict[str, Any]:
    """Map a cleaned job record to the row format expected by ``sync_autocli_jobs.py``.

    Key mapping invariants:

    - ``url``: the raw job URL (for reference in the DB).
    - ``url_normalized``: tracking-stripped version (only used for hashing).
    - ``url_hash``: sha256 of the normalized URL (for dedup).
    - ``apply_url``: the external application URL for *external* jobs,
      empty string otherwise.
    """
    salary = cleaned.get("salary")
    return {
        "job_title": cleaned.get("title", ""),
        "company_name": cleaned.get("company", ""),
        "location": cleaned.get("location", ""),
        "salary": salary.get("raw", "") if isinstance(salary, dict) else "",
        "post_time": cleaned.get("posted_time", ""),
        "external_url": cleaned.get("external_url", ""),
        "job_description": cleaned.get("jd", ""),
        "url": cleaned.get("url", ""),
        "url_normalized": cleaned.get("url_normalized", ""),
        "url_hash": cleaned.get("url_hash", ""),
        "source": cleaned.get("source", ""),
        "source_channel": cleaned.get("source_channel", ""),
        "apply_type": cleaned.get("apply_type", ""),
        "raw_record": cleaned.get("raw_record"),
        "apply_url": (
            cleaned.get("external_url", "") if cleaned.get("apply_type") == "external" else ""
        ),
    }
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def main() -> None:
    """Command-line entry point: read a JSON array of jobs, clean it, then emit or sync it.

    The input is read from the given file path, or from stdin when the
    path is ``-``.  With ``--to-db`` the cleaned records are handed to
    ``write_to_supabase`` and its return code becomes the process exit
    code; otherwise the cleaned JSON is written to ``--output`` or stdout.

    Raises:
        ValueError: If the decoded input is not a JSON array.
    """
    import argparse

    cli = argparse.ArgumentParser(description="清洗 LinkedIn 职位数据")
    cli.add_argument("input", help="输入 JSON 文件路径 (或 - 表示 stdin)")
    cli.add_argument("-o", "--output", help="输出 JSON 文件路径(默认 stdout)")
    cli.add_argument("--no-stats", action="store_true", help="不显示统计")
    cli.add_argument("--to-db", action="store_true", help="写入 Supabase(通过 sync_autocli_jobs.py)")
    cli.add_argument("--dead-letter-file", help="Dead letter 输出文件路径")
    opts = cli.parse_args()

    # Load the raw payload.
    if opts.input != "-":
        with open(opts.input, "r", encoding="utf-8") as src:
            payload = json.load(src)
    else:
        payload = json.load(sys.stdin)

    if not isinstance(payload, list):
        raise ValueError("输入必须是 JSON 数组")

    records = clean_jobs(payload, stats=not opts.no_stats)

    # Database mode: exit with the sync result instead of printing JSON.
    if opts.to_db:
        raise SystemExit(write_to_supabase(records, dead_letter_path=opts.dead_letter_file))

    rendered = json.dumps(records, ensure_ascii=False, indent=2)
    if not opts.output:
        print(rendered)
        return
    with open(opts.output, "w", encoding="utf-8") as dst:
        dst.write(rendered)


if __name__ == "__main__":
    main()
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def url_hash(url: str) -> str:
    """Compute the full SHA-256 hex digest of *url*.

    The digest is the complete 64-character hex string (NOT truncated).
    """
    return hashlib.sha256(url.encode("utf-8")).hexdigest()


def _salary_text(value: Any) -> str | None:
    """Return a plain-text salary for *value*.

    Accepts a string, a dict carrying the text under ``raw``, or a missing
    value (``None`` / empty), for which ``None`` is returned.
    """
    if isinstance(value, dict):
        value = value.get("raw")
    return str(value) if value else None


def _should_skip(
    row: dict[str, Any],
    current_raw_hash: str,
    current_cleaned_hash: str,
) -> bool:
    """Return True if *row* (from ``get_existing_jobs``) is already up-to-date.

    A job is skipped only when its status is ``"ok"`` AND the stored
    extractor / schema / prompt versions and the raw / cleaned hashes all
    equal the current ones.  Any other status (``None``, ``"pending"``,
    ``"failed"``, ``"processing"``, ...) means the job must be processed.
    """
    if row.get("jd_structured_status") != "ok":
        return False
    expected = {
        "jd_structured_extractor_version": EXTRACTOR_VERSION,
        "jd_structured_schema_version": SCHEMA_VERSION,
        "jd_structured_prompt_version": PROMPT_VERSION,
        "jd_structured_raw_hash": current_raw_hash,
        "jd_structured_cleaned_hash": current_cleaned_hash,
    }
    return all(row.get(column) == value for column, value in expected.items())


# ---------------------------------------------------------------------------
# Pipeline stats
# ---------------------------------------------------------------------------


class _JobResult:
    """Outcome of one job: status plus failure details when it failed."""

    __slots__ = ("url", "status", "stage", "error_class", "error_message")

    def __init__(
        self,
        url: str,
        status: str,
        stage: str | None = None,
        error_class: str | None = None,
        error_message: str | None = None,
    ) -> None:
        self.url = url
        self.status = status  # "ok" | "failed" | "skipped"
        self.stage = stage
        self.error_class = error_class
        self.error_message = error_message


class PipelineStats:
    """Accumulate counts and per-job results for a pipeline run."""

    __slots__ = ("total", "success", "failed", "skipped", "_jobs")

    def __init__(self) -> None:
        self.total: int = 0
        self.success: int = 0
        self.failed: int = 0
        self.skipped: int = 0
        self._jobs: list[_JobResult] = []

    def record_ok(self, url: str) -> None:
        """Count *url* as successfully processed."""
        self.success += 1
        self._jobs.append(_JobResult(url, "ok"))

    def record_failed(
        self,
        url: str,
        stage: str,
        error_class: str,
        error_message: str,
    ) -> None:
        """Count *url* as failed at *stage* with the given error details."""
        self.failed += 1
        self._jobs.append(
            _JobResult(url, "failed", stage, error_class, error_message)
        )

    def record_skipped(self, url: str) -> None:
        """Count *url* as skipped (up-to-date or claimed elsewhere)."""
        self.skipped += 1
        self._jobs.append(_JobResult(url, "skipped"))

    @property
    def success_rate_text(self) -> str:
        """Success share of processed (ok + failed) jobs, e.g. ``"87.5%"``; ``"N/A"`` if none."""
        attempted = self.success + self.failed
        if attempted == 0:
            return "N/A"
        return f"{self.success / attempted * 100:.1f}%"

    def failed_jobs(self) -> list[_JobResult]:
        """Return the recorded results whose status is ``"failed"``, in order."""
        return [job for job in self._jobs if job.status == "failed"]

    def run_report(
        self,
        run_id: str,
        avg_latency_ms: float | None = None,
        p95_latency_ms: float | None = None,
        reaped: int = 0,
    ) -> str:
        """Render a human-readable multi-line summary of the run."""
        avg_str = f"{avg_latency_ms:.0f} ms" if avg_latency_ms is not None else "N/A"
        p95_str = f"{p95_latency_ms:.0f} ms" if p95_latency_ms is not None else "N/A"

        lines = [
            "=" * 60,
            f"  JD Pipeline Run: {run_id}",
            "=" * 60,
            "",
            "  Summary",
            "  -------",
            f"  Total input:    {self.total}",
            f"  Success:        {self.success} ({self.success_rate_text} of processed)",
            f"  Failed:         {self.failed}",
            f"  Skipped:        {self.skipped}",
            f"  Stale reaped:   {reaped}",
            f"  Avg latency:    {avg_str}",
            f"  P95 latency:    {p95_str}",
        ]

        failures = self.failed_jobs()
        if failures:
            lines += [
                "",
                "  Failed Jobs Detail",
                "  ------------------",
            ]
            for number, job in enumerate(failures, 1):
                msg = job.error_message or "N/A"
                if len(msg) > 120:
                    msg = msg[:117] + "..."
                lines.append(f"  [{number}] {job.url}")
                lines.append(f"      stage: {job.stage}  error: {job.error_class}")
                lines.append(f"      {msg}")

        lines += ["", "=" * 60]
        return "\n".join(lines)


# ---------------------------------------------------------------------------
# Main pipeline
# ---------------------------------------------------------------------------


async def main() -> None:
    """Run the JD extraction pipeline end to end.

    Steps: reap stale rows, load + validate input, preprocess, skip
    up-to-date jobs, ensure/claim rows, extract via the LLM, validate and
    persist (or dead-letter) each result, then print and write the run
    report.  With ``--dry-run`` no database client is created, so every
    database step is skipped.

    Exits with status 1 when the database cannot be initialised or the
    input file is missing, and 130 when interrupted by SIGINT.
    """
    parser = argparse.ArgumentParser(
        description="JD Structured Extraction Pipeline"
    )
    parser.add_argument(
        "--input",
        default=INPUT_FILE,
        help=f"Path to input JSON file (default: {INPUT_FILE})",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Skip all database writes (preprocess + LLM calls still run)",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=0,
        help="Process only N items (0 = all)",
    )
    args = parser.parse_args()

    log_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)
    # Also log to file
    log_dir = Path(__file__).resolve().parent.parent / "output"
    log_dir.mkdir(exist_ok=True)
    fh = logging.FileHandler(log_dir / "jd_pipeline.log")
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    run_id = f"jd-extract-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    logger.info("Run ID: %s", run_id)

    stats = PipelineStats()
    stats_reaped = 0
    cancel_requested = False

    def _signal_handler(sig: int, frame: Any) -> None:
        # First Ctrl-C asks for a graceful stop; the second exits at once.
        nonlocal cancel_requested
        if cancel_requested:
            logger.warning("Second interrupt -- exiting immediately.")
            sys.exit(1)
        cancel_requested = True
        logger.warning(
            "Interrupt received, finishing current batch then exiting..."
        )

    signal.signal(signal.SIGINT, _signal_handler)

    db: DatabaseClient | None = None
    llm: LLMClient | None = None

    # Load .env from scripts/ directory if present (before any DB init).
    env_path = Path(__file__).resolve().parent / ".env"
    if env_path.exists():
        for line in env_path.read_text().splitlines():
            line = line.strip()
            if line and not line.startswith("#") and "=" in line:
                key, _, value = line.partition("=")
                os.environ.setdefault(key.strip(), value.strip())
        # Refresh config from environment (config was imported before .env loaded).
        import jd_pipeline_config as _cfg
        _cfg.SUPABASE_URL = os.environ.get("SUPABASE_URL", _cfg.SUPABASE_URL)
        _cfg.SUPABASE_KEY = os.environ.get("SUPABASE_KEY", _cfg.SUPABASE_KEY)

    if not args.dry_run:
        try:
            db = DatabaseClient()
        except ValueError as exc:
            logger.error("Database init failed: %s", exc)
            logger.error("Set SUPABASE_URL and SUPABASE_KEY env vars or create scripts/.env")
            sys.exit(1)

    llm = LLMClient(base_url=LLM_BASE_URL, model=LLM_MODEL)

    def _dead_letter(job: dict[str, Any], stage: str, error_class: str,
                     message: str, **extra: Any) -> None:
        # Best effort: a failing dead-letter write must not abort the run.
        if not db:
            return
        try:
            db.mark_dead_letter(
                url_hash=job["url_hash"],
                run_id=run_id,
                url=job["url"],
                stage=stage,
                error_class=error_class,
                error_message=message,
                attempt_count=3,
                model=EXTRACTOR,
                **extra,
            )
        except Exception as dl_exc:
            logger.error(
                "Failed to write dead_letter for %s: %s",
                job["url"][:80], dl_exc,
            )

    try:
        # 1. Reap stale processing rows
        if db:
            stats_reaped = db.reap_stale_processing()
            logger.info("Reaped %d stale processing row(s).", stats_reaped)

        # 2. Load and validate input
        input_path = Path(args.input)
        if not input_path.exists():
            logger.error("Input file not found: %s", input_path)
            sys.exit(1)

        with open(input_path, "r", encoding="utf-8") as f:
            raw_items: list[dict] = json.load(f)

        if args.limit > 0:
            raw_items = raw_items[: args.limit]

        logger.info("Loaded %d items from %s", len(raw_items), input_path)

        valid_items: list[dict] = []
        for idx, row in enumerate(raw_items):
            errors = validate_input_row(row)
            if errors:
                logger.warning("Row %d skipped: %s", idx, "; ".join(errors))
                continue
            valid_items.append(row)

        logger.info(
            "Validated: %d / %d items passed input checks.",
            len(valid_items),
            len(raw_items),
        )

        # 3. Preprocess: compute hashes and cleaned text
        processed: list[dict[str, Any]] = []
        for row in valid_items:
            jd_raw_text: str = row["jd"]
            jd_cleaned, cleaned_hash = preprocess(jd_raw_text)
            processed.append(
                {
                    "url": row["url"],
                    "url_hash": url_hash(row["url"]),
                    "source": "linkedin",
                    "title": row.get("title", ""),
                    "company": row.get("company", ""),
                    # Carried through so ensure_job_exists (step 5b) receives
                    # them; they used to be read from dicts that never had them.
                    "location": row.get("location"),
                    "salary": _salary_text(row.get("salary")),
                    "workplace_type": row.get("workplace_type"),
                    "jd_raw": jd_raw_text,
                    "jd_cleaned": jd_cleaned,
                    "raw_hash": compute_hash(jd_raw_text),
                    "cleaned_hash": cleaned_hash,
                }
            )

        stats.total = len(processed)

        # 4. Tokenize stats (for context-size tier selection)
        try:
            token_stats = await llm.tokenize_stats(
                [p["jd_cleaned"] for p in processed]
            )
            logger.info(
                "Token stats: p50=%d p90=%d p95=%d max=%d count=%d",
                token_stats["p50"],
                token_stats["p90"],
                token_stats["p95"],
                token_stats["max"],
                token_stats["count"],
            )
        except LLMError as exc:
            logger.warning("Tokenize stats failed: %s (continuing)", exc)

        # 5. Skip policy: check which jobs are already up-to-date
        existing: dict[str, dict[str, Any]] = {}
        if db:
            all_url_hashes = [p["url_hash"] for p in processed]
            # get_existing_jobs has Supabase IN clause limits,
            # so we batch in chunks of 500.
            chunk_size = 500
            for start in range(0, len(all_url_hashes), chunk_size):
                existing.update(
                    db.get_existing_jobs(all_url_hashes[start : start + chunk_size])
                )
            logger.info(
                "Found %d existing job(s) in database.", len(existing)
            )

        to_process: list[dict[str, Any]] = []
        for p in processed:
            row = existing.get(p["url_hash"])
            if row and _should_skip(row, p["raw_hash"], p["cleaned_hash"]):
                stats.record_skipped(p["url"])
                logger.debug("Skipping up-to-date job: %s", p["url"][:80])
                continue
            to_process.append(p)

        logger.info(
            "To process: %d  Skipped (up-to-date): %d",
            len(to_process),
            stats.skipped,
        )

        # 5b. Ensure job rows exist in DB (insert new ones)
        if db:
            for p in to_process:
                if p["url_hash"] not in existing:
                    db.ensure_job_exists(
                        url=p["url"],
                        url_hash=p["url_hash"],
                        source="linkedin",
                        jd_raw=p["jd_raw"],
                        raw_hash=p["raw_hash"],
                        cleaned_hash=p["cleaned_hash"],
                        company_name=p.get("company", ""),
                        job_title=p.get("title", ""),
                        location=p.get("location"),
                        salary_text=p.get("salary"),
                        work_mode=p.get("workplace_type"),
                    )

        # 6. Claim jobs (database-level lock)
        if db:
            claimed: list[dict[str, Any]] = []
            for p in to_process:
                if cancel_requested:
                    break
                claim_id = db.claim_job(
                    url_hash=p["url_hash"],
                    run_id=run_id,
                    raw_hash=p["raw_hash"],
                    cleaned_hash=p["cleaned_hash"],
                )
                if claim_id is not None:
                    claimed.append(p)
                else:
                    # Another run claimed it, or it's now up-to-date
                    stats.record_skipped(p["url"])
                    logger.debug(
                        "Claim failed (already claimed/up-to-date): %s",
                        p["url"][:80],
                    )
            to_process = claimed
            logger.info("Claimed %d job(s) for processing.", len(to_process))

        # 7. Create extraction run record
        if db:
            db.create_extraction_run(
                run_id=run_id,
                input_file=str(input_path),
                total_count=stats.total,
                model=LLM_MODEL,
            )

        # 8. Extract: send batches to LLM
        if not to_process:
            logger.info("No jobs to extract.")
        else:
            extraction_items = [
                (p["jd_cleaned"], JD_SCHEMA) for p in to_process
            ]

            t_start = time.monotonic()
            results = await llm.extract_batch(extraction_items)
            elapsed = time.monotonic() - t_start

            logger.info(
                "Extraction batch completed in %.1f s (%d items).",
                elapsed,
                len(results),
            )

            # 9. Validate and upsert / dead-letter
            total_jobs = len(to_process)
            for position, (job, result) in enumerate(zip(to_process, results), 1):
                if cancel_requested:
                    break

                short_url = job["url"][:80]

                if result is None:
                    # LLM returned None (all 3 attempts failed)
                    message = "All 3 LLM extraction attempts returned None."
                    stats.record_failed(
                        url=job["url"],
                        stage="llm_extract",
                        error_class="LLMAllAttemptsFailed",
                        error_message=message,
                    )
                    _dead_letter(job, "llm_extract", "LLMAllAttemptsFailed", message)
                    logger.warning("[%d/%d] FAILED: %s", position, total_jobs, short_url)
                else:
                    try:
                        validate(instance=result, schema=JD_SCHEMA)
                    except ValidationError as verr:
                        # Schema validation failed -> dead letter
                        stats.record_failed(
                            url=job["url"],
                            stage="validate",
                            error_class="ValidationError",
                            error_message=str(verr.message),
                        )
                        _dead_letter(
                            job,
                            "validate",
                            "ValidationError",
                            str(verr.message),
                            validation_errors=[str(part) for part in verr.absolute_path],
                        )
                        logger.warning(
                            "[%d/%d] VALIDATION FAILED: %s -- %s",
                            position,
                            total_jobs,
                            short_url,
                            verr.message,
                        )
                    else:
                        # Persist first and count afterwards, so a job is
                        # never reported as a success unless it was stored.
                        # NOTE(review): assumes DatabaseClient.upsert_job
                        # raises DatabaseError on write failure -- confirm.
                        try:
                            if db:
                                db.upsert_job(
                                    url=job["url"],
                                    url_hash=job["url_hash"],
                                    source=job["source"],
                                    jd_raw=job["jd_raw"],
                                    jd_structured=result,
                                    run_id=run_id,
                                    raw_hash=job["raw_hash"],
                                    cleaned_hash=job["cleaned_hash"],
                                )
                        except DatabaseError as db_exc:
                            stats.record_failed(
                                url=job["url"],
                                stage="db_upsert",
                                error_class=type(db_exc).__name__,
                                error_message=str(db_exc),
                            )
                            logger.error(
                                "[%d/%d] UPSERT FAILED: %s -- %s",
                                position,
                                total_jobs,
                                short_url,
                                db_exc,
                            )
                        else:
                            stats.record_ok(job["url"])
                            logger.info("[%d/%d] OK: %s", position, total_jobs, short_url)

                # Progress log every 10 jobs
                processed_count = stats.success + stats.failed
                if processed_count % 10 == 0 and processed_count > 0:
                    logger.info(
                        "Progress: %d/%d processed (%d ok, %d failed)",
                        processed_count,
                        total_jobs,
                        stats.success,
                        stats.failed,
                    )

    except Exception as exc:
        logger.exception("Pipeline aborted: %s", exc)
        raise
    finally:
        # 10. Finalise extraction run record.  Latency metrics come from the
        # LLM client's per-request latency records (multiplied by 1000 here).
        avg_latency_ms: float | None = None
        p95_latency_ms: float | None = None
        if llm and llm._latencies:
            lats = sorted(llm._latencies)
            avg_latency_ms = sum(lats) / len(lats) * 1000
            p95_index = max(0, int(len(lats) * 0.95) - 1)
            p95_latency_ms = lats[p95_index] * 1000

        if db:
            try:
                db.update_extraction_run(
                    run_id=run_id,
                    success=stats.success,
                    failed=stats.failed,
                    skipped=stats.skipped,
                    avg_latency_ms=avg_latency_ms,
                    p95_latency_ms=p95_latency_ms,
                )
            except Exception as exc:
                logger.error("Failed to update extraction run: %s", exc)

        # Close LLM client
        if llm:
            await llm.close()

    # 11. Print summary
    report = stats.run_report(
        run_id=run_id,
        avg_latency_ms=avg_latency_ms,
        p95_latency_ms=p95_latency_ms,
        reaped=stats_reaped,
    )
    print(report)
    logger.info("Run report:\n%s", report)

    # Write structured JSON report for programmatic consumption
    report_path = log_dir / f"jd_pipeline_{run_id}.json"
    report_json = {
        "run_id": run_id,
        "total": stats.total,
        "success": stats.success,
        "failed": stats.failed,
        "skipped": stats.skipped,
        "stale_reaped": stats_reaped,
        "success_rate": stats.success_rate_text,
        # "is not None": a measured latency of 0 ms is still a measurement.
        "avg_latency_ms": round(avg_latency_ms) if avg_latency_ms is not None else None,
        "p95_latency_ms": round(p95_latency_ms) if p95_latency_ms is not None else None,
        "failed_jobs": [
            {
                "url": job.url,
                "stage": job.stage,
                "error_class": job.error_class,
                "error_message": job.error_message,
            }
            for job in stats.failed_jobs()
        ],
    }
    report_path.write_text(json.dumps(report_json, indent=2, ensure_ascii=False))
    logger.info("JSON report written to %s", report_path)

    if args.dry_run:
        print("  (DRY RUN -- no database writes)")

    if cancel_requested:
        logger.warning("Pipeline interrupted by user.")
        sys.exit(130)


if __name__ == "__main__":
    asyncio.run(main())
+""" + +import os + +# --------------------------------------------------------------------------- +# Version tracking +# --------------------------------------------------------------------------- +EXTRACTOR = "qwen3-jd-parser" +EXTRACTOR_VERSION = "v1" +SCHEMA_VERSION = "v1" +PROMPT_VERSION = "linkedin-v1" +PREPROCESS_VERSION = "linkedin-jd-clean-v1" + +# --------------------------------------------------------------------------- +# LLM server config +# --------------------------------------------------------------------------- +LLM_BASE_URL = "http://127.0.0.1:8091" +LLM_MODEL = "qwen3-jd-parser.gguf" + +# --------------------------------------------------------------------------- +# Concurrency +# --------------------------------------------------------------------------- +DEFAULT_SEMAPHORE = 6 + +# --------------------------------------------------------------------------- +# Timeouts (seconds) +# --------------------------------------------------------------------------- +DEFAULT_TIMEOUT = 120.0 +MAX_TIMEOUT = 300.0 +MIN_TIMEOUT = 60.0 + +# --------------------------------------------------------------------------- +# Token limits +# --------------------------------------------------------------------------- +DEFAULT_MAX_TOKENS = 1536 +EVIDENCE_MAX_TOKENS = 3072 +HARD_MAX_TOKENS = 4096 + +# --------------------------------------------------------------------------- +# Stale processing reaper threshold +# --------------------------------------------------------------------------- +STALE_PROCESSING_MINUTES = 30 + +# --------------------------------------------------------------------------- +# Context size thresholds for server -c setting +# --------------------------------------------------------------------------- +# Mapping of server context window size -> p95 token threshold. +# If the p95 token count of a batch is below the threshold, the smaller +# context window is sufficient. 
CTX_SIZE_TIERS = {
    8192: 6000,  # p95 < 6000 -> -c 8192
    12288: 10000,  # p95 < 10000 -> -c 12288
    16384: float("inf"),  # p95 >= 10000 -> -c 16384
}

# ---------------------------------------------------------------------------
# JD Schema for output validation
# ---------------------------------------------------------------------------
# Only job_title, company_name, skills and summary are required; every other
# property is optional.  Unknown properties are rejected
# (additionalProperties=False) at both the top level and inside "confidence".
JD_SCHEMA = {
    "type": "object",
    "additionalProperties": False,
    "required": [
        "job_title",
        "company_name",
        "skills",
        "summary",
    ],
    "properties": {
        "job_title": {"type": "string", "minLength": 1},
        "company_name": {"type": "string", "minLength": 1},
        "location": {"type": ["string", "null"]},
        "salary_range": {"type": ["string", "null"]},
        "skills": {
            "type": "array",
            "items": {"type": "string", "minLength": 1},
            "maxItems": 50,
        },
        "responsibilities": {
            "type": ["array", "null"],
            "items": {"type": "string", "minLength": 1},
            "maxItems": 12,
        },
        "qualifications": {
            "type": ["array", "null"],
            "items": {"type": "string", "minLength": 1},
            "maxItems": 12,
        },
        # The enum lists None explicitly so that a JSON null passes both the
        # "type" and the "enum" checks.
        "experience_level": {
            "type": ["string", "null"],
            "enum": [
                "intern",
                "junior",
                "mid",
                "senior",
                "lead",
                "principal",
                "unknown",
                None,
            ],
        },
        "employment_type": {
            "type": ["string", "null"],
            "enum": [
                "full_time",
                "part_time",
                "contract",
                "temporary",
                "internship",
                "unknown",
                None,
            ],
        },
        "summary": {"type": "string", "minLength": 10, "maxLength": 800},
        "confidence": {
            # This dict previously declared "type" twice
            # (["object", "null"] followed by "object").  Python keeps only
            # the last duplicate key, so the effective value has always been
            # "object"; the shadowed entry is removed to make that explicit.
            # NOTE(review): if a null "confidence" is meant to be accepted,
            # change this to ["object", "null"] deliberately -- confirm
            # against the extraction prompt and the grammar backend.
            "type": "object",
            "additionalProperties": False,
            "required": ["overall", "missing_fields"],
            "properties": {
                "overall": {"type": "number", "minimum": 0, "maximum": 1},
                "missing_fields": {
                    "type": "array",
                    "items": {"type": "string"},
                },
            },
        },
    },
}

# ---------------------------------------------------------------------------
# Minimal schema for retry attempt 3 (only core fields)
# ---------------------------------------------------------------------------
+MINIMAL_SCHEMA = { + "type": "object", + "additionalProperties": False, + "required": ["job_title", "company_name", "skills", "summary"], + "properties": { + "job_title": {"type": "string", "minLength": 1}, + "company_name": {"type": "string", "minLength": 1}, + "skills": { + "type": "array", + "items": {"type": "string", "minLength": 1}, + "maxItems": 50, + }, + "summary": {"type": "string", "minLength": 10, "maxLength": 800}, + }, +} + +# --------------------------------------------------------------------------- +# Supabase config - read from environment variables +# --------------------------------------------------------------------------- +SUPABASE_URL = os.environ.get("SUPABASE_URL", "") +SUPABASE_KEY = os.environ.get("SUPABASE_KEY", "") + +# --------------------------------------------------------------------------- +# Input source file +# --------------------------------------------------------------------------- +INPUT_FILE = "output/final.json" diff --git a/scripts/jd_pipeline_db.py b/scripts/jd_pipeline_db.py new file mode 100644 index 0000000..449d0a0 --- /dev/null +++ b/scripts/jd_pipeline_db.py @@ -0,0 +1,594 @@ +"""Supabase database operations for the JD structured extraction pipeline. + +Provides :class:`DatabaseClient` with methods for atomic job claiming, +upserting extraction results, dead-letter recording, stale processing +reaping, and extraction-run bookkeeping. + +Requires the following RPC functions (defined in migration +``20260502203205_create_jd_pipeline_rpc_functions.sql`` and +``20260502203206_create_mark_dead_letter_rpc.sql``): + +* ``claim_job`` -- atomically claim a pending / version-stale row. +* ``upsert_job_structured`` -- upsert extraction result guarded by run_id. +* ``mark_dead_letter`` -- atomically mark a job as dead_letter and insert a + dead_letter_records row. +* ``reap_stale_processing`` -- reset rows stuck in ``processing``. 
+""" + +from __future__ import annotations + +import json +import logging +from datetime import datetime, timezone +from typing import Any + +from supabase import Client, create_client + +from jd_pipeline_config import ( + EXTRACTOR, + EXTRACTOR_VERSION, + PROMPT_VERSION, + SCHEMA_VERSION, + STALE_PROCESSING_MINUTES, +) +import jd_pipeline_config as _cfg + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Exceptions +# --------------------------------------------------------------------------- + + +class DatabaseError(Exception): + """Base error for database operations.""" + + +# --------------------------------------------------------------------------- +# Client +# --------------------------------------------------------------------------- + + +class DatabaseClient: + """Supabase client wrapper for JD pipeline operations. + + Parameters + ---------- + url: + Supabase project URL (defaults to ``SUPABASE_URL`` env var). + key: + Supabase service-role / anon key (defaults to ``SUPABASE_KEY`` env var). + """ + + def __init__(self, url: str | None = None, key: str | None = None) -> None: + url = url or _cfg.SUPABASE_URL + key = key or _cfg.SUPABASE_KEY + if not url or not key: + raise ValueError( + "SUPABASE_URL and SUPABASE_KEY must be set either via " + "constructor arguments or environment variables." + ) + self._client: Client = create_client(url, key) + + # ------------------------------------------------------------------ + # claim_job + # ------------------------------------------------------------------ + + def claim_job( + self, + url_hash: str, + run_id: str, + raw_hash: str, + cleaned_hash: str, + ) -> int | None: + """Atomically claim a job for processing. 
+ + Calls the ``claim_job`` RPC which performs:: + + UPDATE jobs SET + jd_structured_status = 'processing', + processing_run_id = p_run_id, + processing_started_at = now() + WHERE url_hash = p_url_hash + AND ( jd_structured_status IS NULL + OR jd_structured_status IN ('pending', 'failed') + OR jd_structured_extractor_version + IS DISTINCT FROM p_extractor_ver + OR jd_structured_schema_version + IS DISTINCT FROM p_schema_ver + OR jd_structured_prompt_version + IS DISTINCT FROM p_prompt_ver + OR jd_structured_raw_hash IS DISTINCT FROM p_raw_hash + OR jd_structured_cleaned_hash IS DISTINCT FROM p_cleaned_hash) + RETURNING id; + + Only rows that match the guard conditions get updated. If no row + matched (already claimed or same version already processed), returns + ``None``. + + Parameters + ---------- + url_hash: + SHA-256 of the job URL. + run_id: + Unique run identifier for this pipeline invocation. + raw_hash: + SHA-256 of the raw JD text (for staleness detection). + cleaned_hash: + SHA-256 of the cleaned JD text (for staleness detection). + + Returns + ------- + int or None + The ``jobs.id`` of the claimed row, or ``None``. 
+ """ + try: + resp = self._client.rpc( + "claim_job", + { + "p_url_hash": url_hash, + "p_run_id": run_id, + "p_extractor_ver": EXTRACTOR_VERSION, + "p_schema_ver": SCHEMA_VERSION, + "p_prompt_ver": PROMPT_VERSION, + "p_raw_hash": raw_hash, + "p_cleaned_hash": cleaned_hash, + }, + ).execute() + + rows = resp.data + if rows and len(rows) > 0: + return rows[0].get("id") + return None + except Exception as exc: + raise DatabaseError(f"claim_job failed for {url_hash}: {exc}") from exc + + # ------------------------------------------------------------------ + # ensure_job_exists + # ------------------------------------------------------------------ + + def ensure_job_exists( + self, + url: str, + url_hash: str, + source: str, + jd_raw: str, + raw_hash: str, + cleaned_hash: str, + company_name: str = "", + job_title: str = "", + location: str | None = None, + salary_text: str | None = None, + posted_date: str | None = None, + work_mode: str | None = None, + ) -> None: + """Insert a pending job row if it does not already exist. + + Uses INSERT ... ON CONFLICT DO NOTHING so it's idempotent. + After this, ``claim_job`` can find and lock the row. + + Parameters + ---------- + url: + Original job URL. + url_hash: + SHA-256 of the job URL. + source: + Source platform name (e.g. ``"linkedin"``). + jd_raw: + Original raw JD text. + raw_hash: + SHA-256 of ``jd_raw``. + cleaned_hash: + SHA-256 of the cleaned text. + company_name: + Company name from source data. + job_title: + Job title from source data. + location: + Job location (optional). + salary_text: + Salary text from source data (optional). + posted_date: + Posted date ISO string (optional). + work_mode: + Work mode e.g. Remote, Hybrid (optional). 
+ """ + try: + row = { + "url": url, + "url_hash": url_hash, + "source": source, + "company_name": company_name, + "job_title": job_title, + "jd_raw": jd_raw, + "jd_structured_status": "pending", + "jd_structured_extractor": EXTRACTOR, + "jd_structured_extractor_version": EXTRACTOR_VERSION, + "jd_structured_schema_version": SCHEMA_VERSION, + "jd_structured_prompt_version": PROMPT_VERSION, + "jd_structured_raw_hash": raw_hash, + "jd_structured_cleaned_hash": cleaned_hash, + } + if location is not None: + row["location"] = location + if salary_text is not None: + row["salary_text"] = salary_text + if posted_date is not None: + row["posted_date"] = posted_date + if work_mode is not None: + row["work_mode"] = work_mode + self._client.table("jobs").insert(row).execute() + except Exception as exc: + # Unique violation (23505) means the row already exists — that's fine. + if "23505" in str(exc) or "duplicate" in str(exc).lower(): + return + raise DatabaseError(f"ensure_job_exists failed for {url_hash}: {exc}") from exc + + # ------------------------------------------------------------------ + # upsert_job + # ------------------------------------------------------------------ + + def upsert_job( + self, + url: str, + url_hash: str, + source: str, + jd_raw: str, + jd_structured: dict, + run_id: str, + raw_hash: str, + cleaned_hash: str, + ) -> None: + """Upsert a structured extraction result into the ``jobs`` table. + + Uses the ``upsert_job_structured`` RPC which performs:: + + INSERT INTO jobs (...) + VALUES (...) + ON CONFLICT (url_hash) + DO UPDATE SET + jd_structured = EXCLUDED.jd_structured, + jd_structured_status = 'ok', + ... + WHERE jobs.processing_run_id = p_run_id; + + The ``WHERE jobs.processing_run_id = p_run_id`` guard prevents + overwriting results from a different (newer) run. + + Parameters + ---------- + url: + Original job URL. + url_hash: + SHA-256 of the job URL. + source: + Source platform name (e.g. ``"linkedin"``). 
+ jd_raw: + Original raw JD text (immutable). + jd_structured: + The extracted JSON object from the LLM. + run_id: + Run identifier for the WHERE guard. + raw_hash: + SHA-256 of ``jd_raw``. + cleaned_hash: + SHA-256 of the cleaned text. + """ + try: + self._client.rpc( + "upsert_job_structured", + { + "p_url": url, + "p_url_hash": url_hash, + "p_source": source, + "p_jd_raw": jd_raw, + "p_jd_structured": json.dumps(jd_structured), + "p_jd_structured_status": "ok", + "p_jd_structured_extractor": EXTRACTOR, + "p_jd_structured_extractor_version": EXTRACTOR_VERSION, + "p_jd_structured_schema_version": SCHEMA_VERSION, + "p_jd_structured_prompt_version": PROMPT_VERSION, + "p_jd_structured_raw_hash": raw_hash, + "p_jd_structured_cleaned_hash": cleaned_hash, + "p_run_id": run_id, + }, + ).execute() + except Exception as exc: + raise DatabaseError( + f"upsert_job failed for {url_hash}: {exc}" + ) from exc + + # ------------------------------------------------------------------ + # mark_dead_letter + # ------------------------------------------------------------------ + + def mark_dead_letter( + self, + url_hash: str, + run_id: str, + url: str, + stage: str, + error_class: str, + error_message: str, + raw_response: str | None = None, + validation_errors: list[str] | None = None, + attempt_count: int = 0, + model: str | None = None, + ) -> None: + """Mark a job as dead-letter and record the failure details. + + Calls the ``mark_dead_letter`` RPC which atomically:: + + 1. UPDATE jobs SET + jd_structured_status = 'dead_letter', + processing_run_id = NULL + WHERE url_hash = p_url_hash + AND processing_run_id = p_run_id + RETURNING id, source; + + 2. INSERT INTO dead_letter_records ( + source_job_id, source, url, stage, error_class, + error_message, raw_response, validation_errors, + attempt_count, model, prompt_version, schema_version + ) + SELECT id, source, p_url, ... FROM updated; + + If the UPDATE matches no rows (e.g. 
the row was already claimed by a + newer run), no dead-letter record is inserted either. + + Parameters + ---------- + url_hash: + SHA-256 of the job URL. + run_id: + Run identifier for the WHERE guard on the UPDATE. + url: + Original job URL. + stage: + Pipeline stage where the error occurred + (e.g. ``"preprocess"``, ``"llm_extract"``, ``"validate"``). + error_class: + Short error class name (e.g. ``"LLMTimeoutError"``). + error_message: + Human-readable error description. + raw_response: + Raw text returned by the LLM (if any). + validation_errors: + List of schema validation error messages (if applicable). + attempt_count: + Which attempt number failed (0, 1, 2, or 3). + model: + Extractor name (e.g. ``"qwen3-jd-parser"``). + Defaults to :data:`EXTRACTOR`. + """ + try: + self._client.rpc( + "mark_dead_letter", + { + "p_url_hash": url_hash, + "p_run_id": run_id, + "p_url": url, + "p_stage": stage, + "p_error_class": error_class, + "p_error_message": error_message, + "p_raw_response": raw_response, + "p_validation_errors": ( + json.dumps(validation_errors) + if validation_errors is not None + else None + ), + "p_attempt_count": attempt_count, + "p_model": model or EXTRACTOR, + "p_prompt_version": PROMPT_VERSION, + "p_schema_version": SCHEMA_VERSION, + }, + ).execute() + + except Exception as exc: + raise DatabaseError( + f"mark_dead_letter failed for {url_hash}: {exc}" + ) from exc + + # ------------------------------------------------------------------ + # reap_stale_processing + # ------------------------------------------------------------------ + + def reap_stale_processing( + self, stale_minutes: int | None = None + ) -> int: + """Reap rows stuck in ``processing`` for longer than the threshold. + + Calls the ``reap_stale_processing`` RPC which resets them to + ``pending`` so a future run will re-process them. + + Parameters + ---------- + stale_minutes: + Staleness threshold in minutes. Defaults to + ``STALE_PROCESSING_MINUTES`` (30). 
+ + Returns + ------- + int + Number of rows reaped. + """ + try: + resp = self._client.rpc( + "reap_stale_processing", + { + "p_stale_minutes": ( + stale_minutes or STALE_PROCESSING_MINUTES + ) + }, + ).execute() + + rows = resp.data + if rows and len(rows) > 0: + return int(rows[0].get("reaped_count", 0)) + return 0 + except Exception as exc: + raise DatabaseError(f"reap_stale_processing failed: {exc}") from exc + + # ------------------------------------------------------------------ + # extraction runs + # ------------------------------------------------------------------ + + def create_extraction_run( + self, + run_id: str, + input_file: str, + total_count: int, + model: str, + server_params: dict | None = None, + ) -> None: + """Create a new extraction run record. + + Parameters + ---------- + run_id: + Unique run identifier (e.g. UUID). + input_file: + Path or name of the input file processed. + total_count: + Total number of jobs in the input. + model: + Model name (e.g. ``"qwen3-jd-parser.gguf"``). + server_params: + Arbitrary JSON-serialisable server parameters + (e.g. ``{"context_size": 8192, "n_gpu_layers": 35}``). + """ + try: + self._client.table("extraction_runs").insert( + { + "run_id": run_id, + "input_file": input_file, + "total_count": total_count, + "model": model, + "server_params": ( + json.dumps(server_params) if server_params else None + ), + "prompt_version": PROMPT_VERSION, + "schema_version": SCHEMA_VERSION, + "extractor_version": EXTRACTOR_VERSION, + } + ).execute() + except Exception as exc: + raise DatabaseError( + f"create_extraction_run failed for {run_id}: {exc}" + ) from exc + + def update_extraction_run( + self, + run_id: str, + success: int, + failed: int, + skipped: int, + avg_latency_ms: float | None = None, + p95_latency_ms: float | None = None, + avg_prompt_tokens: int | None = None, + avg_completion_tokens: int | None = None, + ) -> None: + """Finalise an extraction run record with completion stats. 
+ + Sets ``finished_at`` to current time. + + Parameters + ---------- + run_id: + Run identifier to update. + success: + Number of successful extractions. + failed: + Number of failed extractions. + skipped: + Number of skipped jobs (already processed, up-to-date). + avg_latency_ms: + Average per-request latency in milliseconds. + p95_latency_ms: + P95 per-request latency in milliseconds. + avg_prompt_tokens: + Average prompt token count. + avg_completion_tokens: + Average completion token count. + """ + update: dict[str, Any] = { + "finished_at": datetime.now(timezone.utc).isoformat(), + "success_count": success, + "failed_count": failed, + "skipped_count": skipped, + } + if avg_latency_ms is not None: + update["avg_latency_ms"] = avg_latency_ms + if p95_latency_ms is not None: + update["p95_latency_ms"] = p95_latency_ms + if avg_prompt_tokens is not None: + update["avg_prompt_tokens"] = avg_prompt_tokens + if avg_completion_tokens is not None: + update["avg_completion_tokens"] = avg_completion_tokens + + try: + self._client.table("extraction_runs").update( + update + ).eq("run_id", run_id).execute() + except Exception as exc: + raise DatabaseError( + f"update_extraction_run failed for {run_id}: {exc}" + ) from exc + + # ------------------------------------------------------------------ + # get_existing_jobs + # ------------------------------------------------------------------ + + def get_existing_jobs( + self, url_hashes: list[str] + ) -> dict[str, dict[str, Any]]: + """Return existing jobs matching the given URL hashes. + + Selects version and hash columns for staleness comparison:: + + SELECT url_hash, jd_structured_status, + jd_structured_extractor_version, + jd_structured_schema_version, + jd_structured_prompt_version, + jd_structured_raw_hash, jd_structured_cleaned_hash + FROM jobs + WHERE url_hash IN (...) + + Parameters + ---------- + url_hashes: + List of hashes to look up. 
+ + Returns + ------- + dict[str, dict] + Mapping of ``url_hash`` -> row data dict. Only hashes that + exist in the database appear as keys. + """ + if not url_hashes: + return {} + + try: + resp = ( + self._client.table("jobs") + .select( + "url_hash, jd_structured_status, " + "jd_structured_extractor_version, " + "jd_structured_schema_version, " + "jd_structured_prompt_version, " + "jd_structured_raw_hash, jd_structured_cleaned_hash" + ) + .in_("url_hash", url_hashes) + .execute() + ) + + rows = resp.data + if not rows: + return {} + + return {row["url_hash"]: row for row in rows} + except Exception as exc: + raise DatabaseError( + f"get_existing_jobs failed for {len(url_hashes)} hashes: {exc}" + ) from exc diff --git a/scripts/jd_pipeline_llm.py b/scripts/jd_pipeline_llm.py new file mode 100644 index 0000000..3fcd6a9 --- /dev/null +++ b/scripts/jd_pipeline_llm.py @@ -0,0 +1,439 @@ +"""Async LLM client for batch JD structured extraction. + +Provides LLMClient with concurrency-limited batch processing, +grammar-constrained JSON generation via llama.cpp native json_schema format, +and a three-attempt retry strategy (standard -> repair -> minimal). + +Designed for llama.cpp server running qwen3-jd-parser.gguf. 
+""" + +from __future__ import annotations + +import asyncio +import json +import logging +import time +from typing import Any + +import httpx +from jsonschema import ValidationError, validate + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Error classes +# --------------------------------------------------------------------------- + + +class LLMError(Exception): + """Base error for LLM operations.""" + + +class LLMTimeoutError(LLMError): + """Request timed out.""" + + +class LLMJsonParseError(LLMError): + """Failed to parse model output as JSON.""" + + +class LLMValidationError(LLMError): + """Model output JSON does not conform to schema.""" + + +# --------------------------------------------------------------------------- +# Minimal core-field schema (Attempt 3 fallback) +# --------------------------------------------------------------------------- + +MINIMAL_SCHEMA: dict = { + "type": "object", + "properties": { + "job_title": {"type": "string"}, + "company_name": {"type": "string"}, + "skills": { + "type": "array", + "items": {"type": "string"}, + }, + "summary": {"type": "string"}, + }, + "required": ["job_title", "company_name", "skills", "summary"], + "additionalProperties": False, +} + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _build_response_format(schema: dict) -> dict: + """Build llama.cpp native json_schema response_format. + + llama.cpp expects:: + + {"type": "json_schema", "json_schema": {"schema": { ... }}} + + This is the *native* format -- NOT the OpenAI format which adds + ``name`` and ``strict`` wrappers around the schema object. 
+ """ + return { + "type": "json_schema", + "json_schema": { + "schema": schema, + }, + } + + +def _compute_p95(values: list[float]) -> float: + """Return the 95th percentile of *values*.""" + if not values: + return 120.0 + sorted_vals = sorted(values) + idx = max(0, int(len(sorted_vals) * 0.95) - 1) + return sorted_vals[idx] + + +# --------------------------------------------------------------------------- +# LLMClient +# --------------------------------------------------------------------------- + + +class LLMClient: + """Async LLM client with concurrency-limited batch processing. + + Parameters + ---------- + base_url: + Base URL of the llama.cpp server (e.g. ``http://127.0.0.1:8091``). + model: + Model name for the ``model`` field in completions requests. + semaphore: + Max concurrent in-flight requests. Defaults to 6 (fewer than + the 8 server slots for scheduling headroom). + default_max_tokens: + Default ``max_tokens`` for generation. 1536 for standard schemas, + 3072 when evidence quotes are requested, hard cap 4096. + timeout: + Base request timeout in seconds. Dynamically adjusted based on + observed latency via ``_dynamic_timeout()``. 
+ """ + + def __init__( + self, + base_url: str, + model: str, + semaphore: int = 6, + default_max_tokens: int = 1536, + timeout: float = 120.0, + ) -> None: + self.base_url = base_url.rstrip("/") + self.model = model + self._semaphore_size = semaphore + self.default_max_tokens = default_max_tokens + self._base_timeout = timeout + self._client: httpx.AsyncClient | None = None + self._semaphore: asyncio.Semaphore | None = None + self._latencies: list[float] = [] + + # -- context manager ----------------------------------------------------- + + async def __aenter__(self) -> LLMClient: + return self + + async def __aexit__(self, *args: Any) -> None: + await self.close() + + # -- internal helpers ---------------------------------------------------- + + async def _get_client(self) -> httpx.AsyncClient: + if self._client is None or self._client.is_closed: + # Client-level timeout is the 300 s hard cap; per-request + # timeouts use the dynamic value from _dynamic_timeout(). + self._client = httpx.AsyncClient( + base_url=self.base_url, + timeout=httpx.Timeout(300.0), + ) + return self._client + + def _get_semaphore(self) -> asyncio.Semaphore: + if self._semaphore is None: + self._semaphore = asyncio.Semaphore(self._semaphore_size) + return self._semaphore + + def _dynamic_timeout(self) -> float: + """``min(300, max(60, p95_latency * 2))`` based on observed latencies.""" + p95 = _compute_p95(self._latencies) if self._latencies else self._base_timeout + return min(300.0, max(60.0, p95 * 2)) + + def _record_latency(self, seconds: float) -> None: + self._latencies.append(seconds) + # Keep bounded to avoid unbounded memory growth + if len(self._latencies) > 500: + self._latencies = self._latencies[-500:] + + async def close(self) -> None: + if self._client and not self._client.is_closed: + await self._client.aclose() + + # -- tokenize ------------------------------------------------------------ + + async def tokenize(self, text: str) -> int: + """Call ``/tokenize`` endpoint to 
get exact token count.""" + client = await self._get_client() + payload = {"content": text} + timeout = self._dynamic_timeout() + try: + resp = await client.post("/tokenize", json=payload, timeout=timeout) + resp.raise_for_status() + data = resp.json() + return len(data.get("tokens", [])) + except httpx.TimeoutException as exc: + raise LLMTimeoutError(f"Tokenize request timed out: {exc}") from exc + except httpx.ConnectError as exc: + raise LLMError(f"Cannot connect to LLM server: {exc}") from exc + except httpx.HTTPStatusError as exc: + raise LLMError(f"Tokenize request failed: {exc}") from exc + + async def tokenize_stats(self, texts: list[str]) -> dict: + """Tokenize all *texts* and return ``{p50, p90, p95, max, count}`` stats. + + Uses the same concurrency semaphore as extraction requests so the + tokenize calls do not starve extraction bandwidth. + """ + sem = self._get_semaphore() + + async def _safe_tokenize(t: str) -> int: + async with sem: + return await self.tokenize(t) + + results = await asyncio.gather( + *[_safe_tokenize(t) for t in texts], + return_exceptions=True, + ) + + valid = [c for c in results if isinstance(c, int)] + if not valid: + return {"p50": 0, "p90": 0, "p95": 0, "max": 0, "count": 0} + sorted_counts = sorted(valid) + n = len(sorted_counts) + return { + "p50": sorted_counts[max(0, int(n * 0.50) - 1)], + "p90": sorted_counts[max(0, int(n * 0.90) - 1)], + "p95": sorted_counts[max(0, int(n * 0.95) - 1)], + "max": sorted_counts[-1], + "count": n, + } + + # -- extraction ---------------------------------------------------------- + + async def _call_model( + self, + messages: list[dict], + schema: dict, + max_tokens: int | None = None, + ) -> dict | None: + """Single call to ``/chat/completions`` with grammar constraint. + + Returns parsed JSON dict on success, or ``None`` if the model + returned no content. 
Raises :class:`LLMJsonParseError` when the + response body is not valid JSON, and :class:`LLMTimeoutError` / + :class:`LLMError` on network failures. + """ + client = await self._get_client() + mt = min(max_tokens or self.default_max_tokens, 4096) + + payload: dict[str, Any] = { + "model": self.model, + "messages": messages, + "max_tokens": mt, + "temperature": 0, + "response_format": _build_response_format(schema), + } + + timeout = self._dynamic_timeout() + t0 = time.monotonic() + try: + resp = await client.post( + "/chat/completions", json=payload, timeout=timeout + ) + resp.raise_for_status() + except httpx.TimeoutException as exc: + raise LLMTimeoutError(f"Completion request timed out: {exc}") from exc + except httpx.ConnectError as exc: + raise LLMError(f"Cannot connect to LLM server: {exc}") from exc + except httpx.HTTPStatusError as exc: + raise LLMError(f"Completion request failed: {exc}") from exc + finally: + elapsed = time.monotonic() - t0 + self._record_latency(elapsed) + + data = resp.json() + content = ( + data.get("choices", [{}])[0].get("message", {}).get("content", "") + ) + if not content: + return None + + try: + parsed = json.loads(content) + except json.JSONDecodeError as exc: + raise LLMJsonParseError( + f"Model output is not valid JSON: {exc}" + ) from exc + + return parsed + + async def extract( + self, + jd_text: str, + schema: dict, + max_tokens: int | None = None, + ) -> dict | None: + """Extract structured data from *jd_text* using *schema*. + + Three-attempt retry strategy (all at ``temperature=0``): + + 1. **Standard** -- call with full schema + grammar constraint. + 2. **Repair** -- feed validation errors back to model for repair. + 3. **Minimal** -- fall back to core fields only + (``job_title``, ``company_name``, ``skills``, ``summary``). + + Returns the extracted dict on success, or ``None`` if all + attempts fail (caller should handle dead-letter). + Network errors (timeout, connect) propagate as :class:`LLMError` + subclasses. 
+ """ + # --- Attempt 1: standard ------------------------------------------- + result_1: dict | None = None + error_1 = "" + try: + messages = [ + { + "role": "system", + "content": ( + "You are a job description parser. Extract structured data as JSON.\n" + "Rules for 'skills':\n" + "- Only include technical skills, tools, frameworks, languages, and methodologies.\n" + "- Do NOT include: company perks, benefits, culture statements, work " + "arrangements, diversity statements, or soft traits like 'problem-solving'.\n" + "- Maximum 25 items; prefer the most specific and technical ones.\n" + "Rules for 'summary':\n" + "- 1-3 sentences capturing the role's purpose and key requirements.\n" + "Rules for 'experience_level':\n" + "- Choose from: intern, junior, mid, senior, lead, principal, unknown.\n" + "Rules for 'employment_type':\n" + "- Choose from: full_time, part_time, contract, temporary, internship, unknown.\n" + "If a field is unclear, use null rather than guessing." + ), + }, + {"role": "user", "content": jd_text}, + ] + result_1 = await self._call_model(messages, schema, max_tokens) + if result_1 is not None: + validate(instance=result_1, schema=schema) + return result_1 + error_1 = "Model returned empty content" + except (LLMJsonParseError, ValidationError) as exc: + error_1 = str(exc) + logger.debug("Attempt 1 failed: %s", exc) + except LLMError: + raise + + # --- Attempt 2: repair with validation error feedback --------------- + try: + repair_prompt = ( + f"The previous extraction had errors:\n{error_1}\n\n" + "Fix the errors and return valid JSON conforming to the schema. " + "For 'skills': only technical skills, max 25 items." + ) + messages: list[dict] = [ + { + "role": "system", + "content": ( + "You are a job description parser. 
Extract structured data as JSON.\n" + "Rules for 'skills':\n" + "- Only include technical skills, tools, frameworks, languages, and methodologies.\n" + "- Do NOT include: company perks, benefits, culture statements, work " + "arrangements, diversity statements, or soft traits like 'problem-solving'.\n" + "- Maximum 25 items; prefer the most specific and technical ones.\n" + "If a field is unclear, use null rather than guessing." + ), + }, + {"role": "user", "content": jd_text}, + ] + if result_1 is not None: + # Feed back the parsed-but-invalid output so the model can + # see what it produced and correct it. + messages.append( + {"role": "assistant", "content": json.dumps(result_1)} + ) + messages.append({"role": "user", "content": repair_prompt}) + + result_2 = await self._call_model(messages, schema, max_tokens) + if result_2 is not None: + validate(instance=result_2, schema=schema) + return result_2 + except (LLMJsonParseError, ValidationError) as exc: + logger.debug("Attempt 2 (repair) failed: %s", exc) + except LLMError: + raise + + # --- Attempt 3: minimal core fields --------------------------------- + try: + messages = [ + { + "role": "system", + "content": ( + "Extract ONLY the core fields from the job description as JSON: " + "job_title, company_name, skills, summary.\n" + "For 'skills': only technical skills, max 25 items." + ), + }, + {"role": "user", "content": jd_text}, + ] + result_3 = await self._call_model(messages, MINIMAL_SCHEMA, max_tokens) + if result_3 is not None: + validate(instance=result_3, schema=MINIMAL_SCHEMA) + return result_3 + except (LLMJsonParseError, ValidationError) as exc: + logger.warning("Attempt 3 (minimal) failed: %s", exc) + except LLMError: + raise + + return None + + async def extract_batch( + self, + items: list[tuple[str, dict]], + max_tokens: int | None = None, + ) -> list[dict | None]: + """Process a batch of ``(jd_text, schema)`` tuples concurrently. + + Uses semaphore for throttling. 
Returns a list of results in the + same order as *items*; ``None`` entries mark failures (including + network errors, which are caught per-item rather than propagated). + """ + sem = self._get_semaphore() + + async def _extract_one( + idx: int, jd_text: str, schema: dict + ) -> tuple[int, dict | None]: + async with sem: + try: + result = await self.extract( + jd_text, schema, max_tokens=max_tokens + ) + return (idx, result) + except LLMError as exc: + logger.warning("Extraction failed for item %d: %s", idx, exc) + return (idx, None) + + tasks = [ + _extract_one(i, jd_text, schema) for i, (jd_text, schema) in enumerate(items) + ] + results: list[dict | None] = [None] * len(items) + + for coro in asyncio.as_completed(tasks): + idx, result = await coro + results[idx] = result + + return results \ No newline at end of file diff --git a/scripts/jd_pipeline_preprocess.py b/scripts/jd_pipeline_preprocess.py new file mode 100644 index 0000000..c7d33e8 --- /dev/null +++ b/scripts/jd_pipeline_preprocess.py @@ -0,0 +1,92 @@ +"""Preprocessing module for raw JD text before LLM extraction. + +Provides text cleaning (boilerplate removal, unicode normalisation, control +character stripping, whitespace collapsing), hashing, and input row validation. 
+""" + +from __future__ import annotations + +import hashlib +import re +import unicodedata +from typing import List + +PREPROCESS_VERSION = "linkedin-jd-clean-v1" + +# --------------------------------------------------------------------------- +# LinkedIn boilerplate patterns to remove +# --------------------------------------------------------------------------- +LINKEDIN_BOILERPLATE: list[str] = [ + r"Application Process \(Takes \d+ Min\).*", + r"Easy Apply on LinkedIn.*", + r"Check email for next steps.*", + r"Participate in resume evaluation & interview stage.*", +] + + +def compute_hash(text: str) -> str: + """Compute SHA-256 hex digest of *text*.""" + return hashlib.sha256(text.encode("utf-8")).hexdigest() + + +def preprocess(jd_raw: str) -> tuple[str, str]: + """Preprocess raw JD text for model consumption. + + Returns + ------- + tuple[str, str] + ``(jd_cleaned, cleaned_hash)`` + + Cleaning rules + -------------- + 1. Remove LinkedIn boilerplate snippets. + 2. Unicode normalisation (NFKC) -- fullwidth characters -> ASCII equivalents. + 3. Strip control characters (except ``\\n``, ``\\r``, ``\\t``). + 4. Collapse three-or-more consecutive blank lines down to two. + 5. Strip leading/trailing whitespace. + + Invariant + --------- + ``jd_raw`` is **never** mutated. ``jd_cleaned`` is the model-input text + only and should not be persisted back over the original. + """ + text = jd_raw + + # 1. Remove LinkedIn boilerplate + for pattern in LINKEDIN_BOILERPLATE: + text = re.sub(pattern, "", text, flags=re.IGNORECASE) + + # 2. Unicode normalisation: fullwidth -> ASCII equivalents + text = unicodedata.normalize("NFKC", text) + + # 3. Strip control characters (keep \n, \r, \t) + text = "".join(c for c in text if c.isprintable() or c in "\n\r\t") + + # 4. Collapse multiple blank lines to max 2 + text = re.sub(r"\n{3,}", "\n\n", text) + + # 5. 
Strip leading/trailing whitespace + text = text.strip() + + return text, compute_hash(text) + + +def validate_input_row(row: dict) -> List[str]: + """Validate a row from ``final.json`` has all required fields. + + Parameters + ---------- + row : dict + A single job entry from the input JSON file. + + Returns + ------- + list[str] + List of error messages. An empty list means the row is valid. + """ + errors: list[str] = [] + required = ["url", "title", "company", "jd"] + for field in required: + if field not in row or not row[field]: + errors.append(f"missing required field: {field}") + return errors diff --git a/scripts/job_priority_config.py b/scripts/job_priority_config.py new file mode 100644 index 0000000..32cdd81 --- /dev/null +++ b/scripts/job_priority_config.py @@ -0,0 +1,295 @@ +"""Configuration constants for job priority scoring. + +All configuration is defined here -- no scoring logic. +Imported by the scorer, sync pipeline, backfill scripts, and tests. +""" + +import re + +# --------------------------------------------------------------------------- +# Version tracking +# --------------------------------------------------------------------------- +SCORER_VERSION = "job-priority-v1" + +# --------------------------------------------------------------------------- +# Salary currency conversion (static GBP-based, no live exchange calls) +# --------------------------------------------------------------------------- +GBP_TO_GBP = 1.0 +USD_TO_GBP = 0.79 +EUR_TO_GBP = 0.86 + +# --------------------------------------------------------------------------- +# Salary midpoint (GBP) -> raw score mapping +# --------------------------------------------------------------------------- +# The scorer should convert any parsed salary to GBP, take the midpoint of +# ranges, and then pick the score from this table. Missing/unparseable +# salaries get the default below. 
+SALARY_SCORE_TABLE: list[tuple[int, int]] = [ + (120_000, 20), + (90_000, 18), + (70_000, 15), + (55_000, 12), + (40_000, 8), + (0, 5), +] + +# Default compensation score when salary is absent or unparseable +SALARY_MISSING_SCORE = 6 + +# --------------------------------------------------------------------------- +# Compensation component +# --------------------------------------------------------------------------- +COMPENSATION_WEIGHT = 20 # max score for this component + +# --------------------------------------------------------------------------- +# Role fit keywords (positive) +# --------------------------------------------------------------------------- +POSITIVE_ROLE_TERMS = frozenset({ + "software engineer", + "full stack", + "fullstack", + "backend", + "back end", + "frontend", + "front end", + "platform", + "developer", + "typescript", + "react", + "node", + "python", + "rust", + "ai", + "gen ai", + "genai", + "llm", + "agent", + "cloud", + "data engineer", + "devops", + "sre", + "site reliability", +}) + +# --------------------------------------------------------------------------- +# Role fit keywords (negative / lower priority) +# --------------------------------------------------------------------------- +NEGATIVE_ROLE_TERMS = frozenset({ + "recruiter", + "sales", + "marketing", + "data annotation", + "trainer", + "teacher", + "support", + "intern", + "apprentice", + "qa manual", + "wordpress only", +}) + +# --------------------------------------------------------------------------- +# Seniority signal groups +# --------------------------------------------------------------------------- +SENIOR_SIGNALS = frozenset({ + "senior", + "staff", + "lead", + "principal engineer", + "staff engineer", +}) + +MID_SIGNALS = frozenset({ + "mid", + "mid-level", + "ii", + "iii", +}) + +JUNIOR_SIGNALS = frozenset({ + "junior", + "graduate", + "new grad", + "associate", +}) + +PRINCIPAL_DIRECTOR_SIGNALS = frozenset({ + "principal", + "director", + "cto", + "vice 
president", + "vp", + "head of", +}) + +INTERN_SIGNALS = frozenset({ + "intern", + "internship", + "apprentice", + "trainee", + "unpaid", +}) + +# --------------------------------------------------------------------------- +# Work arrangement / location +# --------------------------------------------------------------------------- +UK_LOCATION_TERMS = frozenset({ + "uk", + "london", + "england", + "britain", + "united kingdom", + "remote", + "hybrid", + "europe", +}) + +WORKPLACE_TYPES = frozenset({ + "remote", + "hybrid", + "on-site", +}) + +# --------------------------------------------------------------------------- +# Recognised ATS hosts (application-path scoring) +# --------------------------------------------------------------------------- +RECOGNIZED_ATS_HOSTS = frozenset({ + "workday", + "myworkdayjobs", + "greenhouse", + "lever", + "ashby", + "smartrecruiters", + "icims", + "recruitee", + "applytojob", + "workable", + "breezy", + "taleo", + "successfactors", + "oraclecloud", +}) + +# --------------------------------------------------------------------------- +# Recruiter / agency company name regex (seed list) +# --------------------------------------------------------------------------- +RECRUITER_COMPANY_RE = re.compile( + r"(?i)\b(search|recruit|recruitment|staffing|talent|harnham|anson mccade|" + r"roc search|techohana|la fosse|opus|understanding recruitment|client server|" + r"xcede|trg|burns sheehan|mcgregor boyall|michael page|develop|hunter bond|" + r"oliver bernard|gravitas|mason frank|randstad|adecco|manpower|robert half|" + r"teksystems)\b" +) + +# --------------------------------------------------------------------------- +# Recruiter broadcast phrase regex (seed list) +# --------------------------------------------------------------------------- +RECRUITER_PHRASE_RE = re.compile( + r"(?i)\b(partnered with|on behalf of|our client|my client|representing|" + r"recruitment agency|talent partner|consultant|shortlisted|send your cv|" + r"submit 
your cv|resume to|interviews are currently underway)\b" +) + +# --------------------------------------------------------------------------- +# Aggregator / job-board patterns +# --------------------------------------------------------------------------- +AGGREGATOR_RE = re.compile( + r"(?i)\b(jobs via|efinancialcareers|hackajob|huzzle|fetchjobs|bestjobtool)\b" +) + +# --------------------------------------------------------------------------- +# Decorative / noisy text cleanup regexes +# --------------------------------------------------------------------------- +ZERO_WIDTH_RE = re.compile(r"[​-‍]") +CONTROL_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]") +DECORATIVE_SYMBOL_RE = re.compile(r"[\U0001F300-\U0001FAFF☀-➿]") +REPEATED_PUNCT_RE = re.compile(r"([!?.•●▪◦])\1{1,}") + +# --------------------------------------------------------------------------- +# Tier thresholds +# --------------------------------------------------------------------------- +TIER_THRESHOLDS: dict[str, int] = { + "high": 75, + "medium": 50, + "low": 25, + "reject": 0, +} + +# --------------------------------------------------------------------------- +# Penalty constants +# --------------------------------------------------------------------------- +SCAM_PENALTY = -20 +NON_ENGINEERING_PENALTY = -15 +UNPAID_COMMISSION_PENALTY = -10 +LOW_INFO_RECRUITER_PENALTY = -10 +AGGREGATOR_REPOST_PENALTY = -8 +SPONSORSHIP_PENALTY = -8 +NOISY_TEXT_PENALTY = -5 +DUPLICATE_LOW_QUALITY_PENALTY = -5 + +# --------------------------------------------------------------------------- +# Sponsorship penalty control flag (default disabled for v1) +# --------------------------------------------------------------------------- +SPONSORSHIP_PENALTY_ENABLED = False + +# --------------------------------------------------------------------------- +# Minimum JD length thresholds +# --------------------------------------------------------------------------- +MIN_JD_LENGTH_SHORT = 300 +MIN_JD_LENGTH_USABLE = 500 + +# 
--------------------------------------------------------------------------- +# Freshness scoring (days -> score) +# --------------------------------------------------------------------------- +# Days boundaries are upper-inclusive: e.g. <= 3 days gets 5. +FRESHNESS_DAYS: list[tuple[int, int]] = [ + (3, 5), + (7, 4), + (14, 3), + (30, 1), +] +# Older than the last boundary (or missing): 0 +FRESHNESS_DEFAULT_SCORE = 0 + +# --------------------------------------------------------------------------- +# Rank scoring (rank -> score) +# --------------------------------------------------------------------------- +# Rank boundaries are upper-inclusive: e.g. rank <= 50 gets 5. +RANK_SCORES: list[tuple[int, int]] = [ + (50, 5), + (150, 4), + (300, 3), + (600, 1), +] +# Missing or > 600: 0 +RANK_DEFAULT_SCORE = 0 + +# --------------------------------------------------------------------------- +# Application path scoring levels +# --------------------------------------------------------------------------- +# These are the component scores for the application-path sub-component (0..8). 
+APP_PATH_ATS_URL = 8 +APP_PATH_CLEAN_COMPANY_URL = 7 +APP_PATH_EASY_APPLY_USABLE = 5 +APP_PATH_EASY_APPLY_WEAK = 1 +APP_PATH_AGGREGATOR = 2 +APP_PATH_MISSING = 0 + +# --------------------------------------------------------------------------- +# Work arrangement / location scoring levels +# --------------------------------------------------------------------------- +ARRANGEMENT_REMOTE_UK = 10 +ARRANGEMENT_HYBRID_UK = 8 +ARRANGEMENT_ONSITE_UK = 5 +ARRANGEMENT_ONSITE_OUTSIDE_TARGET = 3 +ARRANGEMENT_NOT_UK = 0 + +# --------------------------------------------------------------------------- +# Source quality sub-penalties (start from 10 and subtract) +# --------------------------------------------------------------------------- +SQ_RECRUITER_COMPANY = -4 +SQ_RECRUITER_PHRASE = -3 +SQ_MISSING_SALARY = -2 +SQ_EASY_APPLY_NO_OWNED_URL = -2 +SQ_JD_TOO_SHORT = -2 +SQ_WEAK_APPLICANT_COUNT = -1 diff --git a/scripts/job_priority_scorer.py b/scripts/job_priority_scorer.py new file mode 100644 index 0000000..a6a79e8 --- /dev/null +++ b/scripts/job_priority_scorer.py @@ -0,0 +1,1204 @@ +#!/usr/bin/env python3 +"""Job priority scoring engine. + +Contains all 8 scoring components, penalty system, tier mapping, +and the main orchestration function score_job(). + +All functions are pure and deterministic (same input -> same output). +No LLM calls, no API calls, no external dependencies beyond Python stdlib. 
+""" + +from __future__ import annotations + +import re +import unicodedata +from dataclasses import dataclass +from datetime import date, datetime, timezone +from typing import Any +from urllib.parse import urlparse + +from scripts.job_priority_config import ( + AGGREGATOR_RE, + AGGREGATOR_REPOST_PENALTY, + APP_PATH_AGGREGATOR, + APP_PATH_ATS_URL, + APP_PATH_CLEAN_COMPANY_URL, + APP_PATH_EASY_APPLY_USABLE, + APP_PATH_EASY_APPLY_WEAK, + APP_PATH_MISSING, + ARRANGEMENT_HYBRID_UK, + ARRANGEMENT_NOT_UK, + ARRANGEMENT_ONSITE_OUTSIDE_TARGET, + ARRANGEMENT_ONSITE_UK, + ARRANGEMENT_REMOTE_UK, + CONTROL_RE, + DECORATIVE_SYMBOL_RE, + DUPLICATE_LOW_QUALITY_PENALTY, + EUR_TO_GBP, + FRESHNESS_DAYS, + FRESHNESS_DEFAULT_SCORE, + GBP_TO_GBP, + INTERN_SIGNALS, + JUNIOR_SIGNALS, + LOW_INFO_RECRUITER_PENALTY, + MID_SIGNALS, + MIN_JD_LENGTH_SHORT, + MIN_JD_LENGTH_USABLE, + NEGATIVE_ROLE_TERMS, + NOISY_TEXT_PENALTY, + NON_ENGINEERING_PENALTY, + POSITIVE_ROLE_TERMS, + PRINCIPAL_DIRECTOR_SIGNALS, + RANK_DEFAULT_SCORE, + RANK_SCORES, + RECOGNIZED_ATS_HOSTS, + RECRUITER_COMPANY_RE, + RECRUITER_PHRASE_RE, + REPEATED_PUNCT_RE, + SALARY_MISSING_SCORE, + SALARY_SCORE_TABLE, + SCAM_PENALTY, + SCORER_VERSION, + SENIOR_SIGNALS, + SPONSORSHIP_PENALTY, + SPONSORSHIP_PENALTY_ENABLED, + SQ_EASY_APPLY_NO_OWNED_URL, + SQ_JD_TOO_SHORT, + SQ_MISSING_SALARY, + SQ_RECRUITER_COMPANY, + SQ_RECRUITER_PHRASE, + SQ_WEAK_APPLICANT_COUNT, + TIER_THRESHOLDS, + UNPAID_COMMISSION_PENALTY, + USD_TO_GBP, + ZERO_WIDTH_RE, +) + + +# =========================================================================== +# ScoreResult +# =========================================================================== + + +@dataclass(frozen=True) +class ScoreResult: + score: float + tier: str + version: str + signals: dict + scoring_text: str + + +# =========================================================================== +# Internal helpers +# =========================================================================== + +# fmt: off 
+_CURRENCY_TO_GBP = { + "£": GBP_TO_GBP, # £ + "$": USD_TO_GBP, + "€": EUR_TO_GBP, # € +} +# fmt: on + +_YEARS_EXPERIENCE_RE = re.compile(r"\b\d{2}\s*\+\s*(?:years?|yrs?)\b", re.IGNORECASE) + +_HANDS_ON_RE = re.compile( + r"(?i)\b(coding|programming|implement(?:ing|s|ed)?|" + r"develop(?:ing|s|ed)?|architect(?:ing|s|ed|ure)?|" + r"design.*system|write.*code|build.*product|mentor|" + r"code.?review|hands.?on|shipping|deploying)\b" +) + +_SCAM_RE = re.compile( + r"(?i)\b(earn.*money.*(?:from home|online|fast)|" + r"make \$?\d+[k]?\s*(?:per|a|every)\s*(?:day|week|hour)|" + r"unlimited earning|start.*today.*(?:no experience|no interview)|" + r"no interview required|guaranteed.*(?:income|salary|pay)|" + r"mystery shopper|data entry.*(?:from home|remote).*\d+[kK]|" + r"bitcoin|crypto.*(?:trading|invest)|" + r"investment opportunity|" + r"envelope|" + r"no experience necessary.*(?:train|earn))\b" +) + +_NON_ENGINEERING_SCAM_RE = re.compile( + r"(?i)\b(?:unpaid|volunteer|commission.?only|commission.?based|" + r"assessment.?only|assessment.?based|" + r"1099.*only|equity.?only)\b" +) + +_SPONSORSHIP_RE = re.compile( + r"(?i)\b(no\s+sponsorship|no\s+visa\s+sponsorship|cannot\s+sponsor|" + r"unable\s+to\s+sponsor|no\s+longer\s+sponsor|does\s+not\s+sponsor|" + r"sponsorship\s+not\s+available|not\s+able\s+to\s+sponsor|" + r"no\s+.*\s+visa\s+.*\s+sponsor)\b" +) + +# Priority-ordered seniority levels (most specific first). 
+_SENIORITY_LEVELS: list[tuple[str, frozenset[str], float]] = [ + ("intern", INTERN_SIGNALS, 2.0), + ("principal", PRINCIPAL_DIRECTOR_SIGNALS, 7.0), + ("senior", SENIOR_SIGNALS, 11.0), + ("mid", MID_SIGNALS, 9.0), + ("junior", JUNIOR_SIGNALS, 5.5), +] + +_DATE_FORMATS = [ + "%Y-%m-%d", + "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%dT%H:%M:%S", + "%Y/%m/%d", + "%d/%m/%Y", + "%B %d, %Y", + "%d %B %Y", + "%Y-%m-%d %H:%M:%S", + "%Y/%m/%d %H:%M:%S", +] + + +def _term_in(term: str, text: str) -> bool: + """Check if *term* appears as a whole word in *text* (word boundaries).""" + return bool(re.search(rf"\b{re.escape(term)}\b", text, re.IGNORECASE)) + + +# =========================================================================== +# Pre-scoring text normalisation +# =========================================================================== + + +def extract_job_description(job_data: dict, raw_record: dict) -> str: + """Extract JD text with fallback priority. + + 1. job_description (from original normalisation) + 2. jobDescription (raw field) + 3. description (raw field) + 4. jd (raw field – backfill) + 5. raw_record.jd + 6. raw_record.description + Return empty string if none found. + """ + normalised = str(job_data.get("job_description", "") or "").strip() + if normalised: + return normalised + for key in ("jobDescription", "description", "jd"): + val = job_data.get(key) + if val and isinstance(val, str) and val.strip(): + return val.strip() + # Try raw_record fallback + for key in ("jd", "description"): + val = raw_record.get(key) + if val and isinstance(val, str) and val.strip(): + return val.strip() + return "" + + +def extract_posted_time(job_data: dict, raw_record: dict) -> str: + """Extract posted time with fallback. + + 1. post_time (already in NormalizedJob) + 2. postTime (raw) + 3. posted_date (raw) + 4. postedDate (raw) + 5. 
posted_time (raw) + """ + pt = str(job_data.get("post_time", "") or "").strip() + if pt: + return pt + for key in ("postTime", "posted_date", "postedDate", "posted_time"): + val = raw_record.get(key) + if val and isinstance(val, str) and val.strip(): + return val.strip() + return "" + + +def normalize_scoring_text(raw_text: str) -> tuple[str, dict]: + """Return (cleaned_text, noise_signals_dict). + + Steps: + 1. NFKC unicode normalize + 2. Remove zero-width chars + 3. Remove control chars + 4. Replace decorative symbols with space + 5. Collapse repeated punctuation to single + 6. Collapse repeated whitespace to single space + 7. Strip + """ + original_len = len(raw_text) + text = unicodedata.normalize("NFKC", raw_text) + + n_zw = len(ZERO_WIDTH_RE.findall(text)) + text = ZERO_WIDTH_RE.sub("", text) + + n_ctrl = len(CONTROL_RE.findall(text)) + text = CONTROL_RE.sub("", text) + + n_decor = len(DECORATIVE_SYMBOL_RE.findall(text)) + text = DECORATIVE_SYMBOL_RE.sub(" ", text) + + text = REPEATED_PUNCT_RE.sub(r"\1", text) + + text = re.sub(r"\s+", " ", text) + + text = text.strip() + + total_removed = original_len - len(text) + removal_ratio = total_removed / max(original_len, 1) + + noise = { + "original_length": original_len, + "clean_length": len(text), + "zero_width_removed": n_zw, + "control_chars_removed": n_ctrl, + "decorative_symbols_removed": n_decor, + "total_chars_removed": total_removed, + "removal_ratio": round(removal_ratio, 4), + "was_noisy": removal_ratio > 0.05, + } + return text, noise + + +# =========================================================================== +# Date parsing helper +# =========================================================================== + + +def _parse_date(date_str: str) -> date | None: + """Try to parse a date string using known formats. + + Returns None if parsing fails (e.g., relative dates like "2 days ago"). 
+ """ + s = date_str.strip() + if not s: + return None + + # Try absolute date formats + for fmt in _DATE_FORMATS: + try: + dt = datetime.strptime(s, fmt) + return dt.date() + except ValueError: + continue + + # Try ISO-8601 with timezone offset (e.g. "2024-01-15T12:00:00+00:00") + try: + dt = datetime.fromisoformat(s) + return dt.date() + except (ValueError, TypeError): + pass + + # Try numeric (Unix timestamp in seconds or milliseconds) + try: + ts = float(s) + # If it looks like ms (> year 10000 threshold), divide + if ts > 100_000_000_000: + ts /= 1000 + return datetime.fromtimestamp(ts, tz=timezone.utc).date() + except (ValueError, OverflowError, OSError): + pass + + # Relative dates (can't parse without NLP; return None) + return None + + +# =========================================================================== +# URL helpers +# =========================================================================== + + +def _is_linkedin_url(url: str) -> bool: + if not url: + return False + try: + host = urlparse(url.strip()).hostname or "" + except Exception: + return False + return "linkedin.com" in host.lower() + + +def _is_ats_url(url: str) -> bool: + if not url: + return False + try: + host = urlparse(url.strip()).hostname or "" + except Exception: + return False + host = host.lower() + return any(ats in host for ats in RECOGNIZED_ATS_HOSTS) + + +# =========================================================================== +# Data-completeness helpers +# =========================================================================== + + +def _has_raw_jd(raw_record: dict) -> bool: + """Check if any raw JD field exists and is non-empty.""" + for key in ("jd", "description", "jobDescription"): + val = raw_record.get(key) + if val and isinstance(val, str) and val.strip(): + return True + return False + + +# =========================================================================== +# Compensation (0..20) +# 
=========================================================================== + + +def _parse_salary(salary_str: str) -> tuple[float | None, float | None, str]: + """Parse salary string into (min_gbp, max_gbp, detected_currency). + + Handles £ $ € prefixes, K/k multipliers, ranges and single values. + Returns (None, None, '') for unparseable input. + """ + s = salary_str.strip() + if not s: + return None, None, "" + + # Detect currency symbol + currency = "" + for sym in _CURRENCY_TO_GBP: + if sym in s: + currency = sym + break + + # Normalise text for parsing + cleaned = re.sub(r"(?i)\b(?:competitive|negotiable|depends|doe" + r"|commensurate|up\s+to|from|range|approx)\b", + "", s) + # Normalise K notation + cleaned = re.sub(r"(?i)(\d[\d,.]*)\s*[kK]", r"\g<1>000", cleaned) + # Strip commas inside numbers + cleaned = re.sub(r"(?<=\d),(?=\d)", "", cleaned) + + nums = [float(n) for n in re.findall(r"\d+(?:\.\d+)?", cleaned)] + if not nums: + return None, None, currency + + if len(nums) >= 2: + # Check the two biggest numbers to form a range + sorted_nums = sorted(nums, reverse=True) + hi = sorted_nums[0] + lo = sorted_nums[1] + if hi - lo < 0.01 * max(abs(hi), 1): + # Very close – treat as a single value + return hi, hi, currency + return lo, hi, currency + + return nums[0], nums[0], currency + + +def score_compensation(salary_str: str) -> tuple[float, dict]: + """Score compensation 0..20 from parsed salary. + + Returns (score, signals_dict). 
+ """ + signals: dict[str, Any] = {"raw_salary": salary_str} + + min_gbp, max_gbp, currency = _parse_salary(salary_str) + + if min_gbp is None or max_gbp is None: + signals["score"] = SALARY_MISSING_SCORE + signals["parseable"] = False + signals["currency"] = currency + return float(SALARY_MISSING_SCORE), signals + + # Convert to GBP + rate = _CURRENCY_TO_GBP.get(currency, GBP_TO_GBP) + min_gbp *= rate + max_gbp *= rate + + midpoint_gbp = (min_gbp + max_gbp) / 2.0 + + # Look up score in SALARY_SCORE_TABLE (first row where midpoint >= threshold) + table_score = SALARY_MISSING_SCORE + for threshold, score in SALARY_SCORE_TABLE: + if midpoint_gbp >= threshold: + table_score = score + break + + signals["parseable"] = True + signals["currency"] = currency + signals["gbp_midpoint"] = round(midpoint_gbp, 2) + signals["parsed_min_gbp"] = round(min_gbp, 2) + signals["parsed_max_gbp"] = round(max_gbp, 2) + signals["score"] = table_score + return float(table_score), signals + + +# =========================================================================== +# Role Fit (0..20) +# =========================================================================== + + +def score_role_fit(title: str, scoring_text: str) -> tuple[float, dict]: + """Score role fit 0..20 from job title and JD text. + + Title matches are weighted +3/-3, JD text matches +1/-1. 
def score_seniority(title: str, scoring_text: str) -> tuple[float, dict]:
    """Score seniority 0..12.

    The title is checked first; *scoring_text* is consulted only when the
    title carries no seniority signal.  The base score of the detected level
    (as returned by ``_find_seniority``) is then adjusted:

    * +2 when the level is ``"principal"`` and ``_HANDS_ON_RE`` matches the JD;
    * -1 when ``_YEARS_EXPERIENCE_RE`` matches the JD (e.g. "10+ years").

    The adjusted value is clamped to the documented 0..12 range.

    Args:
        title: Job title.
        scoring_text: Normalised job-description text.

    Returns:
        ``(score, signals)``.  When no level is detected anywhere the score
        is the neutral default 6.0 and ``signals`` carries a ``notes`` entry
        instead of the bonus/penalty flags.
    """
    level = base = None
    source = text = ""
    # Title wins over JD text: stop at the first text that yields a level.
    for source, text in (("title", title), ("scoring_text", scoring_text)):
        level, base = _find_seniority(text)
        if level is not None:
            break

    if level is None:
        return 6.0, {
            "matched_level": None,
            "matched_terms": [],
            "score": 6.0,
            "notes": "default: no seniority signal",
        }

    # Recover which of the level's terms actually matched, for the signal dump.
    matched_terms: list[str] = []
    for lvl, sig_set, _ in _SENIORITY_LEVELS:
        if lvl == level:
            matched_terms = [t for t in sig_set if _term_in(t, text)]
            break

    # Hands-on bonus applies to the "principal" level only and always looks
    # at the JD text, even when the level itself came from the title.
    hands_on = level == "principal" and bool(_HANDS_ON_RE.search(scoring_text))
    if hands_on:
        base += 2.0

    years_penalty = bool(_YEARS_EXPERIENCE_RE.search(scoring_text))
    if years_penalty:
        base -= 1.0

    # Clamp on both sides: the penalty must not push the score below 0.
    final = max(0.0, min(base, 12.0))

    return final, {
        "matched_level": level,
        "matched_terms": matched_terms,
        "source": source,
        "hands_on_bonus": hands_on,
        "years_penalty": years_penalty,
        "score": final,
    }
def score_work_arrangement(
    workplace_type: str, location: str, scoring_text: str
) -> tuple[float, dict]:
    """Score work arrangement 0..10.

    The workplace type is compared case-insensitively after trimming; UK-ness
    comes from ``_is_uk_location(location, scoring_text)``.

    ==============  ==========================  =================================
    workplace type  UK                          not UK
    ==============  ==========================  =================================
    remote          ARRANGEMENT_REMOTE_UK       5.0
    hybrid          ARRANGEMENT_HYBRID_UK       4.0
    on-site         ARRANGEMENT_ONSITE_UK       ARRANGEMENT_ONSITE_OUTSIDE_TARGET
    "" / unknown    6.0                         ARRANGEMENT_NOT_UK
    anything else   ARRANGEMENT_NOT_UK          ARRANGEMENT_NOT_UK
    ==============  ==========================  =================================

    Returns:
        ``(score, signal)`` where ``signal`` records the normalised workplace
        type (``"unknown"`` when empty), the raw location, the UK flag and
        the score.
    """
    wt = workplace_type.strip().lower()
    is_uk = _is_uk_location(location, scoring_text)

    if wt == "remote":
        # Non-UK remote is a fixed 5.0; it is not derived from the hybrid
        # constant (the previous derived value was always overwritten).
        score = float(ARRANGEMENT_REMOTE_UK) if is_uk else 5.0
    elif wt == "hybrid":
        score = float(ARRANGEMENT_HYBRID_UK) if is_uk else 4.0
    elif wt == "on-site":
        score = (
            float(ARRANGEMENT_ONSITE_UK)
            if is_uk
            else float(ARRANGEMENT_ONSITE_OUTSIDE_TARGET)
        )
    elif wt in ("", "unknown"):
        # No declared arrangement: rely on UK signals in location / JD only.
        score = 6.0 if is_uk else float(ARRANGEMENT_NOT_UK)
    else:
        score = float(ARRANGEMENT_NOT_UK)

    signal: dict[str, Any] = {
        "workplace_type": wt or "unknown",
        "location": location,
        "is_uk": is_uk,
        "score": score,
    }
    return score, signal
non-empty, non-ATS, non-LinkedIn. + # Each URL is checked independently so a LinkedIn apply_url with a + # clean external_url (e.g. company career page) counts as clean. + has_clean_url = ( + (bool(au) and not _is_ats_url(au) and not _is_linkedin_url(au)) + or (bool(eu) and not _is_ats_url(eu) and not _is_linkedin_url(eu)) + ) + + is_aggregator = bool(AGGREGATOR_RE.search(scoring_text)) + + if has_ats_url: + score = APP_PATH_ATS_URL + reason = "ats_url" + elif has_clean_url: + score = APP_PATH_CLEAN_COMPANY_URL + reason = "clean_company_url" + elif is_easy_apply and (has_usable_jd or has_salary): + score = APP_PATH_EASY_APPLY_USABLE + reason = "easy_apply_usable" + elif is_easy_apply: + score = APP_PATH_EASY_APPLY_WEAK + reason = "easy_apply_weak" + elif is_aggregator: + score = APP_PATH_AGGREGATOR + reason = "aggregator" + else: + score = APP_PATH_MISSING + reason = "missing_application_info" + + signals = { + "score": float(score), + "reason": reason, + "has_ats_url": has_ats_url, + "has_clean_url": has_clean_url, + "is_linkedin": is_linkedin, + "is_aggregator": is_aggregator, + "is_easy_apply": is_easy_apply, + } + return float(score), signals + + +# =========================================================================== +# Freshness (0..10) = Freshness (0..5) + Rank (0..5) +# =========================================================================== + + +def parse_reference_date(input_path: str | None) -> date | None: + """Try to parse YYYYMMDD from --input path like output/YYYYMMDD.json.""" + if not input_path: + return None + m = re.search(r"(\d{4})(\d{2})(\d{2})", input_path) + if m: + try: + return date(int(m.group(1)), int(m.group(2)), int(m.group(3))) + except ValueError: + return None + return None + + +def score_freshness( + posted_time: str, + raw_record: dict, + reference_date: date | None = None, +) -> tuple[float, dict]: + """Score freshness 0..10 (0..5 freshness + 0..5 rank).""" + signals: dict[str, Any] = {} + + # --- Freshness sub-score 
def score_data_completeness(
    job_title: str,
    company_name: str,
    location: str,
    job_description: str,
    salary: str,
    posted_time: str,
    apply_url: str,
    external_url: str,
    raw_record: dict | None = None,
    apply_type: str = "",
) -> tuple[float, dict]:
    """Score data completeness: one point per present signal, max 10.

    The ten signals are: title, company, location, normalised JD, raw JD
    (per ``_has_raw_jd``), JD length of at least ``MIN_JD_LENGTH_USABLE``,
    salary, posted date, any application URL, and easy-apply.

    Args:
        job_title, company_name, location, job_description, salary,
        posted_time, apply_url, external_url: Normalised string fields;
            each counts as present when non-blank.
        raw_record: Raw source record, used as a fallback for the JD text
            and the easy-apply signal.  ``None`` is treated as empty.
        apply_type: Preferred source of the easy-apply signal; falls back
            to ``raw_record["apply_type"]`` when blank.

    Returns:
        ``(score, signals)`` where ``signals`` maps every check name to its
        boolean result and adds ``description_source``
        (``"normalized"`` / ``"raw"`` / ``"none"``) and ``score``.
    """
    if raw_record is None:
        raw_record = {}

    normalized_jd = job_description.strip()
    has_raw_jd = _has_raw_jd(raw_record)

    # Text used for the length check: the normalised JD, else the first
    # non-blank string among the raw JD fields.
    jd_len_text = normalized_jd
    if not jd_len_text:
        for key in ("jd", "description", "jobDescription"):
            val = raw_record.get(key)
            if val and isinstance(val, str) and val.strip():
                jd_len_text = val.strip()
                break

    # Easy-apply is resolved before the table is built, so no entry has to
    # be patched afterwards by position.
    at = apply_type.strip().lower() if apply_type else ""
    if not at:
        at = str(raw_record.get("apply_type", "") or "").lower()
    easy_apply_raw = str(raw_record.get("easy_apply", "") or "").lower()
    is_easy_apply = at == "easy_apply" or easy_apply_raw in ("true", "1", "yes")

    checks: list[tuple[str, bool]] = [
        ("has_title", bool(job_title.strip())),
        ("has_company", bool(company_name.strip())),
        ("has_location", bool(location.strip())),
        ("has_jd_normalized", bool(normalized_jd)),
        ("has_jd_raw", has_raw_jd),
        ("has_jd_length_500", len(jd_len_text) >= MIN_JD_LENGTH_USABLE),
        ("has_salary", bool(salary.strip())),
        ("has_posted_date", bool(posted_time.strip())),
        ("has_application_url", bool(apply_url.strip() or external_url.strip())),
        ("has_easy_apply", is_easy_apply),
    ]

    if normalized_jd:
        desc_source = "normalized"
    elif has_raw_jd:
        desc_source = "raw"
    else:
        desc_source = "none"

    score = float(sum(1 for _, present in checks if present))

    signals: dict[str, Any] = dict(checks)
    signals["description_source"] = desc_source
    signals["score"] = score
    return score, signals
recruiter_company = bool(RECRUITER_COMPANY_RE.search(company_name)) + if recruiter_company: + subtractions["recruiter_company"] = SQ_RECRUITER_COMPANY + score += SQ_RECRUITER_COMPANY + + # -3: recruiter phrase in JD + recruiter_phrase = bool(RECRUITER_PHRASE_RE.search(scoring_text)) + if recruiter_phrase: + subtractions["recruiter_phrase"] = SQ_RECRUITER_PHRASE + score += SQ_RECRUITER_PHRASE + + # -2: salary missing + missing_salary = not bool(salary.strip()) + if missing_salary: + subtractions["missing_salary"] = SQ_MISSING_SALARY + score += SQ_MISSING_SALARY + + # -2: easy apply and no owned ATS/company URL + at = apply_type.strip().lower() + is_easy_apply = at == "easy_apply" + au = apply_url.strip() + eu = external_url.strip() + has_owned_url = _is_ats_url(au) or _is_ats_url(eu) or (bool(eu) and not _is_linkedin_url(eu)) + easy_no_url = is_easy_apply and not has_owned_url + if easy_no_url: + subtractions["easy_apply_no_owned_url"] = SQ_EASY_APPLY_NO_OWNED_URL + score += SQ_EASY_APPLY_NO_OWNED_URL + + # -2: JD shorter than MIN_JD_LENGTH_SHORT + jd_too_short = len(scoring_text) < MIN_JD_LENGTH_SHORT + if jd_too_short: + subtractions["jd_too_short"] = SQ_JD_TOO_SHORT + score += SQ_JD_TOO_SHORT + + # -1: applicant_count is N/A and rank is weak + ac = applicant_count.strip().lower() + is_na = ac in ("n/a", "na", "not applicable", "") + weak_rank = rank is None or rank > 300 + weak_applicant = is_na and weak_rank + if weak_applicant: + subtractions["weak_applicant_count"] = SQ_WEAK_APPLICANT_COUNT + score += SQ_WEAK_APPLICANT_COUNT + + clamped = max(0.0, min(10.0, score)) + + signals = { + "start_score": 10.0, + "recruiter_company": recruiter_company, + "recruiter_phrase": recruiter_phrase, + "missing_salary": missing_salary, + "easy_apply_no_owned_url": easy_no_url, + "jd_too_short": jd_too_short, + "weak_applicant_count": weak_applicant, + "subtractions": subtractions, + "score": clamped, + } + return clamped, signals + + +# 
=========================================================================== +# Penalties +# =========================================================================== + + +def _has_any_positive_role_term(title: str, scoring_text: str) -> bool: + return any( + _term_in(term, title) or _term_in(term, scoring_text) + for term in POSITIVE_ROLE_TERMS + ) + + +def _has_any_negative_role_term(title: str, scoring_text: str) -> bool: + return any( + _term_in(term, title) or _term_in(term, scoring_text) + for term in NEGATIVE_ROLE_TERMS + ) + + +def _is_recruiter_company(company_name: str) -> bool: + return bool(RECRUITER_COMPANY_RE.search(company_name)) + + +def _is_aggregator_source(scoring_text: str) -> bool: + return bool(AGGREGATOR_RE.search(scoring_text)) + + +def apply_penalties( + score: float, signals: dict, job_data: dict, scoring_text: str = "" +) -> tuple[float, list[str]]: + """Subtract penalties from *score* after component scoring. + + *scoring_text* is the pre-normalised JD text (already resolved via + extract_job_description). When empty, falls back to + job_data["job_description"] for backward compatibility with direct calls. + + Returns (penalized_score, applied_penalties_list). + """ + penalties: list[str] = [] + penalised = score + + job_title = str(job_data.get("job_title", "") or "") + company_name = str(job_data.get("company_name", "") or "") + salary = str(job_data.get("salary", "") or "") + apply_url = str(job_data.get("apply_url", "") or "") + external_url = str(job_data.get("external_url", "") or "") + apply_type = str(job_data.get("apply_type", "") or "").lower() + if not scoring_text: + scoring_text = str(job_data.get("job_description", "") or "") + + # 1. SCAM_PENALTY (-20) + if _SCAM_RE.search(scoring_text): + penalised += SCAM_PENALTY + penalties.append(f"scam:{SCAM_PENALTY}") + + # 2. 
NON_ENGINEERING_PENALTY (-15): negative terms but no positive + has_pos = _has_any_positive_role_term(job_title, scoring_text) + has_neg = _has_any_negative_role_term(job_title, scoring_text) + if not has_pos and has_neg: + penalised += NON_ENGINEERING_PENALTY + penalties.append(f"non_engineering:{NON_ENGINEERING_PENALTY}") + + # 3. UNPAID_COMMISSION_PENALTY (-10) + if _NON_ENGINEERING_SCAM_RE.search(scoring_text): + penalised += UNPAID_COMMISSION_PENALTY + penalties.append(f"unpaid_commission:{UNPAID_COMMISSION_PENALTY}") + + # 4. LOW_INFO_RECRUITER_PENALTY (-10) + is_recruiter = _is_recruiter_company(company_name) + missing_salary = not bool(salary.strip()) + is_easy_apply = apply_type == "easy_apply" + jd_text = scoring_text + usable_jd = len(jd_text.strip()) >= MIN_JD_LENGTH_USABLE + if is_recruiter and missing_salary and is_easy_apply and not usable_jd: + penalised += LOW_INFO_RECRUITER_PENALTY + penalties.append(f"low_info_recruiter:{LOW_INFO_RECRUITER_PENALTY}") + + # 5. AGGREGATOR_REPOST_PENALTY (-8) + is_aggregator = _is_aggregator_source(scoring_text) + au = apply_url.strip() + eu = external_url.strip() + has_owned_url = bool(au) or bool(eu) + if is_aggregator and missing_salary and not has_owned_url: + penalised += AGGREGATOR_REPOST_PENALTY + penalties.append(f"aggregator_repost:{AGGREGATOR_REPOST_PENALTY}") + + # 6. SPONSORSHIP_PENALTY (-8) + if SPONSORSHIP_PENALTY_ENABLED and _SPONSORSHIP_RE.search(scoring_text): + penalised += SPONSORSHIP_PENALTY + penalties.append(f"sponsorship:{SPONSORSHIP_PENALTY}") + + # 7. NOISY_TEXT_PENALTY (-5) + noise = signals.get("noise", {}) + removal_ratio = noise.get("removal_ratio", 0) + clean_len = noise.get("clean_length", 0) + if removal_ratio > 0.05 and clean_len < MIN_JD_LENGTH_USABLE: + penalised += NOISY_TEXT_PENALTY + penalties.append(f"noisy_text:{NOISY_TEXT_PENALTY}") + + # 8. 
def map_tier(score: float) -> str:
    """Map *score* to a tier name using ``TIER_THRESHOLDS``.

    Each threshold is an inclusive lower bound: tiers are tried from the
    highest threshold down, and the first tier with ``score >= threshold``
    is returned.

    Returns:
        The matching tier name, or ``"reject"`` when *score* is below every
        threshold.
    """
    ranked = sorted(TIER_THRESHOLDS.items(), key=lambda item: -item[1])
    return next((tier for tier, floor in ranked if score >= floor), "reject")
scoring -- + comp_score, comp_signals = score_compensation(salary) + + role_score, role_signals = score_role_fit(job_title, scoring_text) + + sen_score, sen_signals = score_seniority(job_title, scoring_text) + + # Work arrangement: try job_data first, then raw_record + if not workplace_type: + workplace_type = str(raw_record.get("workplace_type", "") or "") + arr_score, arr_signals = score_work_arrangement(workplace_type, location, scoring_text) + + has_salary = bool(salary.strip()) + has_usable_jd = bool(scoring_text) and len(scoring_text) >= MIN_JD_LENGTH_USABLE + + app_score, app_signals = score_application_path( + apply_url, external_url, apply_type, scoring_text, + has_salary, has_usable_jd, + ) + + fresh_score, fresh_signals = score_freshness( + posted_time, raw_record, reference_date, + ) + + dc_score, dc_signals = score_data_completeness( + job_title, company_name, location, + job_description, salary, posted_time, + apply_url, external_url, + raw_record, + apply_type=apply_type, + ) + + rank_val: int | None = None + rank_raw = raw_record.get("rank") + if rank_raw is not None: + try: + rank_val = int(rank_raw) + except (ValueError, TypeError): + pass + + applicant_count = str(raw_record.get("applicant_count", "") or "") + + sq_score, sq_signals = score_source_quality( + company_name, scoring_text, salary, apply_type, + apply_url, external_url, has_usable_jd, + applicant_count, raw_record, rank_val, + ) + + # -- Sum component scores -- + raw_total = ( + comp_score + + role_score + + sen_score + + arr_score + + app_score + + fresh_score + + dc_score + + sq_score + ) + + # -- Assemble signals -- + combined_signals: dict[str, Any] = { + "compensation": comp_signals, + "role_fit": role_signals, + "seniority": sen_signals, + "work_arrangement": arr_signals, + "application_friction": app_signals, + "freshness": fresh_signals, + "data_quality": dc_signals, + "source_quality": sq_signals, + "noise": noise_signals, + "penalties": [], + } + + # -- Penalties -- + 
penalised_score, penalties = apply_penalties(raw_total, combined_signals, job_data, scoring_text) + combined_signals["penalties"] = penalties + + # -- Clamp to 0..100 -- + final_score = max(0.0, min(100.0, penalised_score)) + final_score = round(final_score, 1) + + # -- Map to tier (with hard-reject guard) -- + tier = map_tier(final_score) + if tier == "reject": + low_count = _count_low_value_signals(job_data, combined_signals) + if low_count < 2: + tier = "low" + + return ScoreResult( + score=final_score, + tier=tier, + version=SCORER_VERSION, + signals=combined_signals, + scoring_text=scoring_text, + ) diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..6dc615a --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,11 @@ +# Install with uv (preferred): +# uv pip install -r scripts/requirements.txt +# +# If you want an isolated venv first: +# uv venv +# source .venv/bin/activate +# uv pip install -r scripts/requirements.txt + +httpx>=0.27.0 +supabase>=2.0.0 +jsonschema>=4.21.0 diff --git a/scripts/sponsor_filter.py b/scripts/sponsor_filter.py new file mode 100644 index 0000000..c274209 --- /dev/null +++ b/scripts/sponsor_filter.py @@ -0,0 +1,449 @@ +#!/usr/bin/env python3 +"""Match jobs.jobs companies against the UK Licensed Sponsors register. + +Downloads the Gov.uk CSV (or uses a local cache), normalises company names, +and updates jobs.jobs with has_sponsor_licence / sponsor_match_score / +sponsor_match_name. 
# Legal-form tokens and "trading as" clauses removed during normalisation.
# The pattern is not end-anchored, so a token is removed wherever it appears
# as a whole word.
_STRIP_SUFFIXES = re.compile(
    r"\s*\b("
    r"ltd|limited|plc|inc|llp|lp|llc|group|holdings|uk|"
    r"co\.?\s*ltd|corporation|corp|gmbh|ag|sa|srl|bv|nv|"
    r"t/a\s+\S+|trading\s+as\s+\S+"
    r")\b\.?\s*",
    re.IGNORECASE,
)

# Punctuation replaced by a space before whitespace is collapsed.
_NOISE_CHARS = re.compile(r"[\"'\(\)\[\]\-–—,\.&!@#]")
_MULTI_SPACE = re.compile(r"\s+")


def normalise_name(raw: str) -> str:
    """Return the matching key for a company name.

    The name is trimmed and lower-cased, legal-form tokens and punctuation
    are each replaced by a space, and runs of whitespace are collapsed.  The
    result may be empty when nothing but such tokens was present.
    """
    text = raw.strip().lower()
    # Order matters: suffixes first (they may carry a trailing "."), then
    # punctuation, then whitespace collapsing.
    for pattern in (_STRIP_SUFFIXES, _NOISE_CHARS, _MULTI_SPACE):
        text = pattern.sub(" ", text)
    return text.strip()
def load_sponsor_set(csv_path: pathlib.Path) -> dict[str, str]:
    """Load the register CSV into ``{normalised_name: original_name}``.

    Only rows whose ``Route`` column contains "Skilled Worker" are kept.
    When several organisations normalise to the same key, the first one in
    file order wins.

    Args:
        csv_path: Path to the register CSV (UTF-8, optional BOM).

    Returns:
        Mapping from ``normalise_name(org)`` to the organisation name as
        written in the CSV.

    Raises:
        OSError: If the file cannot be opened.
    """
    sponsors: dict[str, str] = {}
    # newline="" lets the csv module handle line endings itself, which it
    # needs for quoted fields that contain embedded newlines.
    with open(csv_path, "r", encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            # DictReader fills missing trailing columns with None (e.g. a
            # truncated last line), so coalesce before any string operation.
            route = row.get("Route") or ""
            if "Skilled Worker" not in route:
                continue
            org = (row.get("Organisation Name") or "").strip()
            if not org:
                continue
            norm = normalise_name(org)
            if norm:
                sponsors.setdefault(norm, org)
    return sponsors
def _read_meta() -> dict:
    """Read the sidecar metadata file (last known URL + download time).

    Best-effort: returns ``{}`` when the file is missing or unreadable, is
    not valid UTF-8 / JSON, or holds JSON that is not an object, so callers
    can always use ``.get(...)`` on the result.
    """
    try:
        meta = json.loads(_META_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return meta if isinstance(meta, dict) else {}
+ """ + if csv_path: + print(f"Using user-supplied CSV: {csv_path}") + return csv_path, True, None + + path = DEFAULT_CSV_PATH + current_url = _resolve_csv_url() + meta = _read_meta() + + if path.exists() and meta.get("url") == current_url: + print(f"CSV is up-to-date (downloaded {meta.get('downloaded_at', '?')}): {path}") + return path, False, current_url + + if path.exists(): + print(f"New publication detected, re-downloading ...") + download_csv(path, current_url) + return path, True, current_url + + +# --------------------------------------------------------------------------- +# Matching +# --------------------------------------------------------------------------- + +@dataclass +class MatchResult: + has_licence: bool + score: float + matched_name: str | None + + + +def match_company( + company_name: str, + sponsor_norm_set: set[str], + sponsor_lookup: dict[str, str], +) -> MatchResult: + """Match a single company name against the sponsor register.""" + if not company_name or not company_name.strip(): + return MatchResult(False, 0.0, None) + + norm = normalise_name(company_name) + if not norm: + return MatchResult(False, 0.0, None) + + # Pass 1: exact match on normalised name + if norm in sponsor_norm_set: + return MatchResult(True, 100.0, sponsor_lookup[norm]) + + # Pass 2: prefix match + norm_tokens = norm.split() + if len(norm_tokens) == 1 and len(norm) >= 5: + # Single-word name: high false-positive risk (brand collisions). + # Return as candidate (has_licence=False, score=80) so downstream + # can surface it for review without auto-approving. 
+ candidates = [] + for sponsor_norm, sponsor_orig in sponsor_lookup.items(): + sp_tokens = sponsor_norm.split() + if sp_tokens and sp_tokens[0] == norm and len(sp_tokens) <= 2: + candidates.append(sponsor_orig) + if len(candidates) == 1: + return MatchResult(False, 80.0, candidates[0]) + + elif len(norm_tokens) >= 2: + # Multi-word name: match sponsor entries that start with ALL our tokens + for sponsor_norm, sponsor_orig in sponsor_lookup.items(): + sp_tokens = sponsor_norm.split() + if len(sp_tokens) >= len(norm_tokens): + if sp_tokens[:len(norm_tokens)] == norm_tokens: + return MatchResult(True, 96.0, sponsor_orig) + + return MatchResult(False, 0.0, None) + + +# --------------------------------------------------------------------------- +# Supabase +# --------------------------------------------------------------------------- + +def _load_dotenv(path: str | os.PathLike[str]) -> None: + """Minimal .env loader — same as sync_autocli_jobs.""" + p = pathlib.Path(path) + if not p.is_file(): + return + for raw_line in p.read_text(encoding="utf-8").splitlines(): + line = raw_line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, value = line.split("=", 1) + key = key.strip() + value = value.strip().strip("'").strip('"') + if not key or key in os.environ: + continue + os.environ[key] = value + + +def _auto_load_env() -> None: + """Load .env from CWD then project root (no override of existing vars).""" + _load_dotenv(pathlib.Path.cwd() / ".env") + _load_dotenv(pathlib.Path(__file__).resolve().parent.parent / ".env") + + +def _create_supabase_client(): + _auto_load_env() + # Strip _project_root and CWD from sys.path so the local `supabase/` + # migrations dir doesn't shadow the PyPI package, then restore after import. 
def fetch_jobs(client, *, full_scan: bool = False) -> list[dict]:
    """Fetch ``{"id", "company_name"}`` rows from jobs.jobs with pagination.

    Rows with a NULL ``company_name`` are always excluded.

    Args:
        client: Supabase client.
        full_scan: ``False`` (default) fetches only rows where
            ``sponsor_match_score IS NULL`` (never checked), as used on
            daily runs with an unchanged CSV.  ``True`` fetches all rows,
            as used when the CSV changed or ``--full`` was given, so that
            revoked/new licences are reflected.

    Returns:
        All matching rows, ordered by ``id``.
    """
    all_rows: list[dict] = []
    page_size = 1000
    offset = 0
    while True:
        q = (
            client.schema("jobs")
            .from_("jobs")
            .select("id, company_name")
            .not_.is_("company_name", "null")
        )
        if not full_scan:
            q = q.is_("sponsor_match_score", "null")
        # Offset pagination is only stable with a deterministic order;
        # without it consecutive pages may overlap or skip rows.
        resp = q.order("id").range(offset, offset + page_size - 1).execute()
        rows = resp.data or []
        all_rows.extend(rows)
        if len(rows) < page_size:
            break
        offset += page_size
    return all_rows
Supabase doesn't support multi-row UPDATE + in one call, so we loop but keep batches small to show progress.""" + for job_id, match in batch: + client.schema("jobs").from_("jobs").update({ + "has_sponsor_licence": match.has_licence, + "sponsor_match_score": match.score, + "sponsor_match_name": match.matched_name, + }).eq("id", job_id).execute() + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser(description="Match jobs against UK Sponsor Register") + parser.add_argument("--csv", type=pathlib.Path, help="Path to local CSV (skip download)") + parser.add_argument("--dry-run", action="store_true", help="Preview matches without DB writes") + parser.add_argument("--full", action="store_true", help="Re-scan all jobs (not just new ones)") + args = parser.parse_args() + + # 1. Load sponsor data + csv_path, csv_changed, resolved_url = ensure_csv(args.csv) + sponsor_lookup = load_sponsor_set(csv_path) + sponsor_norm_set = set(sponsor_lookup.keys()) + print(f"Loaded {len(sponsor_lookup):,} Skilled Worker sponsors") + + # 2. Fetch jobs — full scan when CSV changed or --full flag + full_scan = args.full or csv_changed + client = _create_supabase_client() + jobs = fetch_jobs(client, full_scan=full_scan) + + mode = "FULL (CSV changed)" if csv_changed else ("FULL (--full)" if args.full else "INCREMENTAL (new jobs only)") + print(f"Mode: {mode}") + print(f"Matching {len(jobs):,} jobs ...") + + # 3. Match + matched = 0 + exact = 0 + prefix = 0 + candidate = 0 + results: list[tuple[dict, MatchResult]] = [] + + for job in jobs: + m = match_company( + job["company_name"], + sponsor_norm_set, + sponsor_lookup, + ) + results.append((job, m)) + if m.has_licence: + matched += 1 + if m.score == 100.0: + exact += 1 + else: + prefix += 1 + elif m.score > 0: + candidate += 1 + + # 4. 
Report + print(f"\n{'='*60}") + if not jobs: + print("No jobs to match.") + return + pct = matched / len(jobs) * 100 + print(f"Results: {matched}/{len(jobs)} jobs confirmed ({pct:.1f}%)") + print(f" Exact matches: {exact}") + print(f" Prefix matches: {prefix}") + print(f" Candidates: {candidate} (single-word, needs review)") + print(f" No match: {len(jobs) - matched - candidate}") + + # Show some sample matches + print(f"\nSample CONFIRMED (first 15):") + shown = 0 + for job, m in results: + if m.has_licence and shown < 15: + tag = "EXACT" if m.score == 100.0 else f"PREFIX({m.score:.0f})" + print(f" [{tag}] '{job['company_name']}' -> '{m.matched_name}'") + shown += 1 + + if candidate: + print(f"\nSample CANDIDATE (first 10, has_licence=false, score=80):") + shown = 0 + for job, m in results: + if not m.has_licence and m.score > 0 and shown < 10: + print(f" '{job['company_name']}' -> '{m.matched_name}'") + shown += 1 + + print(f"\nSample UNMATCHED (first 10):") + shown = 0 + for job, m in results: + if not m.has_licence and m.score == 0 and shown < 10: + print(f" '{job['company_name']}'") + shown += 1 + + # 5. Update DB + if args.dry_run: + print(f"\n[DRY RUN] No DB writes. Re-run without --dry-run to apply.") + return + + print(f"\nUpdating Supabase ({BATCH_SIZE} rows/batch) ...") + total = len(results) + for i in range(0, total, BATCH_SIZE): + batch = [(job["id"], m) for job, m in results[i:i + BATCH_SIZE]] + _batch_update(client, batch) + done = min(i + BATCH_SIZE, total) + if done % 200 == 0 or done == total: + print(f" {done}/{total}") + + # Commit meta only after successful DB update — if scan failed on a + # previous run the CSV will still look "new" and trigger a full rescan. + if csv_changed and resolved_url: + _write_meta(resolved_url) + + print(f"Done. 
Updated {total} rows.") + + +if __name__ == "__main__": + main() diff --git a/scripts/sync_autocli_jobs.py b/scripts/sync_autocli_jobs.py new file mode 100644 index 0000000..87b042a --- /dev/null +++ b/scripts/sync_autocli_jobs.py @@ -0,0 +1,634 @@ +#!/usr/bin/env python3 +"""Sync AutoCLI job JSON into Supabase with optional priority scoring.""" +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import pathlib +import re +import sys +from dataclasses import dataclass +from typing import Any, Iterable +from urllib.parse import urlparse, urlunparse + +# Ensure project root is on sys.path for `scripts.*` imports when invoked as +# python scripts/sync_autocli_jobs.py +_project_root = str(pathlib.Path(__file__).resolve().parent.parent) +if _project_root not in sys.path: + sys.path.insert(0, _project_root) + +from scripts.job_priority_scorer import score_job + +def _sha256_hex(data: bytes) -> str: + return hashlib.sha256(data).hexdigest() + + +def _canonical_json_bytes(value: Any) -> bytes: + return json.dumps(value, ensure_ascii=False, separators=(",", ":"), sort_keys=True).encode( + "utf-8" + ) + + +def _normalize_text(value: Any) -> str: + if value is None: + return "" + if isinstance(value, (int, float)): + return str(value) + if not isinstance(value, str): + return str(value) + return value.strip() + + +def _get_first_key(record: dict[str, Any], keys: Iterable[str]) -> str: + for key in keys: + if key in record and record[key] is not None: + v = _normalize_text(record[key]) + if v: + return v + return "" + + +# ── URL canonicalization (for dedup of ATS/external job URLs) ────────── + +LINKEDIN_PATTERN = re.compile( + r"^https?://(?:www\.)?linkedin\.com/", + re.IGNORECASE, +) + +ATS_DOMAINS = frozenset({ + "myworkdayjobs.com", + "greenhouse.io", + "lever.co", + "recruitee.com", + "applytojob.com", + "workable.com", + "breezy.hr", + "smartrecruiters.com", + "icims.com", + "successfactors.eu", + "successfactors.com", + 
"oraclecloud.com", + "taleo.net", +}) + +TRACKING_PARAMS = frozenset({ + "source", "share_id", "si", "li_fat_id", "trk", "trackingId", "tracking_id", + "ref", "referrer", + "fbclid", "gclid", "gclsrc", "dclid", "gbraid", "wbraid", + "msclkid", "twclid", "sc_campaign", "sc_channel", "sc_content", + "sc_medium", "sc_outcome", "sc_geo", "sc_country", + "gh_src", "lever_source", "lever-source", + "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content", +}) + + +def _is_linkedin_url(url: str | None) -> bool: + """Return True if *url* is a linkedin.com URL.""" + if not url: + return False + return bool(LINKEDIN_PATTERN.match(url.strip())) + + +def _is_ats_url(url: str | None) -> bool: + """Return True if *url* points to a known ATS / career-portal domain.""" + if not url: + return False + try: + host = urlparse(url.strip()).hostname or "" + except Exception: + return False + host = host.lower() + # Strip www. prefix for matching + if host.startswith("www."): + host = host[4:] + for domain in ATS_DOMAINS: + if host == domain or host.endswith("." + domain): + return True + # Generic career portals (catch-alls after known ATS domains) + if host.endswith(".myworkdayjobs.com"): + return True + return False + + +def _canonicalize_url(raw_url: str | None) -> str: + """Normalize a URL for dedup: lowercase, strip trailing slash, remove tracking params. + + Returns the normalized URL string, or empty string if input is empty/falsy. 
+ """ + if not raw_url: + return "" + try: + parsed = urlparse(raw_url.strip()) + scheme = parsed.scheme.lower() + netloc = parsed.netloc.lower() + # Strip trailing slash from path + path = parsed.path.rstrip("/") + if not path: + path = "/" + # Filter tracking query params + cleaned_pairs: list[str] = [] + if parsed.query: + for pair in parsed.query.split("&"): + k, _, v = pair.partition("=") + if k not in TRACKING_PARAMS: + cleaned_pairs.append(f"{k}={v}") + cleaned_query = "&".join(cleaned_pairs) + result = urlunparse((scheme, netloc, path, parsed.params, cleaned_query, "")) + return result.rstrip("?") + except Exception: + return raw_url.strip() + + +def _extract_canonical_job_url( + apply_url: str, + external_url: str, +) -> str: + """Determine the canonical job URL to use for identity computation. + + Priority (first non-empty, non-LinkedIn as identity): + 1. external_url if it is an ATS URL + 2. external_url if apply_url is LinkedIn (prefer any external_url over LinkedIn) + 3. apply_url if it is an ATS URL (not LinkedIn) + 4. apply_url as fallback (even if LinkedIn) + 5. 
empty string + """ + apply_url_s = apply_url.strip() if apply_url else "" + external_url_s = external_url.strip() if external_url else "" + + # Rule 1: external_url is ATS → use it + if _is_ats_url(external_url_s): + return _canonicalize_url(external_url_s) + + # Rule 2: apply_url is LinkedIn AND external_url exists → use external_url + if _is_linkedin_url(apply_url_s) and external_url_s: + return _canonicalize_url(external_url_s) + + # Rule 3: apply_url is ATS (not LinkedIn) → use it + if _is_ats_url(apply_url_s): + return _canonicalize_url(apply_url_s) + + # Rule 4: apply_url exists (even LinkedIn) → use it + if apply_url_s: + return _canonicalize_url(apply_url_s) + + # Rule 5: external_url exists → use it + if external_url_s: + return _canonicalize_url(external_url_s) + + return "" + + +def _extract_records(doc: Any) -> list[dict[str, Any]]: + if isinstance(doc, list): + return [r for r in doc if isinstance(r, dict)] + if isinstance(doc, dict): + for key in ("items", "results", "data"): + val = doc.get(key) + if isinstance(val, list): + return [r for r in val if isinstance(r, dict)] + raise ValueError("Unsupported JSON shape: expected array of objects") + + +def _load_dotenv(path: str | os.PathLike[str]) -> None: + """Minimal .env loader (no extra dependencies). + + - Ignores blank lines and comments starting with '#' + - Supports KEY=VALUE with optional surrounding quotes + - Does not override already-set environment variables + """ + p = pathlib.Path(path) + if not p.is_file(): + return + + for raw_line in p.read_text(encoding="utf-8").splitlines(): + line = raw_line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, value = line.split("=", 1) + key = key.strip() + value = value.strip().strip("'").strip('"') + if not key or key in os.environ: + continue + os.environ[key] = value + + +def _auto_load_env() -> None: + """Load env vars from `.env` if present. 
@dataclass(frozen=True)
class NormalizedJob:
    """Immutable, normalized view of one raw job record (built by ``normalize_job``).

    Text fields hold the first non-empty value found among the accepted key
    spellings of the raw record, whitespace-trimmed, or ``""`` when absent.
    """

    # Source label passed to normalize_job by the caller.
    source: str
    # SHA-256 hex digest of the identity string: the canonical job URL when one
    # can be derived, otherwise "title|company|location" lower-cased.
    identity_hash: str
    job_title: str
    company_name: str
    location: str
    salary: str
    post_time: str
    apply_url: str
    external_url: str
    job_description: str
    # SHA-256 hex digest of job_description, or "" when there is no description.
    description_hash: str
    # Copied from the record's "url" / "url_hash" keys as-is (not recomputed).
    url: str
    url_hash: str
    source_channel: str
    # Record's "apply_type"; when that is empty, "easy_apply" if the record's
    # "easy_apply" value is "true", "1" or "yes" (case-insensitive), else "".
    apply_type: str
    # The raw input record this job was normalized from.
    raw_record: dict[str, Any]
    # SHA-256 hex digest of raw_record serialized as compact, key-sorted JSON.
    raw_hash: str
source_channel = _get_first_key(raw_record, ("source_channel",)) + apply_type = _get_first_key(raw_record, ("apply_type",)) + if not apply_type: + easy_apply_raw = _get_first_key(raw_record, ("easy_apply",)) + if easy_apply_raw and easy_apply_raw.lower() in ("true", "1", "yes"): + apply_type = "easy_apply" + + return NormalizedJob( + source=source, + identity_hash=identity_hash, + job_title=job_title, + company_name=company_name, + location=location, + salary=salary, + post_time=post_time, + apply_url=apply_url, + external_url=external_url, + job_description=job_description, + description_hash=description_hash, + url=url, + url_hash=url_hash, + source_channel=source_channel, + apply_type=apply_type, + raw_record=raw_record, + raw_hash=raw_hash, + ) + + +def _create_supabase_client(url: str | None, key: str | None): + try: + # Move CWD to end of sys.path so a local `supabase/` dir (migrations + # folder in the project root) doesn't shadow the `supabase` PyPI package. + _path_clean = [p for p in sys.path if p not in ("", ".")] + _path_dirty = [p for p in sys.path if p in ("", ".")] + sys.path = _path_clean + _path_dirty + from supabase import create_client # noqa: F811 + except Exception as exc: + raise RuntimeError( + "Missing Python dependency 'supabase'. Install deps with:\n" + " uv pip install -r scripts/requirements.txt" + ) from exc + + url = url or os.environ.get("SUPABASE_URL", "") + key = key or os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY", "") + if not url or not key: + raise ValueError( + "Missing Supabase credentials. Set SUPABASE_URL and either " + "SUPABASE_SERVICE_ROLE_KEY (preferred) or SUPABASE_KEY (can be service-role or anon)." 
+ ) + return create_client(url, key) + + +def upsert_job( + client, + job: NormalizedJob, + priority_score: float | None = None, + priority_tier: str | None = None, + priority_scorer_version: str | None = None, + priority_signals: dict | None = None, +) -> str: + params: dict[str, Any] = { + "p_source": job.source, + "p_identity_hash": job.identity_hash, + "p_job_title": job.job_title, + "p_company_name": job.company_name, + "p_location": job.location, + "p_salary": job.salary, + "p_post_time": job.post_time, + "p_apply_url": job.apply_url, + "p_external_url": job.external_url, + "p_job_description": job.job_description, + "p_description_hash": job.description_hash, + "p_raw_record": job.raw_record, + "p_raw_hash": job.raw_hash, + } + if job.url_hash: + params["p_url"] = job.url + params["p_url_hash"] = job.url_hash + if job.source_channel: + params["p_source_channel"] = job.source_channel + if job.apply_type: + params["p_apply_type"] = job.apply_type + if priority_score is not None: + params["p_priority_score"] = priority_score + if priority_tier is not None: + params["p_priority_tier"] = priority_tier + if priority_scorer_version is not None: + params["p_priority_scorer_version"] = priority_scorer_version + if priority_signals is not None: + params["p_priority_signals"] = priority_signals + resp = client.rpc("upsert_job", params).execute() + # supabase-py returns either scalar or list depending on RPC return shape; normalize. + data = resp.data + if isinstance(data, str): + return data + if isinstance(data, list) and data: + # Some PostgREST configs wrap scalar returns. 
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: read job JSON, normalize, score, and upsert into Supabase.

    Flow:
      1. Parse arguments and load ``.env`` files.
      2. Read JSON from ``--input`` (or stdin) and extract the record list.
      3. Normalize every record; rows without an identity are skipped with a
         warning. Unless ``--disable-scoring`` is set, each job is scored;
         a scoring failure is reported on stderr and the job continues unscored.
      4. With ``--dry-run``: print a JSON report (counts, duplicate identity
         groups, score/tier statistics) and stop without touching the database.
      5. Otherwise upsert every job that passes the optional
         ``--min-priority-score`` / ``--priority-tier`` filters.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code: 0 on success, 1 when an upsert fails (processing
        stops at the first failure), 2 for invalid input or when the Supabase
        client cannot be created.
    """
    parser = argparse.ArgumentParser(description="Sync AutoCLI job JSON into Supabase.")
    parser.add_argument("--input", help="Path to JSON file (defaults to stdin).")
    parser.add_argument("--source", default="linkedin", help="Source label stored in DB.")
    parser.add_argument("--dry-run", action="store_true", help="Validate and summarize only.")
    parser.add_argument("--limit", type=int, default=0, help="Cap number of rows processed.")
    parser.add_argument("--supabase-url", dest="supabase_url", help="Override SUPABASE_URL.")
    parser.add_argument("--supabase-key", dest="supabase_key", help="Override Supabase key.")
    parser.add_argument(
        "--env-file",
        help="Optional path to a .env file to load (does not override existing env vars).",
    )
    parser.add_argument(
        "--disable-scoring",
        action="store_true",
        help="Skip priority scoring (useful for testing or backfill via separate script).",
    )
    parser.add_argument(
        "--min-priority-score",
        type=float,
        default=None,
        help="Only upsert jobs with priority_score >= this value (default: upsert all).",
    )
    parser.add_argument(
        "--priority-tier",
        choices=["high", "medium", "low", "reject"],
        default=None,
        help="Only upsert jobs with this priority_tier or above (default: upsert all).",
    )
    args = parser.parse_args(argv)

    # NOTE(review): the implicit .env files are loaded first and the loader
    # never overrides, so --env-file cannot replace a key they already set.
    # Confirm this precedence is intended.
    _auto_load_env()
    if args.env_file:
        _load_dotenv(args.env_file)

    if args.input:
        # Context manager so the file handle is closed deterministically.
        with open(args.input, "r", encoding="utf-8") as input_file:
            raw_text = input_file.read()
    else:
        raw_text = sys.stdin.read()

    try:
        doc = json.loads(raw_text)
    except Exception as exc:
        print(f"ERROR: invalid JSON input: {exc}", file=sys.stderr)
        return 2

    try:
        records = _extract_records(doc)
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 2

    if args.limit and args.limit > 0:
        records = records[: args.limit]

    normalized: list[NormalizedJob] = []
    scored: list[tuple[NormalizedJob, dict[str, Any] | None]] = []
    skipped = 0
    for idx, rec in enumerate(records):
        job = normalize_job(args.source, rec)
        if job is None:
            skipped += 1
            print(
                f"WARN: skipping row {idx}: missing identity (need apply_url/external_url or job_title+company_name)",
                file=sys.stderr,
            )
            continue
        normalized.append(job)
        score_result = None
        if not args.disable_scoring:
            try:
                # Build a dict with normalized keys so score_job can find
                # job_title, company_name, job_description, post_time, etc.
                # even when the raw record uses different key names.
                score_result = score_job({
                    "job_title": job.job_title,
                    "company_name": job.company_name,
                    "location": job.location,
                    "salary": job.salary,
                    "post_time": job.post_time,
                    "apply_url": job.apply_url,
                    "external_url": job.external_url,
                    "job_description": job.job_description,
                    "apply_type": job.apply_type,
                    "source_channel": job.source_channel,
                    "workplace_type": _get_first_key(
                        job.raw_record, ("workplace_type",)
                    ),
                    "raw_record": job.raw_record,
                })
            except Exception as exc:
                # Scoring is non-critical: keep going without a score, but
                # report the failure instead of hiding it.
                print(f"WARN: scoring failed for row {idx}: {exc}", file=sys.stderr)
        scored.append((job, score_result))

    if args.dry_run:
        from collections import defaultdict

        # Group jobs by identity hash to surface rows that would collapse
        # into the same database record.
        url_groups: dict[str, list[NormalizedJob]] = defaultdict(list)
        for job in normalized:
            url_groups[job.identity_hash].append(job)

        duplicate_groups: list[dict[str, Any]] = []
        for id_hash, jobs in url_groups.items():
            if len(jobs) > 1:
                duplicate_groups.append(
                    {
                        "identity_hash": id_hash,
                        "count": len(jobs),
                        "job_title": jobs[0].job_title,
                        "company_name": jobs[0].company_name,
                        "apply_urls": sorted(set(j.apply_url for j in jobs)),
                        "external_urls": sorted(set(j.external_url for j in jobs)),
                    }
                )

        report: dict[str, Any] = {
            "source": args.source,
            "input_rows": len(records),
            "will_process": len(normalized),
            "skipped": skipped,
            "canonical_distinct_jobs": len(url_groups),
            "duplicate_groups": len(duplicate_groups),
            "scoring": not args.disable_scoring,
        }
        if not args.disable_scoring:
            scored_results = [
                r for _, r in scored if r is not None
            ]
            if scored_results:
                scores = [r.score for r in scored_results]
                tiers = [r.tier for r in scored_results]
                report["priority_scores"] = {
                    "min": round(min(scores), 1),
                    "max": round(max(scores), 1),
                    "avg": round(sum(scores) / len(scores), 1),
                    "total_scored": len(scored_results),
                }
                tier_counts: dict[str, int] = {}
                for t in tiers:
                    tier_counts[t] = tier_counts.get(t, 0) + 1
                report["priority_tiers"] = tier_counts
                report["low_priority_count"] = tier_counts.get("reject", 0)

                # Top 10 priority jobs
                sorted_with_job = sorted(
                    [(j, r) for j, r in scored if r is not None],
                    key=lambda x: x[1].score,
                    reverse=True,
                )
                top_10 = []
                for nj, sr in sorted_with_job[:10]:
                    sig = sr.signals
                    top_10.append({
                        "title": nj.job_title,
                        "company": nj.company_name,
                        "location": nj.location,
                        "score": sr.score,
                        "tier": sr.tier,
                        "key_signals": {
                            "compensation": sig.get("compensation", {}).get("score"),
                            "role_fit": sig.get("role_fit", {}).get("score"),
                            # NOTE(review): read from "application_friction"
                            # here but from "application_path" in the summary
                            # below; confirm which signal key the scorer emits.
                            "application_path": sig.get("application_friction", {}).get("reason"),
                            "source_quality": sig.get("source_quality", {}).get("score"),
                        },
                    })
                report["top_priority_jobs"] = top_10

                # Source-quality summary
                recruiter_like = 0
                aggregator_like = 0
                low_info_easy_apply = 0
                raw_jd_fallback = 0
                for r in scored_results:
                    sq = r.signals.get("source_quality", {})
                    if sq.get("recruiter_company") or sq.get("recruiter_phrase"):
                        recruiter_like += 1
                    ap = r.signals.get("application_path", {})
                    if ap.get("is_aggregator"):
                        aggregator_like += 1
                    if sq.get("easy_apply_no_owned_url") and sq.get("missing_salary"):
                        low_info_easy_apply += 1
                    dq = r.signals.get("data_quality", {})
                    if dq.get("description_source") in ("raw", "raw_record.jd"):
                        raw_jd_fallback += 1
                report["source_quality_summary"] = {
                    "recruiter_like_rows": recruiter_like,
                    "aggregator_like_rows": aggregator_like,
                    "low_information_easy_apply_rows": low_info_easy_apply,
                    "raw_jd_fallback_rows": raw_jd_fallback,
                }
        if duplicate_groups:
            report["duplicates"] = duplicate_groups

        print(json.dumps(report, indent=2, ensure_ascii=False))
        return 0

    try:
        client = _create_supabase_client(args.supabase_url, args.supabase_key)
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 2

    # ── Priority filtering ──────────────────────────────────────────────
    _TIER_ORDER = {"reject": 1, "low": 2, "medium": 3, "high": 4}

    def _passes_filter(score_result) -> bool:
        # Unscored jobs pass only when no priority filter was requested.
        if score_result is None:
            return args.min_priority_score is None and args.priority_tier is None
        if args.min_priority_score is not None and score_result.score < args.min_priority_score:
            return False
        if args.priority_tier is not None:
            # Unknown tiers rank 0, i.e. below every selectable tier.
            return _TIER_ORDER.get(score_result.tier, 0) >= _TIER_ORDER[args.priority_tier]
        return True

    upserted = 0
    for idx, (job, score_result) in enumerate(scored):
        if not _passes_filter(score_result):
            continue
        try:
            if score_result is not None:
                _ = upsert_job(
                    client,
                    job,
                    priority_score=score_result.score,
                    priority_tier=score_result.tier,
                    priority_scorer_version=score_result.version,
                    priority_signals=score_result.signals,
                )
            else:
                _ = upsert_job(client, job)
            upserted += 1
        except Exception as exc:
            print(
                f"ERROR: upsert failed for row {idx} identity_hash={job.identity_hash}: {exc}",
                file=sys.stderr,
            )
            return 1

    scored_count = sum(1 for _, r in scored if r is not None)
    print(
        json.dumps(
            {
                "source": args.source,
                "input_rows": len(records),
                "upserted": upserted,
                "scored": scored_count,
                "skipped": skipped,
            },
            indent=2,
        )
    )
    return 0
# Usage: check_contains "description" "pattern" command [args...]
# Tests that the command's combined stdout+stderr, with ANSI colour codes
# removed, contains the (grep basic-regex) pattern. The command's exit status
# is deliberately ignored: only the output is under test.
check_contains() {
  local desc="$1"; shift
  local pattern="$1"; shift
  # Literal ESC byte via ANSI-C quoting; "\x1b" inside a sed script is a GNU
  # extension and is not understood by BSD/macOS sed.
  local esc=$'\x1b'
  local output cleaned
  # Capture first instead of piping the command straight into `grep -q`:
  # under `set -o pipefail` a non-zero exit (or a SIGPIPE when grep stops
  # reading early) would fail the pipeline even though the pattern matched.
  output=$("$@" 2>&1) || true
  cleaned=$(sed "s/${esc}\[[0-9;]*m//g" <<<"$output")
  if grep -q -- "$pattern" <<<"$cleaned"; then
    green "$desc"
    PASS=$((PASS + 1))
  else
    red "$desc (output missing '$pattern')"
    FAIL=$((FAIL + 1))
  fi
}
──────────────────────────────────────── +echo "" +echo "[Test 4] Log format" +check_contains "has timestamp format" "[0-9][0-9]:[0-9][0-9]:[0-9][0-9]" bash "$SCRIPT" --check-only +check_contains "has CHECK markers" "CHECK" bash "$SCRIPT" --check-only +check_contains "shows passed count" "passed" bash "$SCRIPT" --check-only + +# ── Test 5: JSON output ────────────────────────────────────────────── +echo "" +echo "[Test 5] --json output" +JSON_OUT=$(bash "$SCRIPT" --check-only --json 2>/dev/null || true) +if echo "$JSON_OUT" | python3 -c "import sys,json; d=json.load(sys.stdin); assert 'checks' in d; assert 'passed' in d; print('valid')" 2>/dev/null; then + check_pass "outputs valid JSON with checks" true +else + red "JSON output invalid or missing fields" + FAIL=$((FAIL + 1)) +fi + +# ── Test 6: Missing binary handled ─────────────────────────────────── +echo "" +echo "[Test 6] Missing binary simulation" +check_fail "handles missing autocli" env PATH=/usr/bin:/bin bash "$SCRIPT" --check-only 2>/dev/null + +# ── Test 7: Command passthrough ────────────────────────────────────── +echo "" +echo "[Test 7] Command passthrough" +RESULT=$(bash "$SCRIPT" -- echo "hello-autocli-test" 2>/dev/null || true) +if echo "$RESULT" | grep -q "hello-autocli-test"; then + check_pass "executes command after checks" true +else + red "command not executed after checks" + FAIL=$((FAIL + 1)) +fi + +# ── Test 8: Exit codes ─────────────────────────────────────────────── +echo "" +echo "[Test 8] Exit codes" +check_pass "--check-only succeeds" bash "$SCRIPT" --check-only +check_fail "--check-only with bad PATH fails" env PATH=/usr/bin:/bin bash "$SCRIPT" --check-only 2>/dev/null + +# ── Test 9: Extension freshness detection ──────────────────────────── +echo "" +echo "[Test 9] Extension freshness" + +# Simulate stale dist by touching it and setting an old refresh marker +REFRESH_MARKER="/tmp/.autocli-baseline-refresh-test" +EXT_DIST="extension/dist/background.js" + +if [ -f "$EXT_DIST" ]; 
then + # Create an old marker (epoch 0) + touch -t 200001010000 "$REFRESH_MARKER" 2>/dev/null || true + + # Run check — should warn about stale extension + OUT=$(AUTOCLI_REFRESH_MARKER="$REFRESH_MARKER" bash "$SCRIPT" --check-only 2>&1 || true) + if echo "$OUT" | grep -qi "refresh\|stale\|outdated\|newer\|behind"; then + check_pass "detects stale extension" true + else + red "did not detect stale extension" + FAIL=$((FAIL + 1)) + fi + + # Clean up + rm -f "$REFRESH_MARKER" +else + check_pass "dist file exists (skip freshness)" test -f "$EXT_DIST" +fi + +# ── Test 10: --refresh-extension flag exists ───────────────────────── +echo "" +echo "[Test 10] --refresh-extension flag" +check_contains "--refresh-extension in help" "refresh-extension" bash "$SCRIPT" --help + +# ── Summary ────────────────────────────────────────────────────────── +echo "" +echo "=========================================" +echo "Results: $PASS passed, $FAIL failed" +echo "=========================================" + +[ "$FAIL" -eq 0 ] || exit 1 diff --git a/scripts/test_sync_autocli_jobs.py b/scripts/test_sync_autocli_jobs.py new file mode 100644 index 0000000..00cc99f --- /dev/null +++ b/scripts/test_sync_autocli_jobs.py @@ -0,0 +1,254 @@ +import re +import unittest +from typing import Any + +from scripts.sync_autocli_jobs import ( + _canonicalize_url, + _extract_canonical_job_url, + _is_linkedin_url, + _is_ats_url, + normalize_job, +) + + +class TestUrlHelpers(unittest.TestCase): + def test_is_linkedin_url_true(self) -> None: + self.assertTrue(_is_linkedin_url("https://www.linkedin.com/jobs/view/123")) + self.assertTrue(_is_linkedin_url("https://linkedin.com/jobs/view/123")) + self.assertTrue(_is_linkedin_url("http://linkedin.com/jobs/view/123")) + + def test_is_linkedin_url_false(self) -> None: + self.assertFalse(_is_linkedin_url("https://example.wd12.myworkdayjobs.com/job/123")) + self.assertFalse(_is_linkedin_url("")) + self.assertFalse(_is_linkedin_url(None)) + + def 
test_is_ats_url_true(self) -> None: + self.assertTrue(_is_ats_url("https://example.wd12.myworkdayjobs.com/job/123")) + self.assertTrue(_is_ats_url("https://jobs.lever.co/company/role")) + self.assertTrue(_is_ats_url("https://boards.greenhouse.io/company/jobs/123")) + self.assertTrue(_is_ats_url("https://example.recruitee.com/job/123")) + self.assertTrue(_is_ats_url("https://example.applytojob.com/apply/123")) + + def test_is_ats_url_false(self) -> None: + self.assertFalse(_is_ats_url("https://www.linkedin.com/jobs/view/123")) + self.assertFalse(_is_ats_url("https://linkedin.com/jobs/view/123")) + self.assertFalse(_is_ats_url("http://example.com/random")) + self.assertFalse(_is_ats_url("")) + self.assertFalse(_is_ats_url(None)) + + def test_canonicalize_url_lowercases_scheme_and_host(self) -> None: + result = _canonicalize_url("HTTPS://EXAMPLE.COM/Job/123") + self.assertEqual(result, "https://example.com/Job/123") + + def test_canonicalize_url_strips_trailing_slash(self) -> None: + result = _canonicalize_url("https://example.com/job/123/") + self.assertEqual(result, "https://example.com/job/123") + + def test_canonicalize_url_strips_tracking_params(self) -> None: + result = _canonicalize_url( + "https://example.wd12.myworkdayjobs.com/job/123?source=linkedin&share_id=abc123" + ) + self.assertEqual(result, "https://example.wd12.myworkdayjobs.com/job/123") + + def test_canonicalize_url_strips_utm_params(self) -> None: + result = _canonicalize_url( + "https://careers.example.com/job/456?utm_source=linkedin&utm_medium=social&keep=abc" + ) + self.assertEqual(result, "https://careers.example.com/job/456?keep=abc") + + def test_canonicalize_url_strips_gh_src(self) -> None: + result = _canonicalize_url( + "https://boards.greenhouse.io/company/jobs/123?gh_src=abc123" + ) + self.assertEqual(result, "https://boards.greenhouse.io/company/jobs/123") + + def test_canonicalize_url_strips_lever_source(self) -> None: + result = _canonicalize_url( + 
"https://jobs.lever.co/company/role?lever-source=linkedin" + ) + self.assertEqual(result, "https://jobs.lever.co/company/role") + + def test_canonicalize_url_keeps_stable_query_params(self) -> None: + result = _canonicalize_url( + "https://example.wd12.myworkdayjobs.com/job/123?jobId=456&source=linkedin" + ) + self.assertEqual(result, "https://example.wd12.myworkdayjobs.com/job/123?jobId=456") + + def test_canonicalize_url_empty(self) -> None: + self.assertEqual(_canonicalize_url(""), "") + self.assertEqual(_canonicalize_url(None), "") + + def test_extract_canonical_prefers_ats_external_url(self) -> None: + """When external_url is an ATS URL and apply_url is LinkedIn, use external_url.""" + result = _extract_canonical_job_url( + apply_url="https://www.linkedin.com/jobs/view/123", + external_url="https://example.wd12.myworkdayjobs.com/job/456", + ) + self.assertEqual(result, "https://example.wd12.myworkdayjobs.com/job/456") + + def test_extract_canonical_uses_ats_apply_url_when_no_external(self) -> None: + """When no external_url but apply_url is an ATS URL, use apply_url.""" + result = _extract_canonical_job_url( + apply_url="https://boards.greenhouse.io/company/jobs/123", + external_url="", + ) + self.assertEqual(result, "https://boards.greenhouse.io/company/jobs/123") + + def test_extract_canonical_uses_linkedin_as_last_resort(self) -> None: + """When no external_url and apply_url is LinkedIn, still use apply_url.""" + result = _extract_canonical_job_url( + apply_url="https://www.linkedin.com/jobs/view/123", + external_url="", + ) + self.assertEqual(result, "https://www.linkedin.com/jobs/view/123") + + def test_extract_canonical_returns_empty_when_none(self) -> None: + result = _extract_canonical_job_url(apply_url="", external_url="") + self.assertEqual(result, "") + + def test_extract_canonical_canonicalizes_result(self) -> None: + """Result should be canonicalized (normalized host, stripped trailing slash, etc).""" + result = _extract_canonical_job_url( + 
apply_url="https://www.linkedin.com/jobs/view/123", + external_url="HTTPS://EXAMPLE.WD12.MYWORKDAYJOBS.COM/Job/456/?source=linkedin", + ) + self.assertEqual(result, "https://example.wd12.myworkdayjobs.com/Job/456") + + +class TestNormalizeJobDedup(unittest.TestCase): + """Regression tests for deduplication of same ATS job arriving via different URLs.""" + + def test_same_workday_job_produces_same_identity_hash(self) -> None: + """Ameresco case: same Workday URL, different apply_url shape → same identity_hash. + + Record A: has LinkedIn apply_url + Workday external_url + Record B: has same Workday external_url, no apply_url + Both must produce the same identity_hash. + """ + workday_url = "https://ameresco.wd1.myworkdayjobs.com/en-US/Ameresco_Careers/job/Ameresco-Senior-Developer" + linkedin_url = "https://www.linkedin.com/jobs/view/1234567890" + + job_a = normalize_job( + "linkedin", + { + "apply_url": linkedin_url, + "external_url": workday_url, + "job_title": "Senior Developer", + "company_name": "Ameresco", + "location": "Framingham, MA", + }, + ) + job_b = normalize_job( + "linkedin", + { + "external_url": workday_url, + "job_title": "Senior Developer", + "company_name": "Ameresco", + "location": "Framingham, MA", + }, + ) + + self.assertIsNotNone(job_a) + self.assertIsNotNone(job_b) + assert job_a is not None and job_b is not None + self.assertEqual( + job_a.identity_hash, + job_b.identity_hash, + "Same Workday URL with different apply_url shapes must produce the same identity_hash", + ) + + def test_different_ats_urls_produce_different_hashes(self) -> None: + """Different ATS URLs should still produce different identity hashes.""" + job_a = normalize_job( + "linkedin", + { + "external_url": "https://company.wd1.myworkdayjobs.com/job/111", + "job_title": "Engineer", + "company_name": "Acme", + "location": "Remote", + }, + ) + job_b = normalize_job( + "linkedin", + { + "external_url": "https://company.wd1.myworkdayjobs.com/job/222", + "job_title": "Engineer", + 
"company_name": "Acme", + "location": "Remote", + }, + ) + + self.assertIsNotNone(job_a) + self.assertIsNotNone(job_b) + assert job_a is not None and job_b is not None + self.assertNotEqual( + job_a.identity_hash, + job_b.identity_hash, + "Different ATS URLs must produce different identity hashes", + ) + + def test_tracking_params_in_url_dont_change_identity(self) -> None: + """Same Workday URL with/without tracking params → same identity_hash.""" + job_a = normalize_job( + "linkedin", + { + "external_url": "https://ameresco.wd1.myworkdayjobs.com/Job/123", + "job_title": "Dev", + "company_name": "Co", + "location": "Remote", + }, + ) + job_b = normalize_job( + "linkedin", + { + "external_url": "https://ameresco.wd1.myworkdayjobs.com/Job/123?source=linkedin&utm_campaign=recruiting", + "job_title": "Dev", + "company_name": "Co", + "location": "Remote", + }, + ) + + self.assertIsNotNone(job_a) + self.assertIsNotNone(job_b) + assert job_a is not None and job_b is not None + self.assertEqual( + job_a.identity_hash, + job_b.identity_hash, + "Tracking params must not affect identity hash", + ) + + def test_existing_identity_via_apply_url_still_works(self) -> None: + """Non-LinkedIn, non-ATS apply_url still produces identity.""" + job = normalize_job( + "linkedin", + { + "apply url": "https://example.com/apply/123",  # NOTE(review): key is "apply url" (with a space), unlike "apply_url" in the other tests; confirm normalize_job accepts this spelling, otherwise this test never exercises an apply URL + "job_title": "Engineer", + "company_name": "Acme", + "location": "Remote", + }, + ) + self.assertIsNotNone(job) + assert job is not None + self.assertNotEqual(job.identity_hash, "") + + def test_linkedin_apply_url_preserved_as_metadata(self) -> None: + """LinkedIn URL should still be stored as apply_url, just not used for identity.""" + job = normalize_job( + "linkedin", + { + "apply_url": "https://www.linkedin.com/jobs/view/999", + "external_url": "https://careers.example.com/job/555", + "job_title": "Engineer", + "company_name": "Acme", + "location": "Remote", + }, + ) + self.assertIsNotNone(job) + assert job is not None + self.assertEqual(job.apply_url, 
"https://www.linkedin.com/jobs/view/999") + self.assertEqual(job.external_url, "https://careers.example.com/job/555") + + +if __name__ == "__main__": + unittest.main() diff --git a/scripts/uv-install.sh b/scripts/uv-install.sh new file mode 100755 index 0000000..3f2d1db --- /dev/null +++ b/scripts/uv-install.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Installs Python deps for scripts/ using uv. +# Prereq: uv installed (https://github.com/astral-sh/uv) + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +cd "$PROJECT_DIR" + +if ! command -v uv >/dev/null 2>&1; then + echo "ERROR: uv not found in PATH." + echo "Install: https://github.com/astral-sh/uv" + exit 1 +fi + +uv venv +source .venv/bin/activate +uv pip install -r scripts/requirements.txt + +echo "OK: installed deps into $PROJECT_DIR/.venv" diff --git a/supabase/migrations/20260503192000_create_jobs_schema_and_sync_rpc.sql b/supabase/migrations/20260503192000_create_jobs_schema_and_sync_rpc.sql new file mode 100644 index 0000000..2483863 --- /dev/null +++ b/supabase/migrations/20260503192000_create_jobs_schema_and_sync_rpc.sql @@ -0,0 +1,205 @@ +-- Create jobs schema and tables for raw job ingestion + future structured extraction. +-- Also adds an RPC helper for conditional upsert behavior used by sync scripts. 
+ +create extension if not exists pgcrypto; + +create schema if not exists jobs; + +create table if not exists jobs.jobs ( + id uuid primary key default gen_random_uuid(), + source text not null, + identity_hash text not null, + + job_title text, + company_name text, + location text, + salary text, + post_time text, + apply_url text, + external_url text, + job_description text, + + description_hash text, + raw_record jsonb not null, + raw_hash text not null, + + first_seen_at timestamptz not null default now(), + last_seen_at timestamptz not null default now(), + ingest_count integer not null default 1, + + created_at timestamptz not null default now(), + updated_at timestamptz not null default now(), + + constraint jobs_source_identity_uniq unique (source, identity_hash) +); + +create index if not exists jobs_jobs_company_name_idx on jobs.jobs (company_name); +create index if not exists jobs_jobs_location_idx on jobs.jobs (location); +create index if not exists jobs_jobs_last_seen_at_idx on jobs.jobs (last_seen_at desc); +create index if not exists jobs_jobs_created_at_idx on jobs.jobs (created_at desc); +create index if not exists jobs_jobs_raw_record_gin on jobs.jobs using gin (raw_record); + +create table if not exists jobs.jd_structured ( + id uuid primary key default gen_random_uuid(), + job_id uuid not null references jobs.jobs (id) on delete cascade, + + schema_version text not null, + extractor_version text not null, + prompt_version text not null, + status text not null default 'pending', + + structured jsonb, + confidence numeric, + validation_errors jsonb, + + created_at timestamptz not null default now(), + updated_at timestamptz not null default now(), + + constraint jd_structured_job_id_uniq unique (job_id), + constraint jd_structured_status_check check ( + status in ('pending', 'processing', 'ok', 'failed', 'dead_letter') + ) +); + +create index if not exists jd_structured_status_idx on jobs.jd_structured (status); +create index if not exists 
jd_structured_structured_gin on jobs.jd_structured using gin (structured); + +-- RPC: upsert a job row with "do not overwrite with empty" semantics. +-- Returns the job id (uuid) of the inserted/updated row. +create or replace function jobs.upsert_job( + p_source text, + p_identity_hash text, + p_job_title text, + p_company_name text, + p_location text, + p_salary text, + p_post_time text, + p_apply_url text, + p_external_url text, + p_job_description text, + p_description_hash text, + p_raw_record jsonb, + p_raw_hash text +) +returns uuid +language plpgsql +security definer +as $$ +declare + v_id uuid; +begin + insert into jobs.jobs ( + source, + identity_hash, + job_title, + company_name, + location, + salary, + post_time, + apply_url, + external_url, + job_description, + description_hash, + raw_record, + raw_hash, + first_seen_at, + last_seen_at, + ingest_count, + created_at, + updated_at + ) + values ( + p_source, + p_identity_hash, + nullif(p_job_title, ''), + nullif(p_company_name, ''), + nullif(p_location, ''), + nullif(p_salary, ''), + nullif(p_post_time, ''), + nullif(p_apply_url, ''), + nullif(p_external_url, ''), + nullif(p_job_description, ''), + nullif(p_description_hash, ''), + coalesce(p_raw_record, '{}'::jsonb), + p_raw_hash, + now(), + now(), + 1, + now(), + now() + ) + on conflict (source, identity_hash) + do update set + job_title = coalesce(nullif(excluded.job_title, ''), jobs.jobs.job_title), + company_name = coalesce(nullif(excluded.company_name, ''), jobs.jobs.company_name), + location = coalesce(nullif(excluded.location, ''), jobs.jobs.location), + salary = coalesce(nullif(excluded.salary, ''), jobs.jobs.salary), + post_time = coalesce(nullif(excluded.post_time, ''), jobs.jobs.post_time), + apply_url = coalesce(nullif(excluded.apply_url, ''), jobs.jobs.apply_url), + external_url = coalesce(nullif(excluded.external_url, ''), jobs.jobs.external_url), + job_description = coalesce(nullif(excluded.job_description, ''), jobs.jobs.job_description), 
+ description_hash = coalesce(nullif(excluded.description_hash, ''), jobs.jobs.description_hash), + raw_record = excluded.raw_record, + raw_hash = excluded.raw_hash, + last_seen_at = now(), + ingest_count = jobs.jobs.ingest_count + 1, + updated_at = now() + returning id into v_id; + + return v_id; +end; +$$; + +-- Public wrapper to ensure PostgREST/Supabase RPC exposure works even when the +-- `jobs` schema is not part of the exposed API schemas. +create or replace function public.upsert_job( + p_source text, + p_identity_hash text, + p_job_title text, + p_company_name text, + p_location text, + p_salary text, + p_post_time text, + p_apply_url text, + p_external_url text, + p_job_description text, + p_description_hash text, + p_raw_record jsonb, + p_raw_hash text +) +returns uuid +language sql +security definer +as $$ + select jobs.upsert_job( + p_source, + p_identity_hash, + p_job_title, + p_company_name, + p_location, + p_salary, + p_post_time, + p_apply_url, + p_external_url, + p_job_description, + p_description_hash, + p_raw_record, + p_raw_hash + ); +$$; + +-- Allow calling the public RPC from PostgREST clients. +grant execute on function public.upsert_job( + text, text, text, text, text, text, text, text, text, text, text, jsonb, text +) to anon, authenticated; + +-- Convenience views in `public` so the Supabase Table Editor (default schema=public) +-- can show the data without switching schemas. 
+create or replace view public.jobs_jobs as +select * from jobs.jobs; + +create or replace view public.jobs_jd_structured as +select * from jobs.jd_structured; + +grant select on public.jobs_jobs to anon, authenticated; +grant select on public.jobs_jd_structured to anon, authenticated; diff --git a/supabase/migrations/20260504000001_add_url_hash_to_jobs.sql b/supabase/migrations/20260504000001_add_url_hash_to_jobs.sql new file mode 100644 index 0000000..9509985 --- /dev/null +++ b/supabase/migrations/20260504000001_add_url_hash_to_jobs.sql @@ -0,0 +1,148 @@ +-- Add url / url_hash columns for LinkedIn job dedup by normalized URL. +-- url_hash = sha256(normalized_url) where normalized_url has tracking params removed. +-- The unique index on url_hash acts as the DB-level constraint for dedup. + +alter table jobs.jobs add column if not exists url text; +alter table jobs.jobs add column if not exists url_hash text; + +create unique index if not exists jobs_jobs_url_hash_uidx on jobs.jobs (url_hash); + +-- Updated RPC: accepts p_url and p_url_hash. 
+drop function if exists jobs.upsert_job cascade; + +create or replace function jobs.upsert_job( + p_source text, + p_identity_hash text, + p_job_title text, + p_company_name text, + p_location text, + p_salary text, + p_post_time text, + p_apply_url text, + p_external_url text, + p_job_description text, + p_description_hash text, + p_raw_record jsonb, + p_raw_hash text, + p_url text default null, + p_url_hash text default null +) +returns uuid +language plpgsql +security definer +as $$ +declare + v_id uuid; +begin + insert into jobs.jobs ( + source, + identity_hash, + job_title, + company_name, + location, + salary, + post_time, + apply_url, + external_url, + job_description, + description_hash, + raw_record, + raw_hash, + url, + url_hash, + first_seen_at, + last_seen_at, + ingest_count, + created_at, + updated_at + ) + values ( + p_source, + p_identity_hash, + nullif(p_job_title, ''), + nullif(p_company_name, ''), + nullif(p_location, ''), + nullif(p_salary, ''), + nullif(p_post_time, ''), + nullif(p_apply_url, ''), + nullif(p_external_url, ''), + nullif(p_job_description, ''), + nullif(p_description_hash, ''), + coalesce(p_raw_record, '{}'::jsonb), + p_raw_hash, + nullif(p_url, ''), + nullif(p_url_hash, ''), + now(), + now(), + 1, + now(), + now() + ) + on conflict (source, identity_hash) + do update set + job_title = coalesce(nullif(excluded.job_title, ''), jobs.jobs.job_title), + company_name = coalesce(nullif(excluded.company_name, ''), jobs.jobs.company_name), + location = coalesce(nullif(excluded.location, ''), jobs.jobs.location), + salary = coalesce(nullif(excluded.salary, ''), jobs.jobs.salary), + post_time = coalesce(nullif(excluded.post_time, ''), jobs.jobs.post_time), + apply_url = coalesce(nullif(excluded.apply_url, ''), jobs.jobs.apply_url), + external_url = coalesce(nullif(excluded.external_url, ''), jobs.jobs.external_url), + job_description = coalesce(nullif(excluded.job_description, ''), jobs.jobs.job_description), + description_hash = 
coalesce(nullif(excluded.description_hash, ''), jobs.jobs.description_hash), + raw_record = excluded.raw_record, + raw_hash = excluded.raw_hash, + url = coalesce(nullif(excluded.url, ''), jobs.jobs.url), + url_hash = coalesce(nullif(excluded.url_hash, ''), jobs.jobs.url_hash), + last_seen_at = now(), + ingest_count = jobs.jobs.ingest_count + 1, + updated_at = now() + returning id into v_id; + + return v_id; +end; +$$; + +-- Recreate public wrapper. +create or replace function public.upsert_job( + p_source text, + p_identity_hash text, + p_job_title text, + p_company_name text, + p_location text, + p_salary text, + p_post_time text, + p_apply_url text, + p_external_url text, + p_job_description text, + p_description_hash text, + p_raw_record jsonb, + p_raw_hash text, + p_url text default null, + p_url_hash text default null +) +returns uuid +language sql +security definer +as $$ + select jobs.upsert_job( + p_source, + p_identity_hash, + p_job_title, + p_company_name, + p_location, + p_salary, + p_post_time, + p_apply_url, + p_external_url, + p_job_description, + p_description_hash, + p_raw_record, + p_raw_hash, + p_url, + p_url_hash + ); +$$; + +grant execute on function public.upsert_job( + text, text, text, text, text, text, text, text, text, text, text, jsonb, text, text, text +) to anon, authenticated; diff --git a/supabase/migrations/20260505000001_add_source_channel_apply_type.sql b/supabase/migrations/20260505000001_add_source_channel_apply_type.sql new file mode 100644 index 0000000..f7d242a --- /dev/null +++ b/supabase/migrations/20260505000001_add_source_channel_apply_type.sql @@ -0,0 +1,202 @@ +-- Standardize source field, add source_channel / apply_type, create job_source_records. + +-- 1. 
Add new columns to jobs.jobs +alter table jobs.jobs +add column if not exists source_channel text not null default 'unknown'; + +alter table jobs.jobs +add column if not exists apply_type text not null default 'unknown'; + +alter table jobs.jobs drop constraint if exists jobs_jobs_apply_type_check, +add constraint jobs_jobs_apply_type_check +check (apply_type in ('easy_apply', 'external', 'unknown')); + +-- 2. Migrate existing linkedin_recommended → linkedin + recommended +update jobs.jobs +set source = 'linkedin', + source_channel = 'recommended' +where source = 'linkedin_recommended'; + +-- 3. Update records where easy_apply was set in raw_record +update jobs.jobs +set apply_type = 'easy_apply' +where source = 'linkedin' + and raw_record->>'easy_apply' in ('true', 'True'); + +update jobs.jobs +set apply_type = 'external' +where source = 'linkedin' + and raw_record->>'easy_apply' in ('false', 'False'); + +-- 4. Drop the old jobs.upsert_job RPC and recreate it with the new source_channel / apply_type fields (the conflict target stays (source, identity_hash)). +drop function if exists jobs.upsert_job cascade; + +create or replace function jobs.upsert_job( + p_source text, + p_identity_hash text, + p_job_title text, + p_company_name text, + p_location text, + p_salary text, + p_post_time text, + p_apply_url text, + p_external_url text, + p_job_description text, + p_description_hash text, + p_raw_record jsonb, + p_raw_hash text, + p_url text default null, + p_url_hash text default null, + p_source_channel text default 'unknown', + p_apply_type text default 'unknown' +) +returns uuid +language plpgsql +security definer +as $$ +declare + v_id uuid; +begin + insert into jobs.jobs ( + source, + identity_hash, + job_title, + company_name, + location, + salary, + post_time, + apply_url, + external_url, + job_description, + description_hash, + raw_record, + raw_hash, + url, + url_hash, + source_channel, + apply_type, + first_seen_at, + last_seen_at, + ingest_count, + created_at, + updated_at + ) + values ( + p_source, + p_identity_hash, + nullif(p_job_title, ''), + nullif(p_company_name, 
''), + nullif(p_location, ''), + nullif(p_salary, ''), + nullif(p_post_time, ''), + nullif(p_apply_url, ''), + nullif(p_external_url, ''), + nullif(p_job_description, ''), + nullif(p_description_hash, ''), + coalesce(p_raw_record, '{}'::jsonb), + p_raw_hash, + nullif(p_url, ''), + nullif(p_url_hash, ''), + nullif(p_source_channel, ''), + nullif(p_apply_type, ''), + now(), + now(), + 1, + now(), + now() + ) + on conflict (source, identity_hash) + do update set + job_title = coalesce(nullif(excluded.job_title, ''), jobs.jobs.job_title), + company_name = coalesce(nullif(excluded.company_name, ''), jobs.jobs.company_name), + location = coalesce(nullif(excluded.location, ''), jobs.jobs.location), + salary = coalesce(nullif(excluded.salary, ''), jobs.jobs.salary), + post_time = coalesce(nullif(excluded.post_time, ''), jobs.jobs.post_time), + apply_url = coalesce(nullif(excluded.apply_url, ''), jobs.jobs.apply_url), + external_url = coalesce(nullif(excluded.external_url, ''), jobs.jobs.external_url), + job_description = coalesce(nullif(excluded.job_description, ''), jobs.jobs.job_description), + description_hash = coalesce(nullif(excluded.description_hash, ''), jobs.jobs.description_hash), + raw_record = excluded.raw_record, + raw_hash = excluded.raw_hash, + url = coalesce(nullif(excluded.url, ''), jobs.jobs.url), + url_hash = coalesce(nullif(excluded.url_hash, ''), jobs.jobs.url_hash), + source_channel = coalesce(nullif(excluded.source_channel, ''), jobs.jobs.source_channel), + apply_type = coalesce(nullif(excluded.apply_type, ''), jobs.jobs.apply_type), + last_seen_at = now(), + ingest_count = jobs.jobs.ingest_count + 1, + updated_at = now() + returning id into v_id; + + return v_id; +end; +$$; + +-- 5. 
Update public wrapper +create or replace function public.upsert_job( + p_source text, + p_identity_hash text, + p_job_title text, + p_company_name text, + p_location text, + p_salary text, + p_post_time text, + p_apply_url text, + p_external_url text, + p_job_description text, + p_description_hash text, + p_raw_record jsonb, + p_raw_hash text, + p_url text default null, + p_url_hash text default null, + p_source_channel text default 'unknown', + p_apply_type text default 'unknown' +) +returns uuid +language sql +security definer +as $$ + select jobs.upsert_job( + p_source, + p_identity_hash, + p_job_title, + p_company_name, + p_location, + p_salary, + p_post_time, + p_apply_url, + p_external_url, + p_job_description, + p_description_hash, + p_raw_record, + p_raw_hash, + p_url, + p_url_hash, + p_source_channel, + p_apply_type + ); +$$; + +grant execute on function public.upsert_job( + text, text, text, text, text, text, text, text, text, text, text, jsonb, text, text, text, text, text +) to anon, authenticated; + +-- 6. 
Create job_source_records table +create table if not exists jobs.job_source_records ( + id bigserial primary key, + job_id uuid references jobs.jobs (id) on delete cascade, + source text not null, + source_channel text not null default 'unknown', + source_job_id text, + external_url text, + normalized_url text, + url_hash text not null, + easy_apply boolean, + raw_record jsonb, + scraped_at timestamptz not null default now(), + created_at timestamptz not null default now(), + unique (source, url_hash) +); + +create index if not exists job_source_records_job_id_idx on jobs.job_source_records (job_id); +create index if not exists job_source_records_url_hash_idx on jobs.job_source_records (url_hash); +create index if not exists job_source_records_scraped_at_idx on jobs.job_source_records (scraped_at desc); diff --git a/supabase/migrations/20260505000002_drop_url_hash_unique.sql b/supabase/migrations/20260505000002_drop_url_hash_unique.sql new file mode 100644 index 0000000..01ed71a --- /dev/null +++ b/supabase/migrations/20260505000002_drop_url_hash_unique.sql @@ -0,0 +1,8 @@ +-- Drop the url_hash unique constraint (replaced by (source, identity_hash) in upsert) +-- url_hash dedup is handled at the application level in clean_linkedin_jobs.py +-- A regular index is sufficient for query performance + +drop index if exists jobs.jobs_jobs_url_hash_uidx; + +create index if not exists jobs_jobs_url_hash_idx on jobs.jobs (url_hash) +where url_hash is not null and url_hash != ''; diff --git a/supabase/migrations/20260505000003_cleanup_old_linkedin_data.sql b/supabase/migrations/20260505000003_cleanup_old_linkedin_data.sql new file mode 100644 index 0000000..33d330d --- /dev/null +++ b/supabase/migrations/20260505000003_cleanup_old_linkedin_data.sql @@ -0,0 +1,71 @@ +-- Clean up old LinkedIn job records imported before the pipeline fix. 
+-- +-- Invariants enforced: +-- source = 'linkedin' -> source_channel = 'recommended' +-- url is populated from raw_record->>'url' (or source_url/linkedin_url/job_url) +-- apply_type derived from raw_record->>'easy_apply' +-- apply_url set from external_url for external jobs, NULL for easy_apply +-- url_hash generated for records that now have a url + +-- Step 1: Fix source_channel for old records +update jobs.jobs +set source_channel = 'recommended' +where source = 'linkedin' + and source_channel = 'unknown'; + +-- Step 2: Populate url from raw_record when missing +-- Raw LinkedIn records stored the job URL in the 'url' key; source_url / linkedin_url / job_url take precedence when present +update jobs.jobs +set url = coalesce( + nullif(raw_record->>'source_url', ''), + nullif(raw_record->>'linkedin_url', ''), + nullif(raw_record->>'job_url', ''), + nullif(raw_record->>'url', '') + ) +where source = 'linkedin' + and (url is null or url = '') + and raw_record is not null + and raw_record != '{}'::jsonb; + +-- Step 3: Generate url_hash for records that now have a url (hex sha256 of the stored url's UTF-8 bytes; NOTE(review): url is not re-normalized here, confirm it matches the hash the sync script computes) +update jobs.jobs +set url_hash = encode(sha256(convert_to(url, 'UTF8')), 'hex') +where source = 'linkedin' + and (url_hash is null or url_hash = '') + and url is not null + and url != ''; + +-- Step 4: Set apply_type and apply_url based on raw_record->>'easy_apply' +-- easy_apply=true -> apply_type=easy_apply, apply_url=NULL +update jobs.jobs +set apply_type = 'easy_apply', + apply_url = null +where source = 'linkedin' + and apply_type = 'unknown' + and raw_record->>'easy_apply' in ('true', 'True', '1'); + +-- easy_apply=false -> apply_type=external, apply_url=external_url +update jobs.jobs +set apply_type = 'external', + 
apply_url = coalesce(nullif(raw_record->>'external_url', ''), external_url) +where source = 'linkedin' + and apply_type = 'unknown' + and (raw_record->>'easy_apply' is null or raw_record->>'easy_apply' = '') + and nullif(raw_record->>'external_url', '') is not null; + +-- Remaining records without easy_apply and without external_url -> easy_apply (LinkedIn default) +update jobs.jobs +set apply_type = 'easy_apply', + apply_url = null +where source = 'linkedin' + and apply_type = 'unknown' + and (raw_record->>'easy_apply' is null or raw_record->>'easy_apply' = '') + and nullif(raw_record->>'external_url', '') is null; diff --git a/supabase/migrations/20260509182000_add_priority_scoring_columns.sql b/supabase/migrations/20260509182000_add_priority_scoring_columns.sql new file mode 100644 index 0000000..da50789 --- /dev/null +++ b/supabase/migrations/20260509182000_add_priority_scoring_columns.sql @@ -0,0 +1,218 @@ +-- Add priority scoring columns to jobs.jobs +-- +-- Stores the output of the deterministic job_priority_scorer engine so that +-- the sync pipeline can set priority at ingest-time and the UI / batch +-- backfill can query without re-scoring every job every time. + +-- 1. Add columns to jobs.jobs +alter table jobs.jobs +add column if not exists priority_score numeric(5,1) not null default 0; + +alter table jobs.jobs +add column if not exists priority_tier text not null default 'unknown'; + +alter table jobs.jobs +add column if not exists priority_scorer_version text not null default 'job-priority-v1'; + +alter table jobs.jobs +add column if not exists priority_signals jsonb not null default '{}'::jsonb; + +-- priority_scored_at is intentionally nullable: indicates *when* scoring happened, +-- NULL means the row has never been scored (e.g. 
before backfill) +alter table jobs.jobs +add column if not exists priority_scored_at timestamptz; + +-- Validate priority_tier values (drop-then-add keeps this statement re-runnable like the rest of the migration) +alter table jobs.jobs drop constraint if exists jobs_jobs_priority_tier_check, +add constraint jobs_jobs_priority_tier_check +check (priority_tier in ('high', 'medium', 'low', 'reject', 'unknown')); + +create index if not exists jobs_jobs_priority_score_idx on jobs.jobs (priority_score desc, last_seen_at desc); +create index if not exists jobs_jobs_priority_tier_idx on jobs.jobs (priority_tier, priority_score desc); +create index if not exists jobs_jobs_priority_scored_at_idx on jobs.jobs (priority_scored_at desc); + +-- 2. Drop old upsert_job RPCs and recreate with priority params +drop function if exists jobs.upsert_job cascade; + +create or replace function jobs.upsert_job( + p_source text, + p_identity_hash text, + p_job_title text, + p_company_name text, + p_location text, + p_salary text, + p_post_time text, + p_apply_url text, + p_external_url text, + p_job_description text, + p_description_hash text, + p_raw_record jsonb, + p_raw_hash text, + p_url text default null, + p_url_hash text default null, + p_source_channel text default 'unknown', + p_apply_type text default 'unknown', + p_priority_score numeric default null, + p_priority_tier text default null, + p_priority_scorer_version text default null, + p_priority_signals jsonb default null +) +returns uuid +language plpgsql +security definer +as $$ +declare + v_id uuid; +begin + insert into jobs.jobs ( + source, + identity_hash, + job_title, + company_name, + location, + salary, + post_time, + apply_url, + external_url, + job_description, + description_hash, + raw_record, + raw_hash, + url, + url_hash, + source_channel, + apply_type, + priority_score, + priority_tier, + priority_scorer_version, + priority_signals, + first_seen_at, + last_seen_at, + ingest_count, + created_at, + updated_at + ) + values ( + p_source, + p_identity_hash, + nullif(p_job_title, ''), + nullif(p_company_name, ''), + nullif(p_location, ''), + 
nullif(p_salary, ''), + nullif(p_post_time, ''), + nullif(p_apply_url, ''), + nullif(p_external_url, ''), + nullif(p_job_description, ''), + nullif(p_description_hash, ''), + coalesce(p_raw_record, '{}'::jsonb), + p_raw_hash, + nullif(p_url, ''), + nullif(p_url_hash, ''), + nullif(p_source_channel, ''), + nullif(p_apply_type, ''), + case when p_priority_score is not null then p_priority_score else 0 end, + coalesce(nullif(p_priority_tier, ''), 'unknown'), + coalesce(nullif(p_priority_scorer_version, ''), 'job-priority-v1'), + coalesce(p_priority_signals, '{}'::jsonb), + now(), + now(), + 1, + now(), + now() + ) + on conflict (source, identity_hash) + do update set + job_title = coalesce(nullif(excluded.job_title, ''), jobs.jobs.job_title), + company_name = coalesce(nullif(excluded.company_name, ''), jobs.jobs.company_name), + location = coalesce(nullif(excluded.location, ''), jobs.jobs.location), + salary = coalesce(nullif(excluded.salary, ''), jobs.jobs.salary), + post_time = coalesce(nullif(excluded.post_time, ''), jobs.jobs.post_time), + apply_url = coalesce(nullif(excluded.apply_url, ''), jobs.jobs.apply_url), + external_url = coalesce(nullif(excluded.external_url, ''), jobs.jobs.external_url), + job_description = coalesce(nullif(excluded.job_description, ''), jobs.jobs.job_description), + description_hash = coalesce(nullif(excluded.description_hash, ''), jobs.jobs.description_hash), + raw_record = excluded.raw_record, + raw_hash = excluded.raw_hash, + url = coalesce(nullif(excluded.url, ''), jobs.jobs.url), + url_hash = coalesce(nullif(excluded.url_hash, ''), jobs.jobs.url_hash), + source_channel = coalesce(nullif(excluded.source_channel, ''), jobs.jobs.source_channel), + apply_type = coalesce(nullif(excluded.apply_type, ''), jobs.jobs.apply_type), + priority_score = case when excluded.priority_score is not null then excluded.priority_score else jobs.jobs.priority_score end, + priority_tier = case when excluded.priority_tier is not null then 
excluded.priority_tier else jobs.jobs.priority_tier end, + priority_scorer_version = case when excluded.priority_scorer_version is not null then excluded.priority_scorer_version else jobs.jobs.priority_scorer_version end, + priority_signals = case when excluded.priority_signals is not null then excluded.priority_signals else jobs.jobs.priority_signals end, + priority_scored_at = case + when excluded.priority_score is not null then now() + else jobs.jobs.priority_scored_at + end, + last_seen_at = now(), + ingest_count = jobs.jobs.ingest_count + 1, + updated_at = now() + returning id into v_id; + + -- Also set priority_scored_at on INSERT when priority_score was provided + if v_id is not null and p_priority_score is not null then + update jobs.jobs set priority_scored_at = now() where id = v_id; + end if; + + return v_id; +end; +$$; + +-- 3. Recreate public wrapper +create or replace function public.upsert_job( + p_source text, + p_identity_hash text, + p_job_title text, + p_company_name text, + p_location text, + p_salary text, + p_post_time text, + p_apply_url text, + p_external_url text, + p_job_description text, + p_description_hash text, + p_raw_record jsonb, + p_raw_hash text, + p_url text default null, + p_url_hash text default null, + p_source_channel text default 'unknown', + p_apply_type text default 'unknown', + p_priority_score numeric default null, + p_priority_tier text default null, + p_priority_scorer_version text default null, + p_priority_signals jsonb default null +) +returns uuid +language sql +security definer +as $$ + select jobs.upsert_job( + p_source, + p_identity_hash, + p_job_title, + p_company_name, + p_location, + p_salary, + p_post_time, + p_apply_url, + p_external_url, + p_job_description, + p_description_hash, + p_raw_record, + p_raw_hash, + p_url, + p_url_hash, + p_source_channel, + p_apply_type, + p_priority_score, + p_priority_tier, + p_priority_scorer_version, + p_priority_signals + ); +$$; + +grant execute on function 
public.upsert_job( + text, text, text, text, text, text, text, text, text, text, text, jsonb, text, + text, text, text, text, numeric, text, text, jsonb +) to anon, authenticated; diff --git a/supabase/migrations/20260509184000_add_backfill_priority_rpc.sql b/supabase/migrations/20260509184000_add_backfill_priority_rpc.sql new file mode 100644 index 0000000..5b75b03 --- /dev/null +++ b/supabase/migrations/20260509184000_add_backfill_priority_rpc.sql @@ -0,0 +1,55 @@ +-- Add update_job_priority_score RPC for backfill scripts. +-- +-- Unlike upsert_job (which handles the full row), this RPC only touches the +-- priority-scoring columns so that batch-backfill does not accidentally +-- overwrite extracted job fields that may have been enriched since ingest. + +-- 1. Schema-scoped RPC: overwrites the priority_* columns and updated_at of the row with id = p_job_id (no row is touched when the id is unknown); search_path is pinned because the function is SECURITY DEFINER +create or replace function jobs.update_job_priority_score( + p_job_id uuid, + p_priority_score numeric, + p_priority_tier text, + p_priority_scorer_version text, + p_priority_signals jsonb +) +returns void +language plpgsql +security definer set search_path = '' +as $$ +begin + update jobs.jobs + set + priority_score = p_priority_score, + priority_tier = p_priority_tier, + priority_scorer_version = p_priority_scorer_version, + priority_signals = p_priority_signals, + priority_scored_at = now(), + updated_at = now() + where id = p_job_id; +end; +$$; + +-- 2. 
Public wrapper +create or replace function public.update_job_priority_score( + p_job_id uuid, + p_priority_score numeric, + p_priority_tier text, + p_priority_scorer_version text, + p_priority_signals jsonb +) +returns void +language sql +security definer +as $$ + select jobs.update_job_priority_score( + p_job_id, + p_priority_score, + p_priority_tier, + p_priority_scorer_version, + p_priority_signals + ); +$$; + +grant execute on function public.update_job_priority_score( + uuid, numeric, text, text, jsonb +) to anon, authenticated; diff --git a/supabase/migrations/20260516120000_fix_priority_upsert_data_loss.sql b/supabase/migrations/20260516120000_fix_priority_upsert_data_loss.sql new file mode 100644 index 0000000..6794b44 --- /dev/null +++ b/supabase/migrations/20260516120000_fix_priority_upsert_data_loss.sql @@ -0,0 +1,165 @@ +-- Fix data-loss bug in jobs.upsert_job introduced by +-- 20260509182000_add_priority_scoring_columns.sql. +-- +-- The INSERT body coerces NULL p_priority_score to 0 (line 113 of the +-- original migration). That means excluded.priority_score is NEVER NULL +-- inside the ON CONFLICT DO UPDATE branch — it's either the caller's +-- value or 0. +-- +-- The original UPDATE branch reads: +-- priority_score = case +-- when excluded.priority_score is not null then excluded.priority_score +-- else jobs.jobs.priority_score +-- end +-- Because the case condition is ALWAYS TRUE, every unscored re-upsert +-- overwrites the existing priority_score with 0. Production already lost +-- priority history this way for any row that was re-ingested without a +-- p_priority_score on the second call. +-- +-- Fix: branch on the function PARAMETER p_priority_score (which IS nullable +-- by design) instead of the excluded row. Same correction applied to +-- priority_tier, priority_scorer_version, priority_signals, priority_scored_at. 
+-- +-- Signature is unchanged so the existing public.upsert_job wrapper and all +-- callers continue to work without modification. + +create or replace function jobs.upsert_job( + p_source text, + p_identity_hash text, + p_job_title text, + p_company_name text, + p_location text, + p_salary text, + p_post_time text, + p_apply_url text, + p_external_url text, + p_job_description text, + p_description_hash text, + p_raw_record jsonb, + p_raw_hash text, + p_url text default null, + p_url_hash text default null, + p_source_channel text default 'unknown', + p_apply_type text default 'unknown', + p_priority_score numeric default null, + p_priority_tier text default null, + p_priority_scorer_version text default null, + p_priority_signals jsonb default null +) +returns uuid +language plpgsql +security definer +as $$ +declare + v_id uuid; +begin + insert into jobs.jobs ( + source, + identity_hash, + job_title, + company_name, + location, + salary, + post_time, + apply_url, + external_url, + job_description, + description_hash, + raw_record, + raw_hash, + url, + url_hash, + source_channel, + apply_type, + priority_score, + priority_tier, + priority_scorer_version, + priority_signals, + first_seen_at, + last_seen_at, + ingest_count, + created_at, + updated_at + ) + values ( + p_source, + p_identity_hash, + nullif(p_job_title, ''), + nullif(p_company_name, ''), + nullif(p_location, ''), + nullif(p_salary, ''), + nullif(p_post_time, ''), + nullif(p_apply_url, ''), + nullif(p_external_url, ''), + nullif(p_job_description, ''), + nullif(p_description_hash, ''), + coalesce(p_raw_record, '{}'::jsonb), + p_raw_hash, + nullif(p_url, ''), + nullif(p_url_hash, ''), + nullif(p_source_channel, ''), + nullif(p_apply_type, ''), + case when p_priority_score is not null then p_priority_score else 0 end, + coalesce(nullif(p_priority_tier, ''), 'unknown'), + coalesce(nullif(p_priority_scorer_version, ''), 'job-priority-v1'), + coalesce(p_priority_signals, '{}'::jsonb), + now(), + now(), + 1, 
+ now(), + now() + ) + on conflict (source, identity_hash) + do update set + job_title = coalesce(nullif(excluded.job_title, ''), jobs.jobs.job_title), + company_name = coalesce(nullif(excluded.company_name, ''), jobs.jobs.company_name), + location = coalesce(nullif(excluded.location, ''), jobs.jobs.location), + salary = coalesce(nullif(excluded.salary, ''), jobs.jobs.salary), + post_time = coalesce(nullif(excluded.post_time, ''), jobs.jobs.post_time), + apply_url = coalesce(nullif(excluded.apply_url, ''), jobs.jobs.apply_url), + external_url = coalesce(nullif(excluded.external_url, ''), jobs.jobs.external_url), + job_description = coalesce(nullif(excluded.job_description, ''), jobs.jobs.job_description), + description_hash = coalesce(nullif(excluded.description_hash, ''), jobs.jobs.description_hash), + raw_record = excluded.raw_record, + raw_hash = excluded.raw_hash, + url = coalesce(nullif(excluded.url, ''), jobs.jobs.url), + url_hash = coalesce(nullif(excluded.url_hash, ''), jobs.jobs.url_hash), + source_channel = coalesce(nullif(excluded.source_channel, ''), jobs.jobs.source_channel), + apply_type = coalesce(nullif(excluded.apply_type, ''), jobs.jobs.apply_type), + -- THE FIX: branch on the function parameter (which is honestly nullable), + -- NOT on excluded (which the INSERT body has already coerced to non-null). 
+ priority_score = case + when p_priority_score is not null then p_priority_score + else jobs.jobs.priority_score + end, + priority_tier = case + when p_priority_tier is not null and p_priority_tier <> '' then p_priority_tier + else jobs.jobs.priority_tier + end, + priority_scorer_version = case + when p_priority_scorer_version is not null and p_priority_scorer_version <> '' then p_priority_scorer_version + else jobs.jobs.priority_scorer_version + end, + priority_signals = case + when p_priority_signals is not null then p_priority_signals + else jobs.jobs.priority_signals + end, + priority_scored_at = case + when p_priority_score is not null then now() + else jobs.jobs.priority_scored_at + end, + last_seen_at = now(), + ingest_count = jobs.jobs.ingest_count + 1, + updated_at = now() + returning id into v_id; + + -- Also set priority_scored_at on INSERT when priority_score was provided + -- (the INSERT-side default writes 0 with NULL scored_at, so a successful + -- explicit score needs its scored_at marker too). + if v_id is not null and p_priority_score is not null then + update jobs.jobs set priority_scored_at = now() where id = v_id; + end if; + + return v_id; +end; +$$; diff --git a/supabase/migrations/20260516120100_enable_jobs_jobs_rls.sql b/supabase/migrations/20260516120100_enable_jobs_jobs_rls.sql new file mode 100644 index 0000000..19e60c6 --- /dev/null +++ b/supabase/migrations/20260516120100_enable_jobs_jobs_rls.sql @@ -0,0 +1,31 @@ +-- Enable Row Level Security on jobs.jobs and grant a read-only policy +-- for the anon role. +-- +-- Reason: the autocli-daily microservice's /jobs HTTP endpoint queries +-- this table via the Supabase anon key. The anon key is intentionally +-- public (Supabase design) — the safety boundary is RLS, not key secrecy. +-- Without RLS, the anon key gives whoever has it read/write to every row. 
+-- +-- Combined with deploy/SPEC.md §5.3 (Cloudflare Access in front of the +-- /jobs endpoint) this gives defence in depth: Access at the edge + +-- Bearer at the app + RLS at the database. Even if the first two fail +-- open, direct table access for anon is limited to SELECT on jobs.jobs — +-- no other tables in the jobs schema. NOTE(review): anon is also granted EXECUTE on the SECURITY DEFINER RPCs public.upsert_job and public.update_job_priority_score, which write to jobs.jobs and presumably bypass these policies — confirm those grants are intended. +-- +-- Writes via sync_autocli_jobs.py continue to use SUPABASE_SERVICE_ROLE_KEY +-- which bypasses RLS. + +alter table jobs.jobs enable row level security; + +-- Anon (and authenticated) clients may read every row. We deliberately do +-- not filter by ownership because all rows are scraped public job postings +-- and the /jobs endpoint serves them as a list. Tighten this policy if +-- per-user filtering becomes a requirement. +create policy anon_read_jobs_jobs on jobs.jobs + for select + to anon, authenticated + using (true); + +-- No INSERT / UPDATE / DELETE policies for anon — direct table writes remain +-- service-role-only by virtue of RLS being enabled and no permissive +-- policies for those verbs existing. diff --git a/supabase/migrations/20260516120200_grant_anon_read_jobs_jobs.sql b/supabase/migrations/20260516120200_grant_anon_read_jobs_jobs.sql new file mode 100644 index 0000000..b10d5b9 --- /dev/null +++ b/supabase/migrations/20260516120200_grant_anon_read_jobs_jobs.sql @@ -0,0 +1,12 @@ +-- Companion to 20260516120100: RLS by itself doesn't grant SELECT — PostgREST +-- requires both an explicit GRANT and an RLS policy that passes. Without +-- this GRANT, the /jobs endpoint returned count=0 even though the +-- anon_read_jobs_jobs policy USING(true) was active, because the anon role +-- had no SELECT privilege on the table or USAGE on the schema. +-- +-- Supabase auto-grants these for tables in `public` by default; custom +-- schemas exposed via the dashboard's "Exposed schemas" setting still need +-- the GRANTs to be explicit. 
+ +grant usage on schema jobs to anon, authenticated; +grant select on jobs.jobs to anon, authenticated; diff --git a/supabase/migrations/20260519120000_add_sponsor_licence_columns.sql b/supabase/migrations/20260519120000_add_sponsor_licence_columns.sql new file mode 100644 index 0000000..3a016fe --- /dev/null +++ b/supabase/migrations/20260519120000_add_sponsor_licence_columns.sql @@ -0,0 +1,7 @@ +-- Add sponsor licence matching columns to jobs.jobs. +-- Used by scripts/sponsor_filter.py to record Gov.uk Licensed Sponsors matches. + +ALTER TABLE jobs.jobs + ADD COLUMN IF NOT EXISTS has_sponsor_licence boolean DEFAULT false, + ADD COLUMN IF NOT EXISTS sponsor_match_score numeric(5,2), + ADD COLUMN IF NOT EXISTS sponsor_match_name text; diff --git a/tests/test_clean_linkedin_jobs.py b/tests/test_clean_linkedin_jobs.py new file mode 100644 index 0000000..d0fc890 --- /dev/null +++ b/tests/test_clean_linkedin_jobs.py @@ -0,0 +1,286 @@ +"""Tests for LinkedIn job cleaning — TDD RED phase. + +Tests that are expected to fail because the corresponding +features (source_channel, apply_type) are not yet implemented. 
+""" + +import json +import unittest + + +def _build_raw_record(**overrides) -> dict: + """Helper to build a minimal raw LinkedIn job record.""" + defaults = { + "title": "Cloud Engineer", + "company": "Example Corp", + "location": "Remote", + "url": "https://www.linkedin.com/jobs/view/123", + "external_url": "https://example.com/apply", + } + defaults.update(overrides) + return defaults + + +class TestSourceNormalization(unittest.TestCase): + """source=linkedin_recommended → source=linkedin, source_channel=recommended""" + + def test_linkedin_recommended_source_maps_to_linkedin(self): + """linkedin_recommended source should become source=linkedin.""" + from scripts.clean_linkedin_jobs import clean_job_record + + record = _build_raw_record() + result = clean_job_record(record) + + self.assertEqual(result["source"], "linkedin") + + def test_linkedin_recommended_source_sets_channel(self): + """linkedin_recommended source should set source_channel=recommended.""" + from scripts.clean_linkedin_jobs import clean_job_record + + record = _build_raw_record() + result = clean_job_record(record) + + self.assertEqual(result["source_channel"], "recommended") + + def test_other_source_preserves_channel(self): + """Non-linkedin source should leave source_channel as unknown.""" + from scripts.clean_linkedin_jobs import clean_job_record + + # Simulate a non-LinkedIn source by setting source_prefix override + record = _build_raw_record() + result = clean_job_record(record, source_prefix="indeed") + + self.assertEqual(result["source"], "indeed") + self.assertEqual(result["source_channel"], "unknown") + + +class TestApplyTypeMapping(unittest.TestCase): + """easy_apply → apply_type mapping.""" + + def test_easy_apply_true_maps_to_easy_apply(self): + """easy_apply=True should set apply_type='easy_apply'.""" + from scripts.clean_linkedin_jobs import clean_job_record + + record = _build_raw_record(easy_apply="true") + result = clean_job_record(record) + + 
self.assertEqual(result["apply_type"], "easy_apply") + + def test_easy_apply_false_maps_to_external(self): + """easy_apply=False should set apply_type='external'.""" + from scripts.clean_linkedin_jobs import clean_job_record + + record = _build_raw_record(easy_apply="false") + result = clean_job_record(record) + + self.assertEqual(result["apply_type"], "external") + + def test_missing_easy_apply_maps_to_unknown(self): + """Missing easy_apply should set apply_type='unknown'.""" + from scripts.clean_linkedin_jobs import clean_job_record + + record = _build_raw_record() + # Ensure no easy_apply key at all + record.pop("easy_apply", None) + result = clean_job_record(record) + + self.assertEqual(result["apply_type"], "unknown") + + def test_easy_apply_boolean_true_from_raw_json(self): + """Boolean True easy_apply from JSON should map to 'easy_apply'.""" + from scripts.clean_linkedin_jobs import clean_job_record + + record = _build_raw_record(easy_apply=True) + result = clean_job_record(record) + + self.assertEqual(result["apply_type"], "easy_apply") + + +class TestRawRecordPreservation(unittest.TestCase): + """raw_record should retain original input fields.""" + + def test_raw_record_contains_original_easy_apply(self): + from scripts.clean_linkedin_jobs import clean_job_record + + record = _build_raw_record(easy_apply="true") + result = clean_job_record(record) + + self.assertIn("raw_record", result) + self.assertEqual(result["raw_record"]["easy_apply"], "true") + + def test_raw_record_contains_title_company(self): + from scripts.clean_linkedin_jobs import clean_job_record + + record = _build_raw_record(title="Senior Engineer", company="Acme") + result = clean_job_record(record) + + self.assertEqual(result["raw_record"]["title"], "Senior Engineer") + self.assertEqual(result["raw_record"]["company"], "Acme") + + +class TestUrlAndApplyUrlMapping(unittest.TestCase): + """URL and apply_url mapping from clean_job_record through to sync row.""" + + def 
test_url_is_raw_linkedin_url_not_normalized(self): + """url should be the raw LinkedIn URL (url_normalized is separate).""" + from scripts.clean_linkedin_jobs import map_row_for_sync + + cleaned = { + "url": "https://www.linkedin.com/jobs/view/123?trk=guest", + "external_url": "", + "url_normalized": "https://www.linkedin.com/jobs/view/123", + "url_hash": "abc123", + "apply_type": "easy_apply", + "source": "linkedin", + "source_channel": "recommended", + "raw_record": {}, + } + row = map_row_for_sync(cleaned) + # url is the LinkedIn job URL for reference + self.assertEqual(row["url"], "https://www.linkedin.com/jobs/view/123?trk=guest") + # apply_url is empty for easy_apply + self.assertEqual(row["apply_url"], "") + + def test_external_job_apply_url_is_external_url(self): + """External jobs should have apply_url set to external_url in sync row.""" + from scripts.clean_linkedin_jobs import map_row_for_sync + + cleaned = { + "url": "https://www.linkedin.com/jobs/view/456", + "external_url": "https://example.com/apply", + "url_normalized": "https://www.linkedin.com/jobs/view/456", + "url_hash": "def456", + "apply_type": "external", + "source": "linkedin", + "source_channel": "recommended", + "raw_record": {}, + } + row = map_row_for_sync(cleaned) + self.assertEqual(row["url"], "https://www.linkedin.com/jobs/view/456") + self.assertEqual(row["apply_url"], "https://example.com/apply") + + def test_easy_apply_job_has_empty_apply_url(self): + """Easy apply jobs should have empty apply_url in sync row.""" + from scripts.clean_linkedin_jobs import map_row_for_sync + + cleaned = { + "url": "https://www.linkedin.com/jobs/view/789", + "external_url": "", + "url_normalized": "https://www.linkedin.com/jobs/view/789", + "url_hash": "ghi789", + "apply_type": "easy_apply", + "source": "linkedin", + "source_channel": "recommended", + "raw_record": {}, + } + row = map_row_for_sync(cleaned) + self.assertEqual(row["url"], "https://www.linkedin.com/jobs/view/789") + 
self.assertEqual(row["apply_url"], "") + + +class TestLinkedInValidationRejection(unittest.TestCase): + """LinkedIn records without easy_apply=true or external_url should be rejected.""" + + def test_rejects_linkedin_without_external_or_easy_apply(self): + """LinkedIn row with easy_apply=false and no external_url should be rejected.""" + from scripts.clean_linkedin_jobs import validate_record + + record = { + "title": "Engineer", + "company": "Acme", + "location": "Remote", + "url": "https://linkedin.com/jobs/view/123", + "external_url": "", + "source": "linkedin", + "easy_apply": False, + } + ok, reason = validate_record(record) + self.assertFalse(ok) + self.assertIn("external_url", reason.lower()) + + def test_rejects_linkedin_missing_apply_and_url(self): + """LinkedIn row without easy_apply field and no external_url should be rejected.""" + from scripts.clean_linkedin_jobs import validate_record + + record = { + "title": "Engineer", + "company": "Acme", + "location": "Remote", + "url": "https://linkedin.com/jobs/view/123", + "external_url": "", + "source": "linkedin", + } + ok, reason = validate_record(record) + self.assertFalse(ok) + self.assertIn("external_url", reason.lower()) + + def test_accepts_linkedin_with_easy_apply_and_no_external(self): + """LinkedIn row with easy_apply=true but no external_url should be accepted.""" + from scripts.clean_linkedin_jobs import validate_record + + record = { + "title": "Engineer", + "company": "Acme", + "location": "Remote", + "url": "https://linkedin.com/jobs/view/123", + "external_url": "", + "source": "linkedin", + "easy_apply": True, + } + ok, reason = validate_record(record) + self.assertTrue(ok) + + def test_accepts_linkedin_with_external_url(self): + """LinkedIn row with external_url but easy_apply=false should be accepted.""" + from scripts.clean_linkedin_jobs import validate_record + + record = { + "title": "Engineer", + "company": "Acme", + "location": "Remote", + "url": "https://linkedin.com/jobs/view/123", + 
"external_url": "https://example.com/apply", + "source": "linkedin", + "easy_apply": False, + } + ok, reason = validate_record(record) + self.assertTrue(ok) + + def test_accepts_non_linkedin_without_external(self): + """Non-LinkedIn row without external_url should not be rejected.""" + from scripts.clean_linkedin_jobs import validate_record + + record = { + "title": "Engineer", + "company": "Acme", + "location": "Remote", + "url": "https://indeed.com/job/123", + "external_url": "", + "source": "indeed", + } + ok, reason = validate_record(record) + self.assertTrue(ok) + + +class TestUrlExtraction(unittest.TestCase): + """URL should be extracted from alternative LinkedIn field names.""" + + def test_extracts_url_from_linkedin_url_field(self): + """When url is empty, extract from linkedin_url.""" + from scripts.clean_linkedin_jobs import clean_job_record + + record = _build_raw_record(url="", linkedin_url="https://linkedin.com/jobs/view/123") + result = clean_job_record(record) + self.assertEqual(result["url"], "https://linkedin.com/jobs/view/123") + + def test_extracts_url_from_source_url_field(self): + """When url is missing, extract from source_url.""" + from scripts.clean_linkedin_jobs import clean_job_record + + record = _build_raw_record(url="", source_url="https://linkedin.com/jobs/view/456") + result = clean_job_record(record) + self.assertEqual(result["url"], "https://linkedin.com/jobs/view/456") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_job_priority_scorer.py b/tests/test_job_priority_scorer.py new file mode 100644 index 0000000..af7b0d8 --- /dev/null +++ b/tests/test_job_priority_scorer.py @@ -0,0 +1,926 @@ +"""Comprehensive test suite for job_priority_scorer.py. + +Covers all 8 scoring components, penalty system, tier mapping, +hard-reject guard, and integration scenarios. 
+""" + +import unittest +from datetime import date, datetime, timezone + + +# ---- +# Helpers +# ---- + +_LONG_ENOUGH_JD = ( + "We are building cloud-native systems with Python and React. " + "This is a full-time engineering role with a great team and " + "competitive compensation package. We are seeking an experienced " + "software engineer to join our platform team and help us build " + "scalable, reliable systems. You will work with cutting-edge " + "technologies and contribute to our engineering culture with modern " + "tooling and best practices across the entire development lifecycle." +) # ~450 chars + + +def _make_job(**overrides) -> dict: + """Helper to build a test job dict. + + The default job_description is intentionally >= 100 characters to avoid + triggering the DUPLICATE_LOW_QUALITY_PENALTY (-5) check that penalises + extremely short (< 100 chars) descriptions. + """ + defaults = { + "job_title": "Software Engineer", + "company_name": "Test Corp", + "location": "Remote, UK", + "salary": "GBP 70,000 - 90,000", + "apply_url": "https://testcorp.com/careers/123", + "external_url": "", + "job_description": _LONG_ENOUGH_JD, + "post_time": "2026-05-08T10:00:00Z", + } + defaults.update(overrides) + return defaults + + +# ========================================================================= +# Tests: scoring_text normalization +# ========================================================================= + +class TestNormalization(unittest.TestCase): + """normalize_scoring_text: cleanup, zero-width chars, noise audit.""" + + def _normalize(self, text: str): + from scripts.job_priority_scorer import normalize_scoring_text + return normalize_scoring_text(text) + + def test_clean_text_passes_through(self): + t = "Senior Software Engineer at Google" + cleaned, signals = self._normalize(t) + self.assertEqual(cleaned, t) + self.assertFalse(signals["was_noisy"]) + + def test_zero_width_chars_removed(self): + raw = "Senior​Software‌Engineer" + cleaned, signals = 
self._normalize(raw) + self.assertEqual(cleaned, "SeniorSoftwareEngineer") + self.assertTrue(signals["was_noisy"]) + self.assertEqual(signals["zero_width_removed"], 2) + + def test_control_chars_removed(self): + raw = "Hello\x00World\x1FTech" + cleaned, signals = self._normalize(raw) + self.assertEqual(cleaned, "HelloWorldTech") + self.assertTrue(signals["was_noisy"]) + + def test_decorative_symbols_replaced(self): + raw = "Engineer \U0001f680 Python" + cleaned, signals = self._normalize(raw) + self.assertNotIn("\U0001f680", cleaned) + self.assertTrue(signals["was_noisy"]) + + def test_repeated_punctuation_collapsed(self): + raw = "Great!!! opportunity..!!" + cleaned, signals = self._normalize(raw) + self.assertEqual(cleaned, "Great! opportunity.!") + self.assertTrue(signals["was_noisy"]) + + def test_whitespace_collapsed(self): + raw = "Senior Engineer\tLondon\nUK" + cleaned, signals = self._normalize(raw) + self.assertEqual(cleaned, "Senior Engineer London UK") + # Single whitespace collapse removes 1/26 chars (3.8%) which is below + # the 5 % threshold, so was_noisy stays False. + self.assertFalse(signals["was_noisy"]) + + def test_empty_text(self): + cleaned, signals = self._normalize("") + self.assertEqual(cleaned, "") + self.assertFalse(signals["was_noisy"]) + + def test_whitespace_only(self): + cleaned, signals = self._normalize(" \t \n ") + self.assertEqual(cleaned, "") + self.assertTrue(signals["was_noisy"]) + + def test_removal_ratio_nonzero_for_noisy(self): + _, signals = self._normalize("Hello\x00World\nTest!!!") + self.assertGreater(signals["removal_ratio"], 0) + + +# ========================================================================= +# Tests: compensation scoring +# ========================================================================= + +class TestCompensation(unittest.TestCase): + """score_compensation: salary parsing and table lookup. 
+ + NOTE: Currency-code prefixes (EUR, USD) are not converted to GBP -- + only currency symbols ($, £, €) trigger conversion. Strings like + "EUR 60k" are parsed as GBP values because EUR is not recognised as a + currency symbol. + """ + + def _score(self, salary: str) -> float: + from scripts.job_priority_scorer import score_compensation + return score_compensation(salary)[0] + + def test_gbp_range_midpoint(self): + """GBP 50k-70k -> midpoint 60k -> score 12.""" + self.assertEqual(self._score("GBP 50,000 - 70,000"), 12) + + def test_gbp_single_value(self): + """GBP 80k -> score 15.""" + self.assertEqual(self._score("GBP 80,000"), 15) + + def test_pound_prefix(self): + """pound prefix -> midpoint 70k -> score 15.""" + self.assertEqual(self._score("£60,000 - £80,000"), 15) + + def test_dollar_prefix_range(self): + """$70k-90k -> $80k*0.79=63.2k ($ converted by symbol) -> score 12.""" + self.assertEqual(self._score("$70,000 - $90,000"), 12) + + def test_euro_prefix(self): + """EUR 50k-70k -> midpoint 60k (treated as GBP, EUR code not converted).""" + self.assertEqual(self._score("EUR 50,000 - 70,000"), 12) + + def test_usd_code_no_conversion(self): + """USD text code is NOT converted to GBP; midpoint 90k treated as GBP 90k -> score 18.""" + self.assertEqual(self._score("USD 80,000 - 100,000"), 18) + + def test_high_salary(self): + self.assertEqual(self._score("GBP 150,000"), 20) + + def test_low_salary(self): + self.assertEqual(self._score("GBP 25,000"), 5) + + def test_no_salary(self): + self.assertEqual(self._score(""), 6) + + def test_unparseable(self): + self.assertEqual(self._score("competitive"), 6) + + +# ========================================================================= +# Tests: role fit scoring +# ========================================================================= + +class TestRoleFit(unittest.TestCase): + """score_role_fit: title + JD matching against positive/negative terms.""" + + def _score(self, title: str, jd: str = "") -> float: + from 
scripts.job_priority_scorer import score_role_fit + return score_role_fit(title, jd)[0] + + def test_positive_title_match(self): + self.assertGreater(self._score("Senior Software Engineer"), 0) + + def test_negative_title_match(self): + self.assertLess(self._score("Data Annotation Specialist"), 10) + + def test_mixed_title_and_jd(self): + s = self._score( + "Backend Developer", + "Strong Python and Rust skills. We use React on the frontend.", + ) + self.assertGreater(s, 5) + + def test_neutral_title_no_jd(self): + self.assertGreaterEqual(self._score("Manager"), 0) + + def test_all_negative_title(self): + self.assertLessEqual(self._score("Sales Marketing Recruiter"), 5) + + def test_score_clamped_0_20(self): + from scripts.job_priority_scorer import score_role_fit + s, signals = score_role_fit( + "Software Engineer Full Stack Developer Backend Engineer", + "Python, React, TypeScript, Cloud, DevOps, AI, LLM, " + "Node, Rust, GenAI, Platform, SRE", + ) + self.assertLessEqual(s, 20) + self.assertGreaterEqual(s, 0) + + +# ========================================================================= +# Tests: seniority scoring +# ========================================================================= + +class TestSeniority(unittest.TestCase): + """score_seniority: priority-based signal detection.""" + + def _score(self, title: str, jd: str = "") -> float: + from scripts.job_priority_scorer import score_seniority + return score_seniority(title, jd)[0] + + def test_senior_engineer(self): + self.assertGreaterEqual(self._score("Senior Software Engineer"), 10) + + def test_junior_engineer(self): + self.assertLess(self._score("Junior Software Engineer"), 10) + + def test_intern_overrides(self): + self.assertLess(self._score("Senior Software Engineer Intern"), 6) + + def test_principal_engineer(self): + self.assertGreater(self._score("Principal Engineer"), 5) + + def test_staff_engineer(self): + self.assertGreater(self._score("Staff Engineer"), 9) + + def 
test_no_seniority_signal(self): + self.assertEqual(self._score("Software Engineer"), 6) + + def test_mid_level(self): + s = self._score("Mid-Level Developer") + self.assertGreater(s, 8) + self.assertLess(s, 10) + + def test_graduate_role(self): + self.assertLess(self._score("Graduate Software Engineer"), 6) + + +# ========================================================================= +# Tests: work arrangement scoring +# ========================================================================= + +class TestWorkArrangement(unittest.TestCase): + """score_work_arrangement: location + worktype detection. + + IMPORTANT: score_work_arrangement takes (workplace_type, location, + scoring_text). The workplace_type must be passed explicitly -- the + function does not infer it from the description text. + """ + + def _score(self, location: str = "", jd: str = "", + worktype: str = "") -> float: + from scripts.job_priority_scorer import score_work_arrangement + return score_work_arrangement(worktype, location, jd)[0] + + def test_remote_uk(self): + self.assertEqual(self._score("London, UK", "", "remote"), 10) + + def test_remote_non_uk(self): + self.assertEqual(self._score("New York, NY", "", "remote"), 5) + + def test_hybrid_london(self): + self.assertEqual(self._score("London, UK", "", "hybrid"), 8) + + def test_onsite_london(self): + self.assertEqual(self._score("London, UK", "", "on-site"), 5) + + def test_onsite_non_uk(self): + self.assertEqual(self._score("New York, NY", "", "on-site"), 3) + + def test_remote_only_no_uk_location(self): + self.assertEqual(self._score("Remote", "", "remote"), 5) + + def test_not_uk(self): + self.assertEqual(self._score("Tokyo, Japan"), 0) + + def test_uk_in_jd_not_location(self): + self.assertEqual(self._score("Remote", "based in the UK", "remote"), 10) + + +# ========================================================================= +# Tests: application path scoring +# 
========================================================================= + +class TestApplicationPath(unittest.TestCase): + """score_application_path: ATS vs clean URL vs easy_apply detection.""" + + def _score(self, apply_url: str = "", external_url: str = "", + apply_type: str = "", jd: str = "", + has_salary: bool = True, has_usable_jd: bool = True) -> float: + from scripts.job_priority_scorer import score_application_path + return score_application_path( + apply_url, external_url, apply_type, jd, + has_salary, has_usable_jd, + )[0] + + def test_ats_workday_url(self): + self.assertEqual( + self._score(apply_url="https://acme.wd5.myworkdayjobs.com/Careers/123"), 8) + + def test_ats_greenhouse_url(self): + self.assertEqual( + self._score(apply_url="https://boards.greenhouse.io/acme/jobs/456"), 8) + + def test_clean_company_url(self): + self.assertEqual( + self._score(apply_url="https://acme.com/careers/789"), 7) + + def test_easy_apply_usable(self): + self.assertEqual( + self._score(apply_type="easy_apply", has_salary=True, has_usable_jd=True), 5) + + def test_easy_apply_weak(self): + self.assertEqual( + self._score(apply_type="easy_apply", has_salary=False, has_usable_jd=False), 1) + + def test_linkedin_apply_with_clean_external(self): + self.assertEqual( + self._score( + apply_url="https://linkedin.com/jobs/view/123", + external_url="https://company.com/careers/456"), 7) + + def test_linkedin_apply_with_ats_external(self): + self.assertEqual( + self._score( + apply_url="https://linkedin.com/jobs/view/123", + external_url="https://acme.wd5.myworkdayjobs.com/Careers/456"), 8) + + def test_aggregator_in_jd_text(self): + """Aggregator detection is JD-text-based, not URL-based.""" + self.assertEqual( + self._score(jd="Posted via efinancialcareers"), 2) + + def test_missing_everything(self): + self.assertEqual(self._score(), 0) + + +# ========================================================================= +# Tests: freshness scoring +# 
========================================================================= + +class TestFreshness(unittest.TestCase): + """score_freshness: recency-based + rank-based scoring.""" + + def _score(self, posted_time: str = "", raw_record: dict = None, + ref_date: date = None): + from scripts.job_priority_scorer import score_freshness + return score_freshness(posted_time, raw_record or {}, ref_date) + + def test_posted_today(self): + today = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + total, signals = self._score(posted_time=today) + self.assertEqual(signals["freshness_score"], 5) + + def test_posted_5_days_ago(self): + from datetime import timedelta + dt = (datetime.now(timezone.utc) - timedelta(days=5)).strftime("%Y-%m-%dT%H:%M:%SZ") + total, signals = self._score(posted_time=dt) + self.assertEqual(signals["freshness_score"], 4) + + def test_posted_20_days_ago(self): + from datetime import timedelta + dt = (datetime.now(timezone.utc) - timedelta(days=20)).strftime("%Y-%m-%dT%H:%M:%SZ") + total, signals = self._score(posted_time=dt) + self.assertEqual(signals["freshness_score"], 1) + + def test_posted_over_30_days(self): + from datetime import timedelta + dt = (datetime.now(timezone.utc) - timedelta(days=45)).strftime("%Y-%m-%dT%H:%M:%SZ") + total, signals = self._score(posted_time=dt) + self.assertEqual(signals["freshness_score"], 0) + + def test_no_post_time(self): + total, signals = self._score() + self.assertEqual(signals["freshness_score"], 0) + + def test_rank_50(self): + total, signals = self._score(raw_record={"rank": 50}) + self.assertEqual(signals["rank_score"], 5) + + def test_rank_200(self): + total, signals = self._score(raw_record={"rank": 200}) + self.assertEqual(signals["rank_score"], 3) + + def test_fixed_reference_date(self): + total, signals = self._score( + posted_time="2026-05-01T10:00:00Z", + ref_date=date(2026, 5, 8), + ) + self.assertEqual(signals["freshness_score"], 4) + + def test_freshness_plus_rank_total(self): + total, 
signals = self._score( + posted_time="2026-05-07T10:00:00Z", + raw_record={"rank": 50}, + ref_date=date(2026, 5, 9), + ) + self.assertEqual(signals["freshness_score"], 5) + self.assertEqual(signals["rank_score"], 5) + self.assertEqual(total, 10.0) + + +# ========================================================================= +# Tests: data completeness scoring +# ========================================================================= + +class TestDataCompleteness(unittest.TestCase): + """score_data_completeness: 10x1-point checks. + + IMPORTANT: the 6th check (has_jd_raw) looks for keys 'jd', 'description', + or 'jobDescription' in the *raw_record* (not the normalized job_description). + """ + + def _score(self, job: dict, apply_type: str = "", raw_record: dict = None): + from scripts.job_priority_scorer import score_data_completeness + return score_data_completeness( + job_title=job.get("job_title", ""), + company_name=job.get("company_name", ""), + location=job.get("location", ""), + job_description=job.get("job_description", ""), + salary=job.get("salary", ""), + posted_time=job.get("post_time", ""), + apply_url=job.get("apply_url", ""), + external_url=job.get("external_url", ""), + raw_record=raw_record or job, + apply_type=apply_type, + ) + + def test_complete_job_scores_10(self): + """A fully populated job scores 10. To get jd_raw we pass a + raw_record that contains a 'jd' key. 
The job_description must be + >= 500 chars for has_jd_length_500; apply_type must be easy_apply.""" + raw = _make_job( + job_description="A " * 251, # 502 chars -- satisfies has_jd_length_500 + apply_type="easy_apply", + ) + raw["jd"] = raw["job_description"] + score, signals = self._score(raw, raw_record=raw) + self.assertEqual(score, 10) + + def test_empty_job_scores_0(self): + score, signals = self._score({}) + self.assertEqual(score, 0) + + def test_missing_title(self): + score, signals = self._score(_make_job(job_title="")) + self.assertLess(score, 10) + + def test_missing_company(self): + score, signals = self._score(_make_job(company_name="")) + self.assertLess(score, 10) + + def test_missing_location(self): + score, signals = self._score(_make_job(location="")) + self.assertLess(score, 10) + + def test_missing_salary(self): + score, signals = self._score(_make_job(salary="")) + self.assertLess(score, 10) + + def test_missing_jd(self): + score, signals = self._score(_make_job(job_description="")) + self.assertLess(score, 10) + + def test_missing_post_time(self): + score, signals = self._score(_make_job(post_time="")) + self.assertLess(score, 10) + + def test_easy_apply_detected(self): + score, signals = self._score( + _make_job(salary="", apply_url="", external_url="", job_description=""), + apply_type="easy_apply", + ) + self.assertGreater(score, 0) + self.assertTrue(signals.get("has_easy_apply")) + + +# ========================================================================= +# Tests: source quality scoring +# ========================================================================= + +class TestSourceQuality(unittest.TestCase): + """score_source_quality: recruiter/aggregator/short-JD penalties.""" + + _LONG_JD = ( + "Senior software engineer with strong Python and React skills needed " + "for our growing platform team. Full-stack development with TypeScript, " + "Node.js, and cloud infrastructure. We offer competitive salary and " + "benefits package. 
Join our engineering team and help build the next " + "generation of our platform. This role involves backend development, " + "API design, and mentoring junior team members. Additional padding to " + "ensure this description comfortably exceeds three hundred characters " + "so the short JD penalty threshold is not triggered during the test." + ) # > 300 chars to avoid jd_too_short penalty + + def _score(self, job: dict): + from scripts.job_priority_scorer import score_source_quality + from scripts.job_priority_config import MIN_JD_LENGTH_USABLE + scoring_text = job.get("job_description", "") + salary = job.get("salary", "") + apply_type = job.get("apply_type", "") + apply_url = job.get("apply_url", "") + external_url = job.get("external_url", "") + has_usable_jd = len(scoring_text.strip()) >= MIN_JD_LENGTH_USABLE + # Default applicant_count to a numeric value so the + # weak_applicant_count penalty does not fire unless a test + # explicitly overrides it. + applicant_count = job.get("applicant_count", "5") + raw_record = job + rank = None + return score_source_quality( + company_name=job.get("company_name", ""), + scoring_text=scoring_text, + salary=salary, + apply_type=apply_type, + apply_url=apply_url, + external_url=external_url, + has_usable_jd=has_usable_jd, + applicant_count=applicant_count, + raw_record=raw_record, + rank=rank, + ) + + def test_clean_job_scores_10(self): + score, signals = self._score( + _make_job(job_description=self._LONG_JD)) + self.assertEqual(score, 10) + + def test_recruiter_company_penalty(self): + score, signals = self._score( + _make_job(company_name="Harnham Recruitment", + job_description=self._LONG_JD)) + self.assertEqual(score, 6) + + def test_recruiter_phrase_penalty(self): + score, signals = self._score( + _make_job( + job_description=( + "We are partnered with a leading tech company to fill " + "senior engineering positions. This description provides " + "enough context to avoid the short JD penalty. 
The role " + "involves backend development with Python and cloud " + "infrastructure management. Additional text to ensure " + "the total length exceeds the three hundred character " + "minimum threshold for the short JD penalty so that only " + "the recruiter phrase penalty is triggered for this case." + ), + )) + self.assertEqual(score, 7) + + def test_missing_salary_penalty(self): + score, signals = self._score( + _make_job(salary="", job_description=self._LONG_JD)) + self.assertEqual(score, 8) + + def test_short_jd_penalty(self): + score, signals = self._score( + _make_job(job_description="Short.", + applicant_count="5")) # suppress weak_applicant + self.assertEqual(score, 8) + + def test_easy_apply_no_owned_url_penalty(self): + score, signals = self._score( + _make_job(apply_url="https://linkedin.com/jobs/view/123", + external_url="", + job_description=self._LONG_JD)) + self.assertEqual(score, 10) # no apply_type set, so no easy_apply penalty + + def test_multiple_penalties_stack(self): + score, signals = self._score( + _make_job(company_name="Robert Half Recruitment", + salary="", + job_description="Brief.")) + self.assertGreaterEqual(score, 0) + self.assertLess(score, 10) + + +# ========================================================================= +# Tests: penalty system +# ========================================================================= + +class TestPenalties(unittest.TestCase): + """apply_penalties: all 8 penalty checks.""" + + def _apply(self, score: float, job: dict, signals: dict = None): + from scripts.job_priority_scorer import apply_penalties + return apply_penalties(score, signals or {}, job) + + def test_no_penalties(self): + s, reasons = self._apply(50, _make_job()) + self.assertEqual(s, 50) + self.assertEqual(reasons, []) + + def test_scam_penalty(self): + """Scam pattern: 'earn money fast from home' + 'no experience necessary'.""" + job = _make_job( + job_description=( + "Earn money fast from home! 
No experience necessary - we will " + "train you. This is padding to exceed the one hundred character " + "minimum so that the low quality duplicate penalty does not " + "interfere with the scam penalty test." + ), + ) + s, reasons = self._apply(50, job, {}) + self.assertEqual(s, 30) # 50 - 20 + + def test_non_engineering_role(self): + """Title must NOT have positive role terms AND must have negative ones. + Avoid 'commission' in the JD to prevent the UNPAID_COMMISSION penalty + from firing on top of the non-engineering penalty.""" + s, reasons = self._apply(50, _make_job( + job_title="Sales Representative", + job_description=( + "Sales and marketing position with competitive compensation " + "and client relationship management responsibilities for this " + "important customer-facing role." + ), + ), {}) + self.assertEqual(s, 35) + + def test_low_info_recruiter(self): + """Test the low-info recruiter penalty (all 4 conditions required). + JD must be >= 100 chars (avoid low_quality) but < 500 chars (not usable).""" + job = _make_job( + company_name="Recruitment Agency Ltd", + job_description=( + "Our client is looking for a talented engineer. Apply now for " + "this exciting opportunity with great benefits and compensation." + ), + salary="", + apply_type="easy_apply", + ) + s, reasons = self._apply(50, job, {}) + self.assertEqual(s, 40) # 50 - 10 + + def test_aggregator_repost(self): + """Test aggregator repost penalty (3 conditions required).""" + s, reasons = self._apply( + 50, _make_job( + job_description=( + "Posted via efinancialcareers. This description is long " + "enough to avoid the low quality duplicate penalty threshold " + "of one hundred characters for this test scenario." 
+ ), + salary="", + apply_url="", + ), {}) + self.assertEqual(s, 42) + + def test_noisy_text_penalty(self): + """Noise penalty requires removal_ratio > 0.05 AND clean_len < 500.""" + from scripts.job_priority_scorer import normalize_scoring_text + short_noisy = "Hello\x00World" + parsed, noise = normalize_scoring_text(short_noisy) + s, reasons = self._apply(50, _make_job(job_description=short_noisy), + {"noise": noise}) + self.assertIn("noisy_text", " ".join(reasons)) + + def test_duplicate_low_quality(self): + """Extremely short JD (< 100 chars) triggers penalty.""" + s, reasons = self._apply(50, _make_job(job_description="Too short"), {}) + self.assertEqual(s, 45) + + def test_multiple_penalties(self): + job = _make_job( + job_title="Recruiter", + company_name="Agency Recruiters Inc", + job_description="Earn money fast from home! No experience.", + ) + s, reasons = self._apply(50, job, {}) + self.assertLess(s, 50) + + +# ========================================================================= +# Tests: hard-reject guard +# ========================================================================= + +class TestHardRejectGuard(unittest.TestCase): + """Hard-reject guard: score<25 with <2 low-value signals -> 'low'.""" + + def test_reject_with_2_signals(self): + from scripts.job_priority_scorer import score_job + job = _make_job( + job_title="Intern", location="Unknown", + salary="", apply_url="", external_url="", job_description="") + r = score_job(job) + self.assertEqual(r.tier, "reject", + f"Should be reject, got tier={r.tier} score={r.score}") + + def test_reject_with_1_signal_overridden_to_low(self): + """Score < 25 but only 1 low-value signal -> overridden to 'low'. + + The only low-value signal is missing_salary. A usable JD and clean + company/title suppress the other low-value checks. 
+ """ + from scripts.job_priority_scorer import score_job + job = _make_job( + salary="", + location="", + apply_url="", + external_url="", + job_description=( + "A fairly long description that has many positive role fit " + "keywords like software engineer, full stack, Python, React, " + "TypeScript, cloud, and devops. This is a genuine engineering " + "role with good details about the position. We need strong " + "engineering skills and experience with modern technologies. " + ), + ) + r = score_job(job) + self.assertEqual(r.tier, "low", + f"Should be 'low' override, got tier={r.tier} score={r.score}") + + +# ========================================================================= +# Tests: score_job integration +# ========================================================================= + +class TestScoreJobIntegration(unittest.TestCase): + """score_job: full pipeline integration tests.""" + + def test_good_senior_dev_scores_medium_or_high(self): + from scripts.job_priority_scorer import score_job + job = _make_job( + job_title="Senior Software Engineer", + company_name="Google", + location="London, UK", + salary="GBP 120,000 - 150,000", + job_description=( + "Lead software engineer building cloud-native systems " + "with Python, React, TypeScript, and Rust. We need strong " + "backend engineering skills for our platform team. " + "Mentor junior developers and drive architecture decisions." + ), + apply_url="https://google.com/careers/123", + ) + r = score_job(job) + self.assertIn(r.tier, ("high", "medium")) + self.assertGreaterEqual(r.score, 50) + + def test_scam_job_tier_reject(self): + """Use text that triggers the scam regex.""" + from scripts.job_priority_scorer import score_job + job = _make_job( + salary="", + apply_url="https://linkedin.com/jobs/view/scam123", + external_url="", + job_description=( + "Earn money fast from home! No experience necessary " + "- we will train you. Unlimited earning potential." 
+ ), + location="Remote", + ) + r = score_job(job) + self.assertEqual(r.tier, "reject", + f"Expected reject, got {r.tier} (score={r.score})") + + def test_ats_good_salary_scores_medium(self): + from scripts.job_priority_scorer import score_job + job = _make_job( + job_title="Full Stack Developer", + company_name="Acme Corp", + location="Remote, UK", + salary="USD 100,000 - 130,000", + apply_url="https://acme.wd5.myworkdayjobs.com/Careers/123", + job_description=( + "Full stack developer with TypeScript, React, and Node.js. " + "We offer competitive compensation and fully remote work." + ), + ) + r = score_job(job) + self.assertGreaterEqual(r.tier, "medium") + self.assertGreaterEqual(r.score, 50) + + def test_empty_job_rejected(self): + from scripts.job_priority_scorer import score_job + r = score_job({}) + self.assertEqual(r.tier, "reject") + + def test_deterministic(self): + from scripts.job_priority_scorer import score_job + job = _make_job() + r1 = score_job(job) + r2 = score_job(job) + self.assertEqual(r1.score, r2.score) + self.assertEqual(r1.tier, r2.tier) + self.assertEqual(r1.signals, r2.signals) + + def test_score_result_frozen(self): + from scripts.job_priority_scorer import ScoreResult + r = ScoreResult(score=50.0, tier="medium", version="v1", + signals={}, scoring_text="test") + with self.assertRaises(AttributeError): + r.score = 60.0 # type: ignore[misc] + + def test_version_set(self): + from scripts.job_priority_scorer import score_job, SCORER_VERSION + r = score_job(_make_job()) + self.assertEqual(r.version, SCORER_VERSION) + + def test_score_clamped_0_100(self): + from scripts.job_priority_scorer import score_job + r = score_job(_make_job()) + self.assertGreaterEqual(r.score, 0) + self.assertLessEqual(r.score, 100) + + def test_scoring_text_in_result(self): + from scripts.job_priority_scorer import score_job + r = score_job(_make_job()) + self.assertTrue(len(r.scoring_text) > 0) + + def test_signals_in_result(self): + """Signals are nested dicts: 
signals['compensation']['score'] etc.""" + from scripts.job_priority_scorer import score_job + r = score_job(_make_job()) + self.assertIn("compensation", r.signals) + self.assertIn("role_fit", r.signals) + self.assertIn("seniority", r.signals) + self.assertIn("work_arrangement", r.signals) + self.assertIn("application_friction", r.signals) + self.assertIn("freshness", r.signals) + self.assertIn("data_quality", r.signals) + self.assertIn("source_quality", r.signals) + self.assertIn("penalties", r.signals) + + def test_tier_high_possible(self): + from scripts.job_priority_scorer import score_job + job = _make_job( + job_title="Senior Staff Software Engineer", + company_name="TopTech", + location="London, UK", + salary="GBP 150,000 - 200,000", + apply_url="https://toptech.com/careers/lead", + job_description=( + "Senior staff software engineer to lead our cloud platform team. " + "We use Python, Rust, TypeScript, React, and Node.js at scale. " + "Drive architecture decisions, mentor engineers, build " + "distributed systems. We need deep backend engineering expertise " + "and AI/ML experience. Site reliability and DevOps practices " + "are core to this leadership role." + ), + ) + r = score_job(job) + self.assertGreaterEqual(r.tier, "high", + f"Expected high, got {r.tier} (score={r.score})") + + +# ========================================================================= +# Tests: Reference date handling +# ========================================================================= + +class TestReferenceDate(unittest.TestCase): + """score_job: reference_date parameter. + + NOTE: the freshness signals live under r.signals['freshness'] which + contains top-level keys like 'freshness_score', 'rank_score', etc. 
+ """ + + def test_fixed_reference_date(self): + from scripts.job_priority_scorer import score_job + job = _make_job(post_time="2026-05-01T10:00:00Z") + r = score_job(job, reference_date=date(2026, 5, 8)) + self.assertEqual(r.signals["freshness"]["freshness_score"], 4) + + def test_reference_date_as_date_object(self): + from scripts.job_priority_scorer import score_job + job = _make_job(post_time="2026-05-01T10:00:00Z") + r = score_job(job, reference_date=date(2026, 5, 4)) + self.assertEqual(r.signals["freshness"]["freshness_score"], 5) + + +# ========================================================================= +# Tests: edge cases +# ========================================================================= + +class TestEdgeCases(unittest.TestCase): + """Edge cases: missing fields, unusual inputs.""" + + def test_missing_all_fields(self): + from scripts.job_priority_scorer import score_job + r = score_job({"job_title": "Engineer"}) + self.assertIsInstance(r.score, float) + self.assertIn(r.tier, ("high", "medium", "low", "reject")) + + def test_non_string_fields(self): + from scripts.job_priority_scorer import score_job + r = score_job({"job_title": 123, "company_name": 456}) + self.assertIsInstance(r.score, float) + + def test_none_fields(self): + from scripts.job_priority_scorer import score_job + r = score_job({"job_title": None, "company_name": None}) + self.assertIsInstance(r.score, float) + + def test_list_fields(self): + from scripts.job_priority_scorer import score_job + r = score_job({"job_title": ["Engineer"]}) + self.assertIsInstance(r.score, float) + + def test_very_long_job_title(self): + from scripts.job_priority_scorer import score_job + r = score_job({"job_title": "Senior " * 20 + "Engineer"}) + self.assertIsInstance(r.score, float) + + def test_empty_location_with_remote_jd(self): + """Without an explicit workplace_type the work_arrangement falls to the + unknown branch (no workplace_type is inferred from JD keywords).""" + from 
scripts.job_priority_scorer import score_job + r = score_job({ + "job_title": "Engineer", + "location": "", + "job_description": "Fully remote position from anywhere.", + }) + self.assertIsInstance(r.score, float) + # workplace_type is empty so unknown branch: no UK signal -> 0 + self.assertEqual(r.signals["work_arrangement"]["score"], 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_sync_autocli_jobs.py b/tests/test_sync_autocli_jobs.py new file mode 100644 index 0000000..e289a74 --- /dev/null +++ b/tests/test_sync_autocli_jobs.py @@ -0,0 +1,89 @@ +"""Tests for sync/upsert — TDD RED phase. + +Tests for NormalizedJob dataclass field passthrough. +Database-level tests (idempotency, source_records) will be added later. +""" + +import json +import unittest + + +def _make_raw_record(**overrides) -> dict: + """Helper to build a raw record in the format clean_linkedin_jobs.py outputs.""" + defaults = { + "title": "Cloud Engineer", + "company": "Example Corp", + "location": "Remote", + "source": "linkedin", + "source_channel": "recommended", + "apply_type": "easy_apply", + "url": "https://linkedin.com/jobs/view/123", + "url_normalized": "https://linkedin.com/jobs/view/123", + "url_hash": "abc123def456", + "external_url": "https://example.com/apply", + "easy_apply": "true", + "jd": "We need a cloud engineer...", + "salary": {"raw": "$100k-$150k", "min": 100000, "max": 150000, "currency": "USD", "period": "year"}, + "posted_time": "2026-05-01", + } + defaults.update(overrides) + return defaults + + +class TestNormalizedJobFields(unittest.TestCase): + """Tests that NormalizedJob passes through new fields.""" + + def test_normalize_job_passes_source_channel(self): + """NormalizedJob should include source_channel.""" + from scripts.sync_autocli_jobs import normalize_job + + rec = _make_raw_record(source_channel="recommended") + job = normalize_job("linkedin", rec) + + self.assertIsNotNone(job) + assert job is not None + 
self.assertEqual(job.source_channel, "recommended") + + def test_normalize_job_passes_apply_type(self): + """NormalizedJob should include apply_type.""" + from scripts.sync_autocli_jobs import normalize_job + + rec = _make_raw_record(apply_type="easy_apply") + job = normalize_job("linkedin", rec) + + self.assertIsNotNone(job) + assert job is not None + self.assertEqual(job.apply_type, "easy_apply") + + def test_normalize_job_passes_url_hash(self): + """NormalizedJob should include url_hash.""" + from scripts.sync_autocli_jobs import normalize_job + + rec = _make_raw_record(url_hash="xyz789") + job = normalize_job("linkedin", rec) + + self.assertIsNotNone(job) + assert job is not None + self.assertEqual(job.url_hash, "xyz789") + + def test_normalize_job_missing_new_fields_defaults_empty(self): + """Missing source_channel/apply_type/url_hash should default to empty string.""" + from scripts.sync_autocli_jobs import normalize_job + + rec = _make_raw_record() + # Remove new fields and easy_apply (which triggers apply_type inference) + rec.pop("source_channel", None) + rec.pop("apply_type", None) + rec.pop("easy_apply", None) + rec.pop("url_hash", None) + job = normalize_job("linkedin", rec) + + self.assertIsNotNone(job) + assert job is not None + self.assertEqual(job.source_channel, "") + self.assertEqual(job.apply_type, "") + self.assertEqual(job.url_hash, "") + + +if __name__ == "__main__": + unittest.main()