From 460d2e1b6c9182052f1d544af7bb9dea5b90b0d6 Mon Sep 17 00:00:00 2001 From: Cmochance <3216202644@qq.com> Date: Tue, 16 Jun 2026 16:22:17 +0800 Subject: [PATCH 01/10] =?UTF-8?q?feat(remote):=20Codex=20=E7=A7=BB?= =?UTF-8?q?=E5=8A=A8=E7=AB=AF=E8=BF=9C=E7=A8=8B=E6=8E=A7=E5=88=B6=20M1(Tel?= =?UTF-8?q?egram=20Bot=20Channel=20+=20CDP=20=E9=A9=B1=E5=8A=A8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 不依赖 GPT 账号、用手机远程驱动本地 Codex:transfer Rust 端跑一个 Telegram bot (纯 HTTPS long-poll,无 relay/公网回调,绕开 renderer CSP),授权用户发消息 → 经 CDP 把 prompt 灌进 Codex 输入框(ProseMirror)、提交、读流式输出回编到 Telegram。 - src-tauri/src/codex_remote/{driver,telegram,mod}.rs:CodexDriver(CDP 驱动)+ Telegram 客户端 + daemon(白名单鉴权 / 命令 /new /stop /status /help / 流式回复) - 复用 codex_theme_injector 的 CDP 原语 + codex_quota_injector 的 daemon 骨架 - should_attach_debug_port 加 codexRemoteControlEnabled;settings UI(开关+token+白名单,中英 i18n) - 借鉴 zcode v3.1.0 Bot Channel 设计(不移植代码);默认关;Windows MSIX 不支持 Refs MOC-249 --- README.en.md | 1 + README.md | 1 + frontend/index.html | 17 + frontend/js/app.js | 14 + frontend/js/i18n.js | 12 + .../src/admin/services/desktop/process.rs | 9 +- src-tauri/src/codex_remote/driver.rs | 261 ++++++++++ src-tauri/src/codex_remote/mod.rs | 459 ++++++++++++++++++ src-tauri/src/codex_remote/telegram.rs | 145 ++++++ src-tauri/src/main.rs | 5 + 10 files changed, 923 insertions(+), 1 deletion(-) create mode 100644 src-tauri/src/codex_remote/driver.rs create mode 100644 src-tauri/src/codex_remote/mod.rs create mode 100644 src-tauri/src/codex_remote/telegram.rs diff --git a/README.en.md b/README.en.md index 6719a67f..f7a7cd83 100644 --- a/README.en.md +++ b/README.en.md @@ -91,6 +91,7 @@ Injects a standalone "Usage" section at the bottom of Codex Desktop's "Toggle pi - **Injected system prompts follow the UI language**: the `apply_patch` chat-path rules + autocompact summarization prompt that this project injects for non-OpenAI providers track the `语言 / Language` setting (Chinese users → Chinese prompts, avoiding mixed-language model thinking); V4A keywords (`*** Begin Patch` / `@@
` etc.) + Codex CLI error message originals stay in English (parser / matcher does not accept translations) - **Codex Desktop Theme (optional, off by default)**: Theme page ships 11 built-in anime themes (`carton` with a floating mascot, plus `changli` / `azurlane` / `nailin` / `zani` / `frost` / `nocturne` / `duet` / `rose` / `sonata` / `studio`), each individually colour-matched to its artwork (per-theme glass + accent). Injects design-token overrides (`--color-token-*` + the runtime `--color-*` layer) + a background image into Codex Desktop via CDP, covering chat / settings / collapsed-sidebar / popovers. Toggle is independent from Plugin Unlock; page reload re-applies automatically; disabling the toggle only clears the saved preference — any already-injected theme stays until the next Codex restart - **Usage panel inside Codex Desktop (optional, off by default)** (MOC-204): Settings → "Show usage in Codex" injects a collapsible "Usage" section at the bottom of Codex's "Toggle pinned summary" popup (the panel that contains Environment / Sources sections), showing up to 4 rows: ① **5-hour quota / weekly quota** — whitelisted providers only: **antigravity gemini series** reads from `cloudcode-pa.googleapis.com/v1internal:retrieveUserQuotaSummary` (dual-window 5h + weekly, remaining% = remainingFraction×100); **GLM Coding Plan** (`bigmodel.cn` / `z.ai` coding hosts) reads from `monitor/usage/quota/limit` (apiKey auth, no Bearer prefix), returning 5h + weekly TOKENS_LIMIT records, converted as remaining% = 100 − usage%; **Xiaomi MiMo Token Plan** (`platform.xiaomimimo.com`) shows a monthly-plan remaining% progress bar — the plan quota is only accessible via a MiMo web session (httpOnly cookie), so you must click "Sign in to Xiaomi account" in the provider edit page first: the app opens an embedded webview for login, captures the session cookie, and the daemon uses it to query `/api/v1/tokenPlan/usage`; **DeepSeek** (`api.deepseek.com`) shows a ¥X balance numeric entry, read from the official `/user/balance` endpoint using the same API key (Bearer); **Kimi (月之暗面 / Moonshot PAYG, `api.moonshot.cn` / `.ai`)** shows balance numeric entries (available / cash / voucher, ¥/$ by host), read from the official `/v1/users/me/balance` using the same key (Bearer) — **note: the subscription-based `kimi-code` (`api.kimi.com/coding`) is a separate provider with no balance endpoint and is excluded**; **anyrouter** (`api.anyrouter.top`) shows a $X used-amount numeric entry, read from `/v1/dashboard/billing/usage` using the same key (Bearer; remaining balance is blocked by upstream anti-scraping so only the used amount is shown). Whitelist is determined by baseUrl host. Red warning ≤10% + reset time shown. Quota rows appear only when the active provider matches a whitelisted host; all others show no quota rows. ② **Context** — injected JS reads `contextUsage.usedTokens` + `contextWindow` directly from Codex's React fiber, available immediately for any existing conversation without a new turn; full window = contextWindow ÷ 0.95 (adds back the 5% reserve Codex hides); 1M models display "1M" not "1000k". ③ **Tokens (real-time rate · cumulative)** — rate estimated by a MutationObserver watching Codex's streaming text (2s sliding window, CJK-aware); cumulative total from Codex rollout. ④ **Cache hit rate** — from rollout cached_input/input. **③④ and the rate are all isolated per active conversation (MOC-230)**: injected JS reads the current `conversationId` from the React fiber and the daemon keys totals to that conversation's rollout (== filename uuid, not the most-recently-modified file), following conversation switches; shows "—" (never another conversation's data) when the id / its rollout can't be resolved. The "Usage" title is collapsible (chevron + localStorage-persisted). Injection uses periodic CDP pushes; re-attaches automatically after a Codex page reload or restart. Requires launching Codex through this app; restart Codex after toggling if already running. +- **Codex mobile remote control (optional, off by default)** (MOC-249, M1): Settings → "Mobile remote control (Telegram)", fill in a Telegram Bot Token + an allowed-users whitelist (numeric user ids or @usernames, comma-separated). When enabled, transfer runs a Telegram bot (pure HTTPS long-poll — no relay / public callback): authorized users message the bot from their phone to remotely drive the Codex launched by this app — transfer injects the prompt into Codex's composer (ProseMirror) over CDP, submits, and streams the reply back into the Telegram message. Commands: `/new` (new chat), `/stop` (stop current turn), `/status`, `/help`; plain text = one prompt turn. **⚠️ Remote control is equivalent to operating this machine remotely — only authorize your own account and keep the Bot Token safe**; requires launching Codex through this app (the toggle affects the debug port), restart Codex after toggling. M1 is conversational (if Codex needs to approve a command/tool, confirm on the desktop; approval-relay-to-phone is a later phase). The Windows Store (MSIX) build is unsupported due to the debug-port passthrough gap. - **System-proxy (VPN/ladder) connectivity detection** (MOC-114): the dashboard "Network Proxy" card shows live status — connected / disconnected / PAC auto-config / detecting. In relay real-account mode, the "Auto-unlock Codex Plugins" toggle gates on both conditions being met (valid account AND proxy reachable), preventing the silent-failure state where plugins spin and return 502s while the UI shows "logged in" because the proxy is down. Detection uses a short-timeout TCP connect to the proxy port only; chatgpt.com is never contacted. - **Built-in web fetch tool (web_fetch, MOC-144)**: Settings → "Built-in web fetch backend" — select `auto` (recommended; **defaults to `auto` since MOC-215, works out of the box** — new users get web_fetch / web_search without manually enabling it; web_fetch uses curl/wreq and needs no Chrome, web_search is still gated on Chrome readiness and never silently downloads) / `curl` / `wreq` / `headless` (**independent of** the Codex sandbox network toggle). Transfer automatically registers a `web_fetch` MCP tool with Codex, which the model can call directly to fetch web pages — `curl` uses standard HTTP, `wreq` bypasses Cloudflare TLS challenges, `headless` drives a headless Chrome to retrieve JS-rendered DOM (first-time headless use prompts to download chrome-headless-shell, ~86 MB, if Chrome is not installed). Beyond the three fetch backends, `web_fetch` also follows **HTML `meta refresh` / JS `location` redirects** (re-fetches the target URL, loop-protected to 3 hops) — curl/wreq/headless only follow HTTP 3xx and do not handle these client-side redirects; "placeholder" redirect pages (e.g. pages that bounce around Twitter/Substack blocks) are now automatically followed to the real destination (MOC-139). **`auto` tier (MOC-161)**: automatically escalates from curl → wreq → headless based on page-difficulty signals; remembers the last successful tier per origin so subsequent requests start there; downgrades to curl when no system proxy is reachable (wreq / headless rely on a proxy); first use of the headless tier still confirms the Chrome download. Switching tiers takes effect immediately (no restart needed); **toggling the feature on or off requires restarting Codex Desktop** for the network tools (web_fetch / web_search / read_url_local) to appear / disappear in Codex (since MOC-235 the MCP server stays registered to host `read_tool_artifact`; turning the network backend off just stops exposing those network tools rather than unloading the whole server). Fetched HTML is auto-converted to markdown before returning to the model (cleaner, fewer tokens; non-HTML responses pass through unchanged), and headless waits for networkIdle before capturing the rendered DOM (MOC-145). Headless fetches run with anti-detection stealth (strips `navigator.webdriver`, fakes `window.chrome`/plugins/WebGL, removes the `HeadlessChrome` UA token), passing passive-fingerprint / simple JS-challenge Cloudflare; interactive Turnstile/DataDome managed challenges still won't pass (MOC-152). On a CF JS-challenge page, headless now **waits in place for it to auto-clear** before reading (instead of returning the challenge page as content), and **persists the browser profile per origin** to reuse CF clearance cookies — a second fetch of the same site skips the repeat challenge and is faster (MOC-156). Before markdown conversion the page goes through **main-content extraction** (readability algorithm strips nav/header/footer/sidebar/ads, keeping only the article so large-page content is no longer crowded out by truncation; non-article pages fall back to the full page); **binary resources** (image / video / audio / PDF) and files over 16 MB are not downloaded and return a clear notice instead (no more garbage bytes / OOM) (MOC-152). `web_fetch` **returns the full extracted page text by default** (the current turn's tool output goes into the LLM context in full; the adapter layer automatically compresses older tool outputs to prevent context overflow; MOC-190) — no more pagination, no `offset` paging, no relevance-based `query` chunk selection, so precise content (code / schema / version numbers / figures) is never lost. If you fetched a URL earlier in the conversation and its content has since been folded/compressed in the context history, use **`read_url_local(url)`** to pull the full text from the in-process cache without re-fetching (cache TTL: 15 min). **More generally, when any tool's large output (shell / Feishu and other MCP / etc.) gets folded into a `[Tool output stored outside model context]` summary in history, the summary includes an `Artifact ID`, and the model can call `read_tool_artifact(artifact_id)` to retrieve that output's text** — read from the shared `tool_artifacts.db` (SQLite WAL, cross-process) that the proxy persists when compressing, so the model never re-runs a tool just to see history again; the retrieved content is visible only in the current turn and gets folded again next turn (no long-term context bloat); outputs over 90k chars are returned in pages (each below the proxy keep-full cap, with a trailer telling the model to page via `offset`) (MOC-235). These tools (`web_fetch` / `web_search` / `read_url_local` / `read_tool_artifact`) declare `readOnlyHint` (read-only), so Codex's auto-review guardian **skips approval** for them (`requires_mcp_tool_approval` short-circuits on the read-only hint) — network calls no longer incur a per-call risk-approval round-trip, removing that latency (MOC-172). - **Built-in web search tool (web_search, MOC-12)**: when the built-in web fetch backend is on (non-off) and the machine has Chrome ready, transfer registers a `web_search` tool with Codex — the model passes a query string and gets back a structured list of results (title + real URL + snippet), forming a **two-step search**: `web_search` to find sources, then `web_fetch` to read content, eliminating the need to guess URLs. **Why this matters**: Codex sends an OpenAI server-side `web_search` tool each turn, but third-party chat providers (MiniMax / DeepSeek / GLM / Kimi, etc.) don't support it — the adapter drops it, leaving the model to scrape search engines or guess URLs (real-world success rate ~17%). This tool queries **DuckDuckGo + Bing in parallel and merges the results, deduped by normalized URL** (no API key required, data-centre / VPN-exit IP friendly; the two indexes complement each other so single-call coverage is noticeably broader than a single source, MOC-215; previously Bing was only a fallback when DDG failed, MOC-186), and **always uses headless** internally — DDG / Bing block plain HTTP with anti-bot challenges regardless of TLS fingerprint, so a real browser is required; the parallel fetch keeps wall-time ≈ the slower single engine rather than the sum, and either engine being blocked / empty still leaves the other usable. `web_search` always uses headless internally, but its **exposure / invocation only requires Chrome to be ready** (system Chrome / Edge / Chromium, or an already-downloaded built-in chrome-headless-shell) — decoupled from the web_fetch tier: users with system Chrome can use search under any non-off tier (incl. curl / wreq) without triggering a download; if neither is present it stays hidden and a call returns a hint to pick the headless tier to complete the first-time download (MOC-190). Ad results are filtered out; blocked / no-results states return explicit error messages (never silently empty). **Pagination (MOC-215)**: `web_search` returns only the first page (~10-20 results, not fetching multiple pages at once to avoid excessive headless latency); when the model needs more / different sources it uses the separate **`web_search_more`** tool (same query, `page=2/3…`) to fetch the next batch (via Bing's `first=` deep pages), with a tail hint in the result steering the model to paginate rather than re-run the same query — numeric string arguments are parsed leniently (models often send `page` as the string `"2"`) so pagination never silently falls back to page 1. DDG HTML parsing borrows from `duckduckgo_search` (Python). diff --git a/README.md b/README.md index dcb7f991..82bd1580 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ Codex App Transfer 是一个面向 **OpenAI Codex APP** 的轻量桌面配置 + - **注入的 system prompts 跟随界面语言**:本项目对非 OpenAI provider 注入的 `apply_patch` chat-path 规则 + autocompact 总结提示词,跟设置里 `语言 / Language` 一致(中文用户 → 中文 prompt,避免模型中英混杂思考);V4A 关键字(`*** Begin Patch` / `@@
` 等)+ Codex CLI 错误消息原文保英文(parser / matcher 不接受翻译) - **Codex Desktop 主题(可选,默认关)**:Theme 页内置 11 套动漫主题(`carton` 含浮动看板娘,其余 `changli` / `azurlane` / `nailin` / `zani` / `frost` / `nocturne` / `duet` / `rose` / `sonata` / `studio`),每套按背景图独立调出暗玻璃 + 强调色。通过 CDP 向 Codex Desktop 注入设计令牌覆盖(`--color-token-*` + 运行时 `--color-*` 层)+ 背景图,覆盖聊天 / 设置页 / 折叠侧栏 / 弹层等各视图。开关跟 Plugin Unlock 独立,page reload 自动重应用;关闭开关只落盘清除偏好,已注入主题保留至 Codex 下次重启自然消失 - **Codex 内用量显示(可选,默认关)**(MOC-204):设置 → 「Codex 内显示用量信息」,在 Codex Desktop 顶栏「Toggle pinned summary」弹窗(含 Environment / Sources 等分区)底部注入独立「Usage」用量分区,最多 4 行:① **5 小时额度 / 每周额度**:仅白名单 provider 显示:**antigravity gemini 系**数据来自 `cloudcode-pa.googleapis.com/v1internal:retrieveUserQuotaSummary` 双窗口**剩余**额度(remainingFraction×100);**GLM Coding Plan**(`bigmodel.cn`/`z.ai` coding 系)数据来自 `monitor/usage/quota/limit` 端点(apiKey 直接鉴权,不带 Bearer),返回 5h / 每周 TOKENS_LIMIT,已用% → 剩余% = 100-已用;**小米 MiMo Token Plan**(`platform.xiaomimimo.com`)显示月度套餐剩余%进度条,需在 provider 编辑页点「登录小米账号」按钮——套餐用量只在小米控制台、走 httpOnly session cookie,app 内嵌 webview 登录后抓取 cookie 存本地,daemon 带该 cookie 查询 `/api/v1/tokenPlan/usage`;**DeepSeek**(`api.deepseek.com`)显示余额 ¥X 数值条目,调官方 `/user/balance` 接口、与推理同一把 API key(Bearer);**Kimi(月之暗面 / Moonshot PAYG,`api.moonshot.cn`/`.ai`)**显示余额数值条目(可用 / 现金 / 赠金,按 host 记 ¥/$),调官方 `/v1/users/me/balance`、与推理同一把 key(Bearer)——**注:订阅制 `kimi-code`(`api.kimi.com/coding`)是另一个 provider、无此余额接口,不在此列**;**anyrouter**(`api.anyrouter.top`)显示已用额度 $X 数值条目,调 `/v1/dashboard/billing/usage`、与推理同一把 key(Bearer;账户余额受上游反爬限制仅展示已用量)。白名单均按 baseUrl host 判定。≤10% 红色预警 + 重置时刻;仅活动 provider 命中白名单时显示额度行,其余不显。② **上下文**:注入脚本直接从 Codex React fiber 读 `contextUsage.usedTokens / contextWindow`,有历史对话即立即显示(不需新对话);满窗口 = contextWindow÷0.95(加回 Codex 隐藏的 5% reserve);1M 模型显「1M」而非「1000k」。③ **Tokens(实时速率·累计)**:速率由 MutationObserver 监测 Codex 流式文本增量估算(2s 滑窗,CJK 感知);累计量来自 Codex rollout 文件。④ **缓存命中率**:来自 rollout 的 cached_input/input。**③④ + 速率均按活动对话隔离(MOC-230)**:注入脚本从 React fiber 读当前 `conversationId`,daemon 按该 id 取对应 rollout(== 文件名 uuid,非「最近修改」的文件),切对话即跟随、不串号;读不到 id / 无对应 rollout 显「—」(绝不显示别的对话数据)。「Usage」标题可折叠(chevron + localStorage 持久)。注入走 CDP 周期推送,页面刷新 / 重启后自动重挂;需通过本应用启动 Codex,若 Codex 已在运行需重启生效 +- **Codex 移动端远程控制(可选,默认关)**(MOC-249,M1):设置 → 「移动端远程控制(Telegram)」,填 Telegram Bot Token + 授权用户白名单(数字 user id 或 @username,逗号分隔)。开启后 transfer 跑一个 Telegram bot(纯 HTTPS long-poll,不需 relay / 公网回调):授权用户在手机 Telegram 给 bot 发消息,即可远程驱动本应用启动的 Codex 跑一轮对话——transfer 经 CDP 把 prompt 灌进 Codex 输入框(ProseMirror)、提交、读流式输出回编到 Telegram 消息。命令 `/new`(新建对话)`/stop`(停止当前轮)`/status` `/help`,纯文本即一轮 prompt。**⚠️ 远程控制等同远程操作本机,务必只授权本人账号、妥善保管 Bot Token**;需通过本应用启动 Codex(开关影响调试端口),改开关后需重启 Codex 生效。M1 为对话问答式(Codex 若需批准命令/工具仍需桌面端确认;批准转发到手机为后续阶段)。Windows Store(MSIX)版因调试端口透传缺口暂不支持 - **系统代理(梯子)连通性检测**(MOC-114):仪表盘「网络代理」卡实时显示系统代理是否活跃(已连接 / 未连接 / 自动配置 PAC / 检测中);relay 真实账号模式下「自动解锁 Codex Plugins」开关在账号有效且代理可达两条件同时满足时才激活,避免梯子没开时 plugins 静默全 502 却显示"已登录"的误导态。探测仅对代理端口做短超时 TCP 连通测试,不访问 chatgpt.com。 - **内置联网抓取工具(web_fetch,MOC-144)**:设置页 → 「内置联网抓取工具」选 `auto`(推荐,**MOC-215 起默认 `auto`、开箱即用** —— 新装用户无需手动开启即可用 web_fetch / web_search;web_fetch 走 curl/wreq 不需 Chrome,web_search 仍受 Chrome 就绪 gate 保护、不静默下载) / `curl` / `wreq` / `headless`(**独立于** Codex 沙箱联网开关),transfer 自动往 Codex 注册 `web_fetch` MCP 工具,Codex 模型可直接调该工具抓取网页 —— `curl` 走标准 HTTP、`wreq` 绕 Cloudflare TLS 挑战、`headless` 驱动无头 Chrome 取 JS 渲染后 DOM(首次选 headless 若未装 Chrome 会弹窗确认下载 chrome-headless-shell, ~86 MB)。三档之外,`web_fetch` 还能跟随 **HTML meta refresh / JS `location` 跳转**(重定向到目标 URL 重抓,防循环最多 3 跳)——curl/wreq/headless 只处理 HTTP 3xx,不跟这类客户端重定向;绕 Twitter/Substack 等封锁的"占位跳转页"会自动跟随到真实内容页(MOC-139)。**`auto` 档(MOC-161)**:按页面难度自动从 curl 升级到 wreq 再到 headless,对每个域名记住上次成功档位(下次从该档起步省试错);系统代理不可达时自动压制至 curl(wreq / headless 依赖代理);首次用 headless 档同样弹窗确认 Chrome 下载。切档即时生效(无需重启);**改"开/关"状态后需重启 Codex Desktop** 才会让联网工具(web_fetch / web_search / read_url_local)在 Codex 里出现 / 消失(MOC-235 起该 MCP server 始终注册以托管 `read_tool_artifact`,关闭联网档只是不再暴露这几个联网工具,不再卸载整个 server)。抓到的 HTML 会自动转成 markdown 返给模型(更省 token、更干净;非 HTML 响应原样透传),headless 用 networkIdle 等渲染落定再取(MOC-145)。headless 抓取启用反检测 stealth(抹 `navigator.webdriver`、伪造 `window.chrome`/插件/WebGL、UA 去 `HeadlessChrome` 标记),可过被动指纹 / 简单 JS 挑战类 Cloudflare;交互式 Turnstile/DataDome 托管挑战仍过不了(MOC-152)。headless 遇 CF JS 挑战页会**原地等其自动解出**再读(而非立即把挑战页当正文返回),并**按域名持久化浏览器 profile** 复用 CF 放行 cookie —— 同一站点二次抓取跳过重复挑战、更快(MOC-156)。抓到的页转 markdown 前先做**正文抽取**(readability 算法剥 nav/页眉/页脚/侧栏/广告,只留正文,大页正文不再被截断挤掉;非文章页自动回退整页);图片 / 视频 / 音频 / PDF 等**二进制资源**与超 16 MB 大文件不下载、直接返提示(不再吐乱码 / 防 OOM)(MOC-152)。`web_fetch` **默认直接返回抓取到的完整正文**(当前轮全文进 LLM 上下文、adapter 层自动把历史轮的 tool 输出压缩以防撑爆;MOC-190)—— 不再分页、不再按 `offset` 翻页、不再按 `query` 相关性选块,精确信息(代码 / schema / 版本 / 数字)不丢。若较早抓取的某 URL 正文在对话历史里被折叠 / 压缩、需要回看完整原文,用 **`read_url_local(url)`** 从进程内缓存取回,不必重新联网(缓存 15 min)。**更进一步,任意工具(shell / 飞书等 MCP / 其它)的大输出在历史里被压成 `[Tool output stored outside model context]` 摘要时,摘要会给出 `Artifact ID`,模型可调 `read_tool_artifact(artifact_id)` 取回该输出原文** —— 读 proxy 压缩时落盘的共享 `tool_artifacts.db`(SQLite WAL,跨进程读),不必为回看历史而重跑工具;取回内容仅当前轮可见、下一轮再被自动折叠不长期占上下文;超 90k 字符的大输出分页返回(每块低于 proxy keep-full 上限,末尾提示用 `offset` 逐块读完整)(MOC-235)。这些工具(`web_fetch` / `web_search` / `read_url_local` / `read_tool_artifact`)均声明 `readOnlyHint`(只读),Codex 的 auto-review guardian 据此**跳过审批**(`requires_mcp_tool_approval` 命中只读直接放行),联网调用不再逐次触发风险审批往返、消除审批延迟(MOC-172)。 - **内置 web_search 搜索工具(MOC-12)**:启用「内置联网抓取工具」(非 off)且本机 Chrome 就绪后,transfer 往 Codex 注册 `web_search` 工具 —— 模型给关键词即返回结构化结果列表(标题 + 真实 URL + 摘要),配合 `web_fetch` 组成**两段式联网**:先 `web_search` 找信息源、再 `web_fetch` 抓正文,免去模型瞎猜 URL。**为什么需要**:Codex 默认每轮发的 OpenAI server-side `web_search` 在第三方 chat provider(MiniMax / DeepSeek / GLM / Kimi 等)上游不被支持、被协议层 drop,模型只能退化到自己抓搜索引擎页 / 猜 URL(真机实测成功率仅 ~17%)。本工具走 **DuckDuckGo + Bing 双引擎并行检索、按 URL 归一化去重后交错合并**(免 key、对数据中心 / VPN 出口 IP 友好;两家索引互补、单次覆盖面较单源明显更全,MOC-215;此前 Bing 仅在 DDG 失败时兜底 MOC-186),且**内部固定 headless** 浏览器代搜 —— DDG / Bing 对纯 HTTP 请求反爬拦截(无论 TLS 指纹多真),必须真浏览器跑 JS;并行抓取故 wall-time ≈ 单家而非求和,任一引擎被拦 / 无结果时另一家仍可用。`web_search` 内部固定 headless,但其**暴露 / 调用只要求本机 Chrome 就绪**(系统装了 Chrome / Edge / Chromium,或已下载内置 chrome-headless-shell)—— 与 web_fetch 档位解耦:系统有 Chrome 的用户在任意非 off 档(含 curl / wreq)都能用 search 且不触发下载;两者皆无则不暴露、调用返回提示引导去 headless 档完成首次下载(MOC-190)。结果自动过滤广告;反爬拦截 / 无结果时返回明确提示(不静默吐空)。**翻页(MOC-215)**:`web_search` 只返第 1 页(约一二十条,不一次扩抓多页以免 headless 延迟过高);模型需要更多 / 不同来源时用独立工具 **`web_search_more`(同 query, page=2/3…)** 取下一批(走 Bing `first=` 深页),结果尾部附诱导提示引导模型主动翻页而非用同一 query 重复搜 —— 工具参数对数字字符串(模型常把 `page` 传成 `"2"`)做宽容解析,避免翻页静默退回第 1 页。DDG HTML 解析模式借鉴 `duckduckgo_search`(Python)上游。 diff --git a/frontend/index.html b/frontend/index.html index e6de2f90..97167ca1 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -517,6 +517,23 @@

缓存命中分布

在 Codex 的固定摘要(pinned summary)弹窗底部显示用量面板:上下文占用、Tokens 速率与累计(所有 provider),以及 5 小时 / 每周额度(仅支持的 provider)。需 Codex 通过本应用启动;若 Codex 已在运行,改动开关后需重启 Codex 生效。

+
+ 移动端远程控制(Telegram) +
+ +

开启后 transfer 跑一个 Telegram bot:授权用户在手机上给 bot 发消息,即可远程驱动本应用启动的 Codex 跑一轮对话并流式收到回复。需 Codex 通过本应用启动;改动开关后需重启 Codex 生效。⚠️ 远程控制等同远程操作本机,务必只授权本人账号、妥善保管 Bot Token。

+
+ + +

从 Telegram @BotFather 创建 bot 后拿到的 HTTP API token。

+
+
+ + +

允许控制的 Telegram 数字 user id 或 @username,逗号分隔。未授权用户给 bot 发消息时会收到自己的 id,方便你加进来。

+
+
+
- -

允许控制的 Telegram 数字 user id 或 @username,逗号分隔。未授权用户给 bot 发消息时会收到自己的 id,方便你加进来。

+ +

允许控制的 Telegram 数字 user id,逗号分隔。出于安全只认数字 id(username 可被改/夺取,不支持)。未授权用户给 bot 发消息时会收到自己的数字 id,方便你加进来。

diff --git a/frontend/js/i18n.js b/frontend/js/i18n.js index d9a1d3e8..063cfa71 100644 --- a/frontend/js/i18n.js +++ b/frontend/js/i18n.js @@ -560,7 +560,7 @@ "settings.codexRemoteControlBotToken": "Bot Token", "settings.codexRemoteControlBotTokenHint": "从 Telegram @BotFather 创建 bot 后拿到的 HTTP API token。", "settings.codexRemoteControlAllowedUsers": "授权用户白名单", - "settings.codexRemoteControlAllowedUsersHint": "允许控制的 Telegram 数字 user id 或 @username,逗号分隔。未授权用户给 bot 发消息时会收到自己的 id,方便你加进来。", + "settings.codexRemoteControlAllowedUsersHint": "允许控制的 Telegram **数字 user id**,逗号分隔。出于安全只认数字 id(username 可被改/夺取,不支持)。未授权用户给 bot 发消息时会收到自己的数字 id,方便你加进来。", "settings.autoUnlockRestartCodex": "重启 Codex", "settings.pluginUnlockRuntimeStatus": "运行时状态:未检测", "settings.pluginUnlockRuntimeStatusPrefix": "运行时状态:", @@ -1293,7 +1293,7 @@ "settings.codexRemoteControlBotToken": "Bot Token", "settings.codexRemoteControlBotTokenHint": "The HTTP API token you get after creating a bot via Telegram @BotFather.", "settings.codexRemoteControlAllowedUsers": "Allowed users", - "settings.codexRemoteControlAllowedUsersHint": "Telegram numeric user ids or @usernames allowed to control, comma-separated. Unauthorized users who message the bot get their own id back so you can add them.", + "settings.codexRemoteControlAllowedUsersHint": "Telegram **numeric user ids** allowed to control, comma-separated. For security only numeric ids are honored (usernames are mutable/reassignable and are not supported). Unauthorized users who message the bot get their own numeric id back so you can add them.", "settings.autoUnlockRestartCodex": "Restart Codex", "settings.pluginUnlockRuntimeStatus": "Runtime status: not detected", "settings.pluginUnlockRuntimeStatusPrefix": "Runtime status: ", diff --git a/src-tauri/src/codex_remote/mod.rs b/src-tauri/src/codex_remote/mod.rs index ec909956..f7f1fdff 100644 --- a/src-tauri/src/codex_remote/mod.rs +++ b/src-tauri/src/codex_remote/mod.rs @@ -56,7 +56,8 @@ fn bot_token() -> Option { .map(str::to_owned) } -/// 授权用户白名单(numeric id 或 `@username`,大小写不敏感)。 +/// 授权用户白名单(**仅数字 user id 生效**,bot-review P1)。仍兼容旧配置里残留的 +/// `@username` 项(剥 `@` 后保留),但它们匹配不到任何数字 id、不再授权。 fn allowed_users() -> Vec { settings() .as_ref() @@ -72,18 +73,15 @@ fn allowed_users() -> Vec { .unwrap_or_default() } +/// 仅按**稳定的数字 user id** 鉴权(bot-review P1):Telegram username 可改 / 被重分配, +/// 用 username 鉴权会让放弃或夺取该 handle 的新持有者获得本机远程控制权。白名单里的 +/// 非数字项(@username)不再生效(`allowed_users` 已剥 `@`,非数字串匹配不到任何 id)。 fn is_authorized(msg: &Message, allow: &[String]) -> bool { let Some(from) = &msg.from else { return false; }; let id = from.id.to_string(); - allow.iter().any(|a| { - a == &id - || from - .username - .as_deref() - .is_some_and(|u| u.eq_ignore_ascii_case(a)) - }) + allow.iter().any(|a| a == &id) } /// Telegram bot daemon 主循环。开关关 / 无 token 时空转;开启后长轮询并分发。 @@ -204,28 +202,25 @@ async fn handle_message(client: &TelegramClient, msg: Message) { if !is_drivable_chat(&msg) { return; } + let sender_id = msg.from.as_ref().map(|f| f.id); if !is_authorized(&msg, &allowed_users()) { - let (id, uname) = msg - .from - .as_ref() - .map(|f| { - ( - f.id.to_string(), - f.username.clone().unwrap_or_else(|| "(无)".into()), - ) - }) - .unwrap_or_else(|| ("?".into(), "?".into())); + let id = sender_id + .map(|i| i.to_string()) + .unwrap_or_else(|| "?".into()); notify( client, chat_id, &format!( - "⛔ 未授权。请在 transfer 设置的「远程控制白名单」加入以下任一,再重试:\n\ - • user id = {id}\n• username = @{uname}" + "⛔ 未授权。请在 transfer 设置的「远程控制白名单」加入你的**数字 user id**,再重试:\n\ + • user id = {id}\n(出于安全,只按数字 id 授权,不支持 @username)" ), ) .await; return; } + let Some(sender_id) = sender_id else { + return; + }; let text = msg.text.unwrap_or_default(); let text = text.trim(); @@ -235,7 +230,7 @@ async fn handle_message(client: &TelegramClient, msg: Message) { if let Some(cmd) = text.strip_prefix('/') { handle_command(client, chat_id, cmd).await; } else { - run_turn(client, chat_id, text).await; + run_turn(client, chat_id, sender_id, text).await; } } @@ -299,12 +294,19 @@ const HELP_TEXT: &str = "🤖 Codex 远程控制\n\ 注:M1 为对话问答式;若 Codex 需要批准命令/工具,请到桌面端确认(批准转发到手机为 M2)。"; /// 驱动 Codex 跑一轮并流式回发。取 [`TURN_LOCK`] 串行化(一台 Codex 一次一轮)。 -async fn run_turn(client: &TelegramClient, chat_id: i64, prompt: &str) { +/// `sender_id` = 发起者数字 user id,用于取锁后重新鉴权(排队期间可能被移出白名单)。 +async fn run_turn(client: &TelegramClient, chat_id: i64, sender_id: i64, prompt: &str) { let _guard = TURN_LOCK.lock().await; - // 排队等锁期间(前一轮可能跑了几分钟)用户可能已关远程控制 → 取到锁后重查, - // 已关则不驱动 Codex(bot-review P2)。 - if !enabled() { - notify(client, chat_id, "ℹ️ 远程控制已关闭,本次指令已取消。").await; + // 排队等锁期间(前一轮可能跑数分钟)配置 / 白名单可能变 → 取到锁后**重查**: + // 关开关 / 发起者已被移出白名单,则取消本次,不驱动 Codex(bot-review P2)。 + let still_authorized = allowed_users().iter().any(|a| a == &sender_id.to_string()); + if !enabled() || !still_authorized { + notify( + client, + chat_id, + "ℹ️ 远程控制已关闭或你已不在白名单,本次指令已取消。", + ) + .await; return; } @@ -455,16 +457,19 @@ async fn run_turn(client: &TelegramClient, chat_id: i64, prompt: &str) { }; let _ = client.send_message(chat_id, msg).await; } - // 完成判定(bot-review P2):必须有**真实 idle 证据**,不靠纯 stable-text 的 drift。 - // ① done_idle:submitting 明确读到 false(显式空闲)+ 内容稳定; - // ② done_final:Codex 给最终 assistant 答案打了 final-assistant 标记(streaming / - // Thinking / 跑工具 / 等批准期间**不在**)+ 内容稳定 —— 比 isSubmitting 可靠, - // 且覆盖 fiber 漂移读不到 submitting 的 Codex 版本(codex-e2e-test skill 实证)。 - // 两者都拿不到(submitting 漂移 + 无 final 标记,如纯工具轮)→ 不提前完成,持锁 - // 等到 MAX_POLLS,由循环后的「超时仍在跑则 stop」兜底,绝不在仍在跑时放锁。 - let done_idle = snap.submitting == Some(false) && stable >= 2; + // 完成判定(bot-review P2):必须有**结束证据**,不在仍可能运行时放锁。 + // ① done_final:Codex 给最终 assistant 答案打了 final-assistant 标记(streaming / + // Thinking / 跑工具 / 等批准期间**不在**)+ 内容稳定 —— 最可靠的结束信号 + // (codex-e2e-test skill 实证),作主判据。 + // ② done_idle_corroborated:submitting==false **本身在 streaming 阶段也会误读 false** + // (见 driver Snapshot 文档),故不能 2 轮即收;要求长稳(8 轮 ≈10s 无变化)+ 非空 + // 文本作佐证,才把它当结束 —— 覆盖「无 final 标记但确已结束」的兜底。 + // 两者都拿不到(submitting 漂移 + 无 final 标记 + 无文本,如纯工具轮)→ 不提前完成, + // 持锁等到 MAX_POLLS,由循环后的「超时仍在跑则 stop」兜底,绝不在仍在跑时放锁。 let done_final = snap.final_ready && stable >= 2; - if done_idle || done_final { + let done_idle_corroborated = + snap.submitting == Some(false) && stable >= 8 && !last_seen.is_empty(); + if done_final || done_idle_corroborated { completed = true; break; } @@ -512,52 +517,49 @@ mod tests { use super::*; use telegram::{Chat, User}; - fn msg_from(id: i64, username: Option<&str>) -> Message { - msg_in_chat(id, username, Some("private")) + fn msg_from(id: i64) -> Message { + msg_in_chat(id, Some("private")) } - fn msg_in_chat(id: i64, username: Option<&str>, chat_kind: Option<&str>) -> Message { + fn msg_in_chat(id: i64, chat_kind: Option<&str>) -> Message { Message { chat: Chat { id: 100, kind: chat_kind.map(str::to_owned), }, - from: Some(User { - id, - username: username.map(str::to_owned), - }), + from: Some(User { id }), text: Some("hi".into()), } } #[test] fn only_private_chats_drivable() { - assert!(is_drivable_chat(&msg_in_chat(1, None, Some("private")))); - assert!(is_drivable_chat(&msg_in_chat(1, None, None))); // kind 缺失放行 - assert!(!is_drivable_chat(&msg_in_chat(1, None, Some("group")))); - assert!(!is_drivable_chat(&msg_in_chat(1, None, Some("supergroup")))); - assert!(!is_drivable_chat(&msg_in_chat(1, None, Some("channel")))); + assert!(is_drivable_chat(&msg_in_chat(1, Some("private")))); + assert!(is_drivable_chat(&msg_in_chat(1, None))); // kind 缺失放行 + assert!(!is_drivable_chat(&msg_in_chat(1, Some("group")))); + assert!(!is_drivable_chat(&msg_in_chat(1, Some("supergroup")))); + assert!(!is_drivable_chat(&msg_in_chat(1, Some("channel")))); } #[test] fn authorized_by_numeric_id() { let allow = vec!["12345".to_string()]; - assert!(is_authorized(&msg_from(12345, None), &allow)); - assert!(!is_authorized(&msg_from(99999, None), &allow)); + assert!(is_authorized(&msg_from(12345), &allow)); + assert!(!is_authorized(&msg_from(99999), &allow)); } #[test] - fn authorized_by_username_case_insensitive() { - let allow = vec!["Alice".to_string()]; - assert!(is_authorized(&msg_from(1, Some("alice")), &allow)); - assert!(is_authorized(&msg_from(1, Some("ALICE")), &allow)); - assert!(!is_authorized(&msg_from(1, Some("bob")), &allow)); + fn username_entries_never_authorize() { + // bot-review P1:白名单里的 @username(非数字)项绝不授权,只认稳定数字 id。 + let allow = vec!["alice".to_string(), "@bob".to_string()]; + assert!(!is_authorized(&msg_from(1), &allow)); + assert!(!is_authorized(&msg_from(42), &allow)); } #[test] fn unauthorized_when_no_from() { let allow = vec!["1".to_string()]; - let mut m = msg_from(1, None); + let mut m = msg_from(1); m.from = None; assert!(!is_authorized(&m, &allow)); } diff --git a/src-tauri/src/codex_remote/telegram.rs b/src-tauri/src/codex_remote/telegram.rs index 7eb1ad75..5b192539 100644 --- a/src-tauri/src/codex_remote/telegram.rs +++ b/src-tauri/src/codex_remote/telegram.rs @@ -40,8 +40,6 @@ pub struct Chat { #[derive(Debug, Clone, Deserialize)] pub struct User { pub id: i64, - #[serde(default)] - pub username: Option, } #[derive(Deserialize)]