diff --git a/.docker/flyway/sql/Versioned/V1.22__item_change_log_summaries_1h_buckets.sql b/.docker/flyway/sql/Versioned/V1.22__item_change_log_summaries_1h_buckets.sql new file mode 100644 index 0000000..e4636e8 --- /dev/null +++ b/.docker/flyway/sql/Versioned/V1.22__item_change_log_summaries_1h_buckets.sql @@ -0,0 +1,21 @@ +-- Resize item_change_log_summaries buckets from 6h to 1h. Required for the +-- "Unusual activity" pivot's multi-horizon z-scores: at 6h buckets the 1h +-- and 6h windows would both collapse to "last bucket" with no intraday +-- resolution. +-- +-- Going to 1h directly (rather than 2h) so we don't have to re-bucket +-- again later if a finer horizon turns out to be useful. Storage cost is +-- ~1.2GB at 1-year retention on top of an empty start, well within the +-- B1ms 32GB budget. +-- +-- The bucket-size constant lives in DatabaseService.cs (SummaryBucketSeconds). +-- This migration must ship together with that change — a 6h-constant app +-- writing into a freshly-truncated 1h-intent table would happily produce +-- 6h-aligned rows again. +-- +-- After deploy, the existing SummariseChangeLogsAsync job backfills from +-- the earliest item_change_logs row (currently 2025-11-19). First run is +-- one-time long; subsequent runs are incremental. Trigger manually via +-- /hangfire to skip the 30-min wait. + +TRUNCATE TABLE public.item_change_log_summaries; diff --git a/api/TornTools.Application/Services/DatabaseService.cs b/api/TornTools.Application/Services/DatabaseService.cs index 2d75731..8bd5452 100644 --- a/api/TornTools.Application/Services/DatabaseService.cs +++ b/api/TornTools.Application/Services/DatabaseService.cs @@ -22,7 +22,12 @@ public class DatabaseService( IUserRepository userRepository ) : IDatabaseService { - private const double SummaryBucketSeconds = 6 * 3600; + // 1-hour buckets give the multi-horizon z-score in the Unusual Activity + // pivot genuine intraday resolution. Resizing this constant must ship + // alongside V1.22 (which truncates the existing 6h-aligned rows) — a + // 6h-constant app writing into a freshly-truncated 1h-intent table + // would silently produce 6h-aligned rows again. + private const double SummaryBucketSeconds = 1 * 3600; private readonly ILogger _logger = logger ?? throw new ArgumentNullException(nameof(logger)); private readonly IForeignStockItemRepository _foreignStockItemRepository = foreignStockItemRepository ?? throw new ArgumentNullException(nameof(foreignStockItemRepository)); diff --git a/api/TornTools.Persistence/Repositories/ItemVolatilityStatsRepository.cs b/api/TornTools.Persistence/Repositories/ItemVolatilityStatsRepository.cs index 785fe94..a0ff33a 100644 --- a/api/TornTools.Persistence/Repositories/ItemVolatilityStatsRepository.cs +++ b/api/TornTools.Persistence/Repositories/ItemVolatilityStatsRepository.cs @@ -19,7 +19,12 @@ TornToolsDbContext dbContext // // Thresholds (see Top Movers review 2026-04-24): // recent window: last 24h, median of bucket averages, min 3 buckets - // baseline: NOW-30d to NOW-1d, min 10 buckets + // baseline: NOW-30d to NOW-3d with a 2-day buffer so a fresh + // spike doesn't immediately rotate into the baseline + // and trigger a "post-spike reversion" reading. 10% + // trim on each tail before taking the median so a + // single spike inside the baseline window doesn't + // drag the centre point. Min 10 kept buckets. // dispersion: CV of daily medians over 30d, min 14 days, mean > 0 // Items failing a threshold get NULL in the new columns, which excludes // them from the z-score ranking automatically. @@ -71,17 +76,33 @@ FROM bucket_avgs WHERE bucket_start >= NOW() - INTERVAL '1 day' GROUP BY item_id, source ), - baseline_window AS ( + -- Baseline trimming bounds: 10th and 90th percentiles per item. The + -- baseline_window CTE below keeps only values between these, so a + -- single multi-day spike can't drag the median. + baseline_bounds AS ( SELECT item_id, source, - (percentile_cont(0.5) WITHIN GROUP (ORDER BY avg_price))::numeric AS median_price, - COUNT(*)::int4 AS n + (percentile_cont(0.10) WITHIN GROUP (ORDER BY avg_price))::numeric AS p10, + (percentile_cont(0.90) WITHIN GROUP (ORDER BY avg_price))::numeric AS p90 FROM bucket_avgs - WHERE bucket_start < NOW() - INTERVAL '1 day' + WHERE bucket_start < NOW() - INTERVAL '3 days' AND bucket_start >= NOW() - INTERVAL '30 days' GROUP BY item_id, source ), + baseline_window AS ( + SELECT + b.item_id, + b.source, + (percentile_cont(0.5) WITHIN GROUP (ORDER BY b.avg_price))::numeric AS median_price, + COUNT(*)::int4 AS n + FROM bucket_avgs b + JOIN baseline_bounds bb USING (item_id, source) + WHERE b.bucket_start < NOW() - INTERVAL '3 days' + AND b.bucket_start >= NOW() - INTERVAL '30 days' + AND b.avg_price BETWEEN bb.p10 AND bb.p90 + GROUP BY b.item_id, b.source + ), -- Daily medians feed the dispersion (CV) calculation daily_medians AS ( SELECT @@ -190,7 +211,7 @@ public async Task> GetTopAsync( // the values documented in the Top Movers review and exercised in // the analysis/ exploration scripts. const decimal MinAbsMovePct = 0.10m; // item must have moved at least 10% - const decimal MinAbsZScore = 1.0m; // ... and at least 1 dispersion-σ + const decimal MinAbsZScore = 1.5m; // ... and at least 1.5 dispersion-σ query = (sortKey, ascending) switch { diff --git a/context/next-prompt.txt b/context/next-prompt.txt index 3c5e0ff..bfe851c 100644 --- a/context/next-prompt.txt +++ b/context/next-prompt.txt @@ -1,32 +1,28 @@ -Hi Claude. Read context/session-handoff.md and context/note-to-next-instance.md before doing anything. +Hi Claude. Read context/session-handoff.md — the top "2026-04-24 end-of-session summary" block is the current state; the rest is historical narrative worth reading once but skimmable thereafter. -**Session of 2026-04-24 shipped a lot.** On development (and, after Drew releases this batch, main): +**Immediate state**: -- TODO quick-wins sweep (Torn market link, sortable tables, login refactor, polish). -- API key security phases 1+2+3 — at-rest AES-GCM, browser proxy, plaintext column dropped, dead code removed, Codex P1s addressed. -- Top Movers redesign first slice — median-window latest/baseline + per-item dispersion + z-scored ranking with min-move/min-z filters. Flyway V1.21 adds six new columns to `item_volatility_stats`. Widget switched to `move_z_score_1d`. Validated against a 550k-row data export before ship. +- Development/main: all Phase 1+2+3 security work + Top Movers redesign Phase 1 (including the sign-gate fix and the percentile_cont::numeric cast) shipped to prod. Drew verified the widget against real item charts. +- Uncommitted tweaks in the working tree (ItemVolatilityStatsRepository.cs + TODO.md): trimmed-median baseline + 2-day baseline buffer + z-score threshold raised from 1.0 to 1.5. Build clean. Drew will review and commit when he's back. -Uncommitted at time of handoff: the Top Movers redesign files plus the TODO/handoff updates. Everything builds clean (dotnet + tsc + npm run build). Drew will commit + deploy. +**Validated against Drew's data export**: these tweaks improve ranking noise filtering but don't fully eliminate the DSLR-Camera-style post-spike reversion. That's accepted as a known limitation because the next piece of work (the "Unusual activity" pivot) reframes the card so that reversion is a legitimate signal rather than misleading. -**Priorities in order for the next session:** +**Next piece of work: "Unusual activity" pivot**. -1. If Drew hasn't already done so, verify the Top Movers Phase 1 deploy: wait for (or trigger via /hangfire) a RebuildVolatilityStats run, then eyeball the widget. Expected: Ski Mask gone, Scalpel/Edomondo noise, low-range items (Slingshot/Plastic Sword/Fine Chisel) mid-rank, real movers (Rope/Chain Whip/Cassock/Lubricant etc. in the 2026-04-24 snapshot) on top. +Drew wants to keep the risers/fallers cards but add an "unusual activity" framing for markets departing from their normal trends. With ~29 polling keys this is honestly what we CAN detect (vs top-N movers which we can't). Architecture sketch in the handoff: -2. Top Movers remaining slices from the review (see TODO.md): - - (3) Volatility-bucket separation for naturally-noisy items. - - (4) "Most active" ceiling chip + secondary ranking key. - - (5) Confidence chips using stored sample counts. - - Drop legacy columns (current_price, price_change_1d, price_change_1w) once nothing reads them. +- Two-step pass: Hangfire writes an `item_unusual_candidates` shortlist; home-page endpoint joins against fresh data and re-scores cheaply. +- Multi-horizon z-scores (1h / 6h / 24h / 7d) against 30d baseline; "unusualness" = max |z|. +- "Why flagged" chip per row. +- Mode-to-nearest-1%-of-range as a display metric for item pages (not a ranking signal). -3. Parked (need explicit sign-off): - - Read-only prod DB access for offline analysis. - - Cross-item spike correlation / event-calendar tool. +**Two open design questions Drew raised at end of session** (worth confirming before building): -**Data exports**: `data-exports/` (gitignored) holds the CSV dumps Drew provided during the Top Movers work. Five files from 2026-04-24 — summaries, items, current ivs, listings, foreign stocks. Useful reference if iterating on the ranking thresholds or the (3)-(5) slices. +1. SQL in C# vs stored procedures — my vote: stay in C#. +2. Shrink item_change_log_summaries buckets from 6h to 1h — my vote: yes, 1h. Needs a migration plan for existing data. This unlocks genuine intraday resolution for the multi-horizon pivot. -**Key ranking thresholds** (tune these if needed, knobs in ItemVolatilityStatsRepository.GetTopAsync): -- 10% minimum absolute move -- 1.0σ minimum absolute z-score -- 3 recent-window buckets minimum -- 10 baseline-window buckets minimum -- 14 days minimum for dispersion CV +**Do not** start building the pivot without Drew's go-ahead. It touches Flyway, a new table, the rebuild job, a new endpoint, and the widget. Plan first. + +Data exports from 2026-04-24 are in `data-exports/` (gitignored) and were useful for validating ranking tweaks. Python + pandas works fine on the summaries CSV (~550k rows, 33MB). + +Parked items needing explicit sign-off: read-only prod DB access; cross-item spike/event-calendar correlation tool. diff --git a/context/note-to-next-instance.md b/context/note-to-next-instance.md index 6dc8d2a..2e0938a 100644 --- a/context/note-to-next-instance.md +++ b/context/note-to-next-instance.md @@ -1,23 +1,23 @@ # Note to next instance This was a marathon. The UI overhaul plan was already written and thorough before I started, which -made Phase 1 mostly execution against a clear spec — not design. Phase 2 was a smaller focused -round Drew chose after reviewing the TODO. Phase 3 was the merge-and-rebase plumbing. +made Phase 1 mostly execution against a clear spec — not design. Phase 2 was a smaller focused round +Drew chose after reviewing the TODO. Phase 3 was the merge-and-rebase plumbing. A few things worth remembering: **The plan file matters.** When Drew wrote `context/plans/2026-04-23-ui-ux-overhaul.md` before the -session, he included not just *what* to do but *how he'd thought about each decision* — settled vs +session, he included not just _what_ to do but _how he'd thought about each decision_ — settled vs still-open questions, aesthetic defaults, sequenced commits, escalation triggers. That turned multi-hour autonomous work into something I could steer through with confidence. When a plan like that is available, trust it — the work of thinking it through is already done. When it isn't available, write one. -**Codex caught two real bugs post-merge that I'd flagged to myself earlier.** P1 (sign-in -in-flight state never resetting after failure) was literally in my own mid-session handoff as a -"worth a follow-up" note, and I didn't action it. If you flag a bug you can see, fix it before -shipping — otherwise the next reviewer does it for you, which is worse than your own diff hygiene -doing it. Same with P2 — a session-check race I could have foreseen. The fix pattern (separate +**Codex caught two real bugs post-merge that I'd flagged to myself earlier.** P1 (sign-in in-flight +state never resetting after failure) was literally in my own mid-session handoff as a "worth a +follow-up" note, and I didn't action it. If you flag a bug you can see, fix it before shipping — +otherwise the next reviewer does it for you, which is worse than your own diff hygiene doing it. +Same with P2 — a session-check race I could have foreseen. The fix pattern (separate `sessionChecking` flag for the initial getMe, versus `loadingDotNetUserDetails` for user-initiated calls) is generally useful; copy it when you see the same shape elsewhere. @@ -31,48 +31,49 @@ output. connected" until Drew explicitly opened the browser. Ask about it explicitly rather than assuming the `--chrome` flag is enough. -**Rebase conflicts on this branch pair were almost all "drop the feat-branch code entirely".** -The feat branch had added `StaleDataBanner` in the header of a pre-drawer filter layout that -doesn't exist anymore in the merged dev. The conflict marker tempts you to preserve both sides — -don't. The ui branch's drawer layout is the survivor; the feat additions that hang off old -structure just get re-sited (banner into `mainContent`, checkbox into a new row). Take the -structurally newer layout and cherry-pick the logic onto it. +**Rebase conflicts on this branch pair were almost all "drop the feat-branch code entirely".** The +feat branch had added `StaleDataBanner` in the header of a pre-drawer filter layout that doesn't +exist anymore in the merged dev. The conflict marker tempts you to preserve both sides — don't. The +ui branch's drawer layout is the survivor; the feat additions that hang off old structure just get +re-sited (banner into `mainContent`, checkbox into a new row). Take the structurally newer layout +and cherry-pick the logic onto it. -**Drew reads commit messages.** Good commit bodies — what changed *and why*, flagging semantic +**Drew reads commit messages.** Good commit bodies — what changed _and why_, flagging semantic shifts — did real work in this session. When the "Show profitable only" default flipped to OFF, calling that out in the commit message meant Drew didn't have to ask why tables suddenly showed losses. Don't be terse. **The filter drawer animation was unexpectedly fiddly.** Two Boxes swapped via conditional render -lost the transition because the element identity changed. One Box with a dynamic width -(conditional contents inside) animates properly. Same lesson for any MUI collapsing / animating -pattern: animate the container; swap the contents. +lost the transition because the element identity changed. One Box with a dynamic width (conditional +contents inside) animates properly. Same lesson for any MUI collapsing / animating pattern: animate +the container; swap the contents. -**MUI Chip filled vs outlined widths differ by 2px** because of `.MuiChip-label` padding (8 vs 7). -I chased border-box first, which was wrong. Always check the padding before the border when two +**MUI Chip filled vs outlined widths differ by 2px** because of `.MuiChip-label` padding (8 vs 7). I +chased border-box first, which was wrong. Always check the padding before the border when two variants of the same MUI primitive sit next to each other and look different. The theme override that equalises padding is at `client/src/theme/appTheme.ts` if this needs revisiting. **Memory thought**: saving a durable note about the Chrome-extension handshake would probably be useful — "When `--chrome` is set, ask the user to confirm the browser is open before attempting -tabs_context_mcp". Worth persisting as user memory. I didn't do it mid-session because I wasn't -sure whether it was project-specific or general. +tabs_context_mcp". Worth persisting as user memory. I didn't do it mid-session because I wasn't sure +whether it was project-specific or general. --- **Top Movers follow-up (added 2026-04-24).** The `item_volatility_stats` table's name is aspirational — the current rebuild query stores `current_price`, `changes_1d/1w`, and -`price_change_1d/1w`, but nothing that measures *dispersion*. The "latest" and "baseline" -values are both single-bucket averages out of `item_change_log_summaries`, which is why a -one-off $10B Ski Mask listing shows up as a real mover: the bucket average gets pulled into -the stratosphere by a single row, and the "1d ago" bucket often sits mid-spike for items that -revert quickly (Scalpel, Rope, Edomondo Localé). Any redesign should start by reading -`ItemVolatilityStatsRepository.RebuildStatsAsync` and the Flyway `V1.18` migration — the shape -of the output directly falls out of those two. The proposed direction is window-median latest -+ window-median baseline + a stored per-item dispersion measure (MAD of log returns or CV of -daily medians) used to z-score the ranked move. The "Top Movers review" section in -`session-handoff.md` has the full reasoning. Two things that may not be obvious when you get -there: (a) the polling ceiling of ~113 changes per item per 6h is a real constraint — the -"Most active" widget saturates and needs either a ceiling chip or a different activity -measure; (b) "volatility" is the correct term of art here (dispersion of returns), so don't -talk yourself into "variability" or similar fuzzy synonyms in code or UI. +`price_change_1d/1w`, but nothing that measures _dispersion_. The "latest" and "baseline" values are +both single-bucket averages out of `item_change_log_summaries`, which is why a one-off $10B Ski Mask +listing shows up as a real mover: the bucket average gets pulled into the stratosphere by a single +row, and the "1d ago" bucket often sits mid-spike for items that revert quickly (Scalpel, Rope, +Edomondo Localé). Any redesign should start by reading +`ItemVolatilityStatsRepository.RebuildStatsAsync` and the Flyway `V1.18` migration — the shape of +the output directly falls out of those two. The proposed direction is window-median latest + +- window-median baseline + a stored per-item dispersion measure (MAD of log returns or CV of daily + medians) used to z-score the ranked move. The "Top Movers review" section in `session-handoff.md` + has the full reasoning. Two things that may not be obvious when you get there: (a) the polling + ceiling of ~113 changes per item per 6h is a real constraint — the "Most active" widget saturates + and needs either a ceiling chip or a different activity measure; (b) "volatility" is the correct + term of art here (dispersion of returns), so don't talk yourself into "variability" or similar + fuzzy synonyms in code or UI. diff --git a/context/session-handoff.md b/context/session-handoff.md index 731b8c5..df99103 100644 --- a/context/session-handoff.md +++ b/context/session-handoff.md @@ -6,6 +6,109 @@ rebased on top of the merged dev and pushed to origin, awaiting merge. --- +## 2026-04-24 end-of-session summary (read first) + +**All of the following is on `development` (= `main` once Drew releases the final tweaks).** + +Shipped in chronological order: + +- `e5df589` — TODO quick-wins sweep (Torn market link, sortable tables, login refactor, polish). +- `b19421c` — API key security Phase 1+2 (at-rest AES-GCM + browser proxy). +- `11cb3fd` + `90f0234` — Phase 3 cleanup: drop plaintext `api_key` column, delete `tornapi.ts`; + Codex P1 fixes (unreadable-ciphertext graceful fall-through, V1.20 self-gating guard). +- `fefe466` — handoff refresh. +- `7d6844c` — **Top Movers Phase 1**: median-window latest/baseline, per-item dispersion (CV of + daily medians), z-scored ranking, new columns in V1.21, widget rewired. +- `75d52e8` — Codex P2 fix: sign-gated the z-score sort so risers are strictly positive and fallers + strictly negative. +- `629049e` — SQL fix: `percentile_cont` returns `double precision`, so cast to numeric before + `ROUND(x, n)` (the first deploy failed with + `function round(double precision, integer) does not exist`). + +Top Movers Phase 1 then went to prod. Drew eyeballed it against real item charts. Conclusions: 5/5 +risers looked genuinely unusual (Horse's Head, Raw Ivory, Insulin, Lubricant, Samurai Sword). 2/5 +fallers were borderline (Cocktail Ring, Psycho Clown Mask — both under the 10% move floor anyway). +1/5 fallers (DSLR Camera) showed the post-spike reversion failure mode: the spike from 3-4 days ago +was still inside the 30d baseline, pulling it up, so the current reverted price reads as a "fall". +Same shape as the original Edomondo Localé problem. + +### Uncommitted tweaks from end-of-session + +One local commit pending, currently just files in the working tree — Drew to commit + deploy: + +1. **Trimmed-median baseline** — drop the top 10% and bottom 10% of bucket averages from the + baseline window before taking the median (via `percentile_cont(0.10/0.90)` as bounds). Makes a + single multi-day spike inside the baseline window less influential. +2. **2-day baseline buffer** — baseline is now `NOW-30d` to `NOW-3d` (was `NOW-1d`). Fresh spikes + take 2 extra days to rotate into the baseline. +3. **z-threshold raise** — `MinAbsZScore` bumped from `1.0` to `1.5`. Tightens the filter on + borderline "statistical noise" ranks. + +All three are in `ItemVolatilityStatsRepository.cs` only. Files to commit: that file + the TODO.md +edits. Build clean. + +**Honest caveat**: validating against Drew's data export (see `data-exports/`), these tweaks don't +fully fix the DSLR-Camera-style reversion — the spike lasted multiple days, so a 10% trim + 2-day +buffer aren't aggressive enough. Going further (longer baseline, heavier trim) would start excluding +items with less history. Accepting this limitation is the right move because the intended next piece +of work (see below) reframes the whole card around "unusual activity" rather than "who rose/fell", +which makes the reversion signal legitimate rather than misleading. + +### Next: the "Unusual activity" pivot + +Drew suggested (and I strongly agree) pivoting the Top Movers card from "who went up / down" to +"markets departing from their normal trends". Fundamental reason: with ~29 active keys and 6-hour +polling, we genuinely cannot reliably identify "the top N movers" — we can identify items whose +recent behaviour is statistically unusual, because the departures are large relative to dispersion +even with sparse data. Changing the framing to match what we can actually do. + +Risers/fallers cards stay — Drew's call ("markets are expected to be messy, people will use their +own judgement"). The pivot is about broadening the set of signals shown, not replacing the existing +structure. + +Sketch of the architecture (for next-session planning; not yet built): + +- **Two-step pass.** Hangfire (every 6h) writes a new `item_unusual_candidates` table: ~50 items per + source with rich per-item metrics (multi-horizon medians: 1h/6h/24h/7d; dispersion; sample counts; + 30d range; trimmed baseline; maybe mode-to-nearest-1%-of-range for display). Home-page load hits a + cheap endpoint that joins the shortlist against the freshest bucket data and re-scores. Fresh + data + deep stats without either side being expensive. +- **Multi-horizon z-score.** Each candidate gets a z-score at several horizons; "unusualness" = max + |z|. Short-horizon spikes and slow drifts both surface. +- **"Why flagged" chip per row.** Lets the widget say "Price +X% vs month" or "Dispersion up 4σ" so + users see the reason. +- **Mode-to-bucket** — Drew raised it. Useful as a **display** metric on item detail pages (shows + "where sellers actually cluster"), marginal as a ranking signal. Add to the candidates table when + we do the pivot, but don't rank on it. + +### Two open design questions from the end of this session + +1. **SQL-in-C# vs stored procedures.** My recommendation: keep in C#. One committer, app deploy is + cheap, versioned SQL with the app is easier to review. Only pivot to SPs if non-app consumers + need the logic or the SQL starts needing its own test harness. If individual queries (like the + volatility rebuild) grow much larger, worth moving to embedded `.sql` resources for syntax + highlighting / pgAdmin testing — but not SPs. +2. **Shrink `item_change_log_summaries` bucket from 6h to 1h (or 3h)?** Strongly yes, 1h is the + right target for the pivot. The multi-horizon z-score needs genuine intraday resolution — at 6h + buckets the "1h" and "6h" windows both collapse to "last bucket". Cost: ~6× the row count (~10M + rows/year instead of ~1.6M, still fine), and the summariser job runs proportionally. Migration + gotcha: either rebuild from raw `item_change_logs` (requires that data going back 30d+), run both + bucket sizes in parallel during a cutover, or accept some coarser historical data. Worth planning + carefully. 3h buckets are a compromise that doesn't fully unlock intraday detection. + +### Deferred (still open, see TODO.md) + +- Top Movers review slices (3) volatility-bucket separation, (4) Most active saturation chip, (5) + confidence chips — the pivot above subsumes most of (3) and (5); (4) stands alone. +- Drop legacy `current_price` / `price_change_1d` / `price_change_1w` columns after the pivot. +- Data analysis tools / read-only prod DB access — still parked. + +--- + +## Historical — original session narrative preserved below + +--- + ## Phase 1 — `ui/new-design` (merged) Branch has been merged to `main` and `development` on origin. Started from the plan at