Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ All notable changes to SearchMob Desktop are documented here. The version scheme
for, other people on your network. You can **export, import, and reset** the learned model; the
export uses a portable format shared with the Android app, so you can back it up or move it between
devices.
- **Learns from the served browser page too.** When you search from your browser through the local
server, clicking a result trains the model the same way the in-app results do, through an
owner-only redirect that only ever sends you to the result you clicked. People on your network get
plain result links and never train or see your personalization.
- **Setup wizard re-appears once after a feature update.** Existing users see the wizard again (now
framed as "What's new") when an update adds a step worth seeing, so new opt-in features are not
hidden from people who already onboarded.
Expand Down
6 changes: 3 additions & 3 deletions openspec/changes/add-click-personalization/tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@

## 6. Served-page learning (PR D2)

- [ ] 6.1 Add owner-only `/click` route in `server/app.py` with a bounded in-memory `rid -> ordered (url, host)` map; record the skip-above update and 302 to the recorded destination for `rid+pos`; 404 non-owner callers; add `/click` to the owner-gated paths.
- [ ] 6.2 Render result links as `/click?...` only for owner/loopback requests in `server/templates.py`; LAN clients keep bare `<a href>` and are never tracked or personalized.
- [ ] 6.3 Tests: owner click records and redirects correctly; LAN client gets no tracking link and cannot forge `rid`/poison the model; bad `rid`/`pos` fail safe. Gate green; open PR D2.
- [x] 6.1 Add owner-only `/click` route in `server/app.py` with a bounded in-memory `rid -> ordered (url, host)` map; record the skip-above update and 302 to the recorded destination for `rid+pos`; 404 non-owner callers; add `/click` to the owner-gated paths.
- [x] 6.2 Render result links as `/click?...` only for owner/loopback requests in `server/templates.py`; LAN clients keep bare `<a href>` and are never tracked or personalized.
- [x] 6.3 Tests: owner click records and redirects correctly; LAN client gets no tracking link and cannot forge `rid`/poison the model; bad `rid`/`pos` fail safe. Gate green; open PR D2.

## 7. Android parity (separate PRs A1/A2, mobile repo)

Expand Down
14 changes: 13 additions & 1 deletion src/searchmob_desktop/gui/server_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@

from searchmob_desktop.data.api_keys import read_vault_api_keys, resolve_api_key
from searchmob_desktop.data.history import HistoryStore, InMemoryHistoryStore
from searchmob_desktop.data.personalization_store import load_personalization
from searchmob_desktop.data.personalization_store import (
load_personalization,
save_personalization,
)
from searchmob_desktop.data.ranking_store import load_ranking_rules, save_ranking_rules
from searchmob_desktop.engines import (
EngineFn,
Expand Down Expand Up @@ -203,6 +206,13 @@ def _personalization() -> PersonalizationModel | None:
return None
return load_personalization()

def _save_personalization(model: PersonalizationModel) -> bool:
    # Saver handed to the served app for owner clicks on the results page. The enabled flag is
    # re-read from the prefs store on every call, so a click that arrives after the owner
    # switched learning off is dropped (returns False) instead of being written back.
    current_prefs = self._prefs_store.load()
    if current_prefs.personalization_enabled:
        return save_personalization(model)
    return False

app = build_app(
self._engines,
bound_port_getter=lambda: self._port,
Expand All @@ -214,7 +224,9 @@ def _personalization() -> PersonalizationModel | None:
ranking_rules_provider=load_ranking_rules,
ranking_rules_saver=save_ranking_rules,
# The learned click model, applied only for the loopback owner (never network visitors).
# The saver persists owner clicks on the served page back to the encrypted vault.
personalization_provider=_personalization,
personalization_saver=_save_personalization,
# Live prefs so the served Settings page reads and persists preferences without a
# restart. Summary is always wired; the live `summary_enabled` pref gates it.
prefs_provider=self._prefs_store.load,
Expand Down
113 changes: 97 additions & 16 deletions src/searchmob_desktop/server/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@
import asyncio
import inspect
import json
import secrets
import socket
import time
from collections import OrderedDict
from collections.abc import Awaitable, Callable, Sequence
from dataclasses import asdict, replace
from dataclasses import asdict, dataclass, replace
from ipaddress import ip_address
from urllib.parse import parse_qsl, urlsplit

Expand Down Expand Up @@ -60,6 +62,7 @@
parse_goggles,
personalize_reorder,
)
from searchmob_desktop.engines.rank.personalize import query_terms, update_from_click
from searchmob_desktop.engines.rank.slop_blocklist import load_slop_domains
from searchmob_desktop.engines.sort import SortMode, sort_results
from searchmob_desktop.engines.verticals import Vertical, default_sort, transform_query
Expand Down Expand Up @@ -324,6 +327,22 @@ def _suggestions_body(query: str, suggestions: Sequence[str]) -> str:
# through `aggregate`. The default plumbs through to the real `aggregate(ctx, engines)` call.
_MetasearchFn = Callable[[EngineContext, Sequence[EngineFn]], Awaitable[list[SearchResult]]]

# Upper bound on remembered result renders for owner click-tracking (one per recent owner search).
# Small on purpose: only the most recent few pages need clickable tracking links at once.
# Once the bound is exceeded the oldest render is evicted, so a `/click` link from an evicted page
# no longer resolves and the endpoint falls back to redirecting home.
_RENDER_CACHE_MAX = 64


@dataclass(frozen=True)
class _RenderedResults:
"""One owner-rendered result page: the query and the displayed (url, host) order.

Held briefly in memory so the owner-only `/click` endpoint can resolve a click to its result and
its skipped-above neighbors using server state, never a client-supplied URL. Never persisted.
"""

query: str
items: list[tuple[str, str | None]]


def build_app(
engines: Sequence[EngineFn],
Expand All @@ -336,6 +355,7 @@ def build_app(
ranking_rules_provider: Callable[[], RankingRules] | None = None,
ranking_rules_saver: Callable[[RankingRules], bool] | None = None,
personalization_provider: Callable[[], PersonalizationModel | None] | None = None,
personalization_saver: Callable[[PersonalizationModel], bool] | None = None,
prefs_provider: Callable[[], UserPreferences] | None = None,
prefs_saver: Callable[[UserPreferences], bool] | None = None,
history_provider: Callable[[], list[HistoryEntry]] | None = None,
Expand Down Expand Up @@ -399,6 +419,10 @@ def _clamp(raw: str | None) -> str:
static_rules = ranking_rules if ranking_rules is not None else RankingRules()
rules_provider: Callable[[], RankingRules] = ranking_rules_provider or (lambda: static_rules)

# Per-app, in-memory map of recent owner renders (render id -> displayed order), used only to
# resolve owner clicks on the served page back to their result for the `/click` endpoint.
_render_cache: OrderedDict[str, _RenderedResults] = OrderedDict()

def _load_prefs() -> UserPreferences | None:
# Read live preferences per request so the served Settings toggles apply without a restart.
# Fail-soft: any error reading prefs falls back to the static defaults passed to build_app.
Expand All @@ -414,7 +438,7 @@ async def _run_metasearch(
sort_mode: SortMode = SortMode.FRESH_RELEVANT,
vertical: Vertical = Vertical.WEB,
*,
owner: bool = False,
model: PersonalizationModel | None = None,
) -> list[SearchResult]:
if not query.strip() or not engines:
return []
Expand All @@ -428,16 +452,14 @@ async def _run_metasearch(
prefs = _load_prefs()
slop_mode = prefs.ai_slop_mode if prefs is not None else ai_slop_mode
# Sort (relevance/date/freshness blend), then nudge by the owner's learned click model (only
# for the loopback owner, never a network visitor), then apply the user's rules so the
# served results match the in-app results and PIN/RAISE preserve the order.
# when a model is passed, which the caller does for the loopback owner), then apply the
# user's rules so the served results match the in-app results and PIN/RAISE preserve order.
now_ms = int(time.time() * 1000)
ordered = sort_results(results, sort_mode, query, now_ms)
if owner and personalization_provider is not None:
model = personalization_provider()
if model is not None:
ordered = personalize_reorder(
ordered, lambda r: host_of_url(r.url), query, model, now_ms
)
if model is not None:
ordered = personalize_reorder(
ordered, lambda r: host_of_url(r.url), query, model, now_ms
)
return apply_ranking(
ordered,
rules_provider(),
Expand All @@ -447,6 +469,27 @@ async def _run_metasearch(
slop_mode=slop_mode,
)

def _owner_model(request: Request) -> PersonalizationModel | None:
    # Resolve the learned model for this request, or None when no provider is wired, the request
    # is not from loopback, the provider itself returns None, or the provider raises. Callers use
    # the result both to personalize results and to decide whether click tracking applies.
    if personalization_provider is None:
        return None
    if not _is_loopback_request(request):
        return None
    try:
        learned = personalization_provider()
    except Exception:
        # Fail-soft: a broken provider means "no personalization", not a failed request.
        learned = None
    return learned

def _register_render(query: str, results: list[SearchResult]) -> str:
    # Snapshot the displayed order as (url, host) pairs and file it under a fresh random id, so a
    # later owner click can be resolved (together with the results skipped above it) purely from
    # server-side state. The cache lives in memory only and is never persisted.
    displayed = [(result.url, host_of_url(result.url)) for result in results]
    render_id = secrets.token_urlsafe(9)
    _render_cache[render_id] = _RenderedResults(query=query, items=displayed)
    # Keep the cache bounded: drop entries from the front (oldest first) until it fits.
    excess = len(_render_cache) - _RENDER_CACHE_MAX
    for _ in range(excess):
        _render_cache.popitem(last=False)
    return render_id

def _correction(query: str) -> str | None:
# On-device "did you mean". `suggest` is fail-soft and already returns None when the
# corrected query equals the input, so any non-None result is a genuine suggestion.
Expand Down Expand Up @@ -517,10 +560,15 @@ async def search_html(request: Request) -> Response:
# Fetch the contextual summary concurrently with the metasearch so the box never adds
# latency to the results path.
summary_task = asyncio.ensure_future(_maybe_summary(query))
results = await _run_metasearch(
query, sort_mode, vertical, owner=_is_loopback_request(request)
)
model = _owner_model(request)
results = await _run_metasearch(query, sort_mode, vertical, model=model)
summary = await summary_task
# When personalization is on for the owner, route result links through `/click` so a click
# can train the model; everyone else (and a disabled owner) gets the plain destination link.
link_builder: Callable[[int, str], str] | None = None
if model is not None and results:
rid = _register_render(query, results)
link_builder = lambda pos, _url, _rid=rid: f"/click?rid={_rid}&pos={pos}" # noqa: E731
body = render_results_page(
query,
results,
Expand All @@ -532,9 +580,41 @@ async def search_html(request: Request) -> Response:
sort_mode=sort_mode.value,
vertical=vertical.value,
settings_link=_is_settings_owner(request),
link_builder=link_builder,
)
return Response(body, media_type="text/html; charset=utf-8")

async def click(request: Request) -> Response:
    # Owner-only redirector behind the tracked result links on the served page. The destination
    # is looked up from the server-side render cache by `rid` + `pos`; no URL is ever taken from
    # the caller, so this cannot act as an open redirect. Non-loopback callers get a 404.
    if not _is_loopback_request(request):
        return PlainTextResponse("not found", status_code=404)

    def _go_home() -> Response:
        # Shared fail-safe for an unknown render id, a bad position, or an unsafe destination.
        return RedirectResponse("/", status_code=303)

    params = request.query_params
    recorded = _render_cache.get(params.get("rid", ""))
    if recorded is None:
        return _go_home()
    try:
        index = int(params.get("pos", ""))
    except ValueError:
        return _go_home()
    if not 0 <= index < len(recorded.items):
        return _go_home()
    target = recorded.items[index][0]
    if not is_safe_http_url(target):
        return _go_home()

    # Learn from the click only when both a provider and a saver are wired and the owner's model
    # is available for this request.
    if personalization_provider is not None and personalization_saver is not None:
        owner_model = _owner_model(request)
        if owner_model is not None:
            try:
                ranked_hosts = [entry[1] for entry in recorded.items]
                terms = query_terms(recorded.query)
                clicked_at_ms = int(time.time() * 1000)
                update_from_click(owner_model, ranked_hosts, index, terms, clicked_at_ms)
                personalization_saver(owner_model)
            except Exception:
                pass  # Learning is best-effort; never block the navigation on it.
    return RedirectResponse(target, status_code=302)

def _redirect_back(request: Request) -> Response:
# Return to the page the POST came from when it is one of our own origins; else home. 303
# makes the browser re-fetch with GET so a refresh does not re-POST.
Expand Down Expand Up @@ -683,9 +763,7 @@ async def search_json(request: Request) -> Response:
vertical = Vertical.from_value(request.query_params.get("vertical"))
sort_param = request.query_params.get("sort")
sort_mode = SortMode.from_value(sort_param) if sort_param else default_sort(vertical)
results = await _run_metasearch(
query, sort_mode, vertical, owner=_is_loopback_request(request)
)
results = await _run_metasearch(query, sort_mode, vertical, model=_owner_model(request))
payload = {
"query": query,
"results": [asdict(result) for result in results],
Expand Down Expand Up @@ -720,6 +798,9 @@ async def suggest(request: Request) -> Response:
Route("/api/search", search_json, methods=["GET"]),
Route("/opensearch.xml", opensearch_xml, methods=["GET"]),
Route("/suggest", suggest, methods=["GET"]),
# Owner-only click redirector that trains the personalization model (loopback-only; a
# non-owner gets 404, and it only redirects to a server-recorded result URL).
Route("/click", click, methods=["GET"]),
# Personalization edits from the served UI. Gated loopback-only + same-origin in the
# middleware; no-op (503) when no saver is wired.
Route("/rules/domain", set_domain_rule, methods=["POST"]),
Expand Down
11 changes: 8 additions & 3 deletions src/searchmob_desktop/server/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ def render_results_page(
sort_mode: str = "fresh",
vertical: str = "web",
settings_link: bool = False,
link_builder: Callable[[int, str], str] | None = None,
) -> str:
"""The results page. Empty/blank query -> a placeholder; otherwise -> the merged results.

Expand Down Expand Up @@ -510,14 +511,18 @@ def render_results_page(
parts.append(_sort_bar(query, sort_mode))
if editable:
parts.append(_scope_bar(active_rules))
for result in results_list:
for index, result in enumerate(results_list):
parts.append('<div class="result">')
parts.append(f'<div class="url">{escape(_display_url(result.url))}</div>')
if is_safe_http_url(result.url):
# rel=noreferrer backs up the Referrer-Policy header so the query (in the loopback
# URL) never leaks to the destination; noopener severs window.opener.
# URL) never leaks to the destination; noopener severs window.opener. When a
# link_builder is wired (owner + personalization on), the anchor points at the
# owner-only `/click` redirector so the click can train the model; otherwise it is
# the plain destination URL.
href = link_builder(index, result.url) if link_builder is not None else result.url
parts.append(
f'<a href="{escape(result.url, quote=True)}" class="title" '
f'<a href="{escape(href, quote=True)}" class="title" '
f'rel="noopener noreferrer">{escape(result.title)}</a>'
)
else:
Expand Down
Loading