xorq: plumb search-term highlighting through the xorq op pipeline

Review notes (text ahead of the first "diff --git" header is ignored by
`git apply` / `patch`):

* _xorq_search returns the bare expr when the search value is None or "",
  and otherwise an (expr, sd_updates) tuple whose sd_updates maps every
  searched string column to {'highlight_phrase': [val]}.
* _apply_xorq_ops now always returns (expr, sd_updates); handlers may
  still return a bare expr.  Per-column dicts are merged with
  dict.update, so a later op overwrites an earlier op's value for the
  same key.
* handle_ops_and_clean: the column-identity guard is an explicit
  `raise AssertionError(...)` instead of an `assert` statement, because
  `python -O` strips assert statements and the guard would disappear.
  The exception type and message are unchanged.
* The abbreviated post-image blob hash on the first "index" line was
  carried over from the original commit and no longer matches the
  post-image after the change above; regenerate the patch if the hash
  matters (e.g. for `git apply --3way`).

diff --git a/buckaroo/xorq_buckaroo.py b/buckaroo/xorq_buckaroo.py
index cb03b742b..0dbda1c45 100644
--- a/buckaroo/xorq_buckaroo.py
+++ b/buckaroo/xorq_buckaroo.py
@@ -13,7 +13,7 @@
 import logging
 import traceback
 from io import BytesIO
-from typing import Any
+from typing import Any, Dict, Tuple
 
 import pandas as pd
 import pyarrow as pa
@@ -24,7 +24,8 @@
 from .customizations.styling import DefaultMainStyling, DefaultSummaryStatsStyling
 from .customizations.xorq_stats_v2 import XORQ_STATS_V2
 from .dataflow.autocleaning import (
-    AutocleaningConfig, PandasAutocleaning, generate_quick_ops, merge_ops, ops_eq)
+    AutocleaningConfig, PandasAutocleaning, _rekey_op_sd_to_internal,
+    generate_quick_ops, merge_ops, ops_eq)
 from .dataflow.dataflow import CustomizableDataflow
 from .dataflow.dataflow_extras import Sampling
 from .df_util import old_col_new_col
@@ -90,11 +91,17 @@ def serialize_sample(cls, df_or_expr):
 
 
 def _xorq_search(expr, _col, val):
-    """Filter rows where any string column contains ``val``.
+    """Filter rows where any string column contains ``val``, and emit
+    ``sd_updates`` so the JS-side string displayer highlights matches.
 
-    Mirrors the contract of the pandas / polars Search commands: an
-    empty value short-circuits to a no-op so the frontend can clear
-    the search by sending ``""``.
+    Return shape mirrors the SDResult contract from #758:
+      - bare expr          : empty value short-circuits to a no-op
+      - (expr, sd_updates) : filtered expr + per-string-column highlight
+
+    ibis ``StringValue.contains`` is a literal substring match (not
+    regex), so the term flows as ``highlight_phrase`` (list) rather
+    than ``highlight_regex`` — matching the filter semantics on the
+    JS-side string displayer.
     """
     if val is None or val == "":
         return expr
@@ -106,7 +113,8 @@ def _xorq_search(expr, _col, val):
     for c in string_cols:
         c_cond = expr[c].contains(val)
         cond = c_cond if cond is None else cond | c_cond
-    return expr.filter(cond)
+    sd_updates = {c: {'highlight_phrase': [val]} for c in string_cols}
+    return expr.filter(cond), sd_updates
 
 
 class XorqSearch:
@@ -163,19 +171,48 @@ def handle_ops_and_clean(self, df, cleaning_method, quick_command_args, existing
         final_ops = merge_ops(existing_for_merge, quick_ops)
         if not final_ops:
             return [df, {}, "", []]
-        result = self._apply_xorq_ops(df, final_ops)
-        return [result, {}, "", final_ops]
+        result_expr, sd_updates = self._apply_xorq_ops(df, final_ops)
+        # Rekey op-supplied sd entries from orig col names onto buckaroo's
+        # internal a/b/c letter keys, so they merge cleanly with the
+        # summary_sd that XorqDataflow._get_summary_sd produces (also
+        # keyed by letter). Every current handler preserves column
+        # identity (filter ops), so orig→letter is the same on the input
+        # and the result. The explicit raise (rather than an ``assert``
+        # statement, which ``python -O`` strips) guards against a future
+        # handler that renames/drops/reorders columns silently
+        # corrupting the rekey.
+        if list(result_expr.columns) != list(df.columns):
+            raise AssertionError(
+                "xorq op changed column identity — _rekey_op_sd_to_internal "
+                "would mis-map sd entries; rekey against result_expr or "
+                "thread the orig→letter mapping through _apply_xorq_ops")
+        cleaning_sd = _rekey_op_sd_to_internal(sd_updates, df)
+        return [result_expr, cleaning_sd, "", final_ops]
 
     @staticmethod
-    def _apply_xorq_ops(expr, ops):
+    def _apply_xorq_ops(expr, ops) -> Tuple[Any, Dict[str, Dict[str, Any]]]:
+        """Apply ops to ``expr``; accumulate op-contributed sd entries.
+
+        Each handler may return either a bare expr (legacy) or an
+        ``(expr, sd_updates)`` tuple. Tuples merge col-by-col into the
+        running sd_updates dict so multiple ops touching the same
+        column compose.
+        """
+        sd_updates: Dict[str, Dict[str, Any]] = {}
         for op in ops:
             sym_name = op[0]['symbol'] if isinstance(op[0], dict) else op[0]
             handler = _XORQ_OP_HANDLERS.get(sym_name)
             if handler is None:
                 continue
             handler_args = op[2:]
-            expr = handler(expr, *handler_args)
-        return expr
+            result = handler(expr, *handler_args)
+            if isinstance(result, tuple):
+                expr, op_sd = result
+                for col, updates in op_sd.items():
+                    sd_updates.setdefault(col, {}).update(updates)
+            else:
+                expr = result
+        return expr, sd_updates
 
 
 class XorqDataflow(CustomizableDataflow):
diff --git a/tests/unit/test_xorq_buckaroo_widget.py b/tests/unit/test_xorq_buckaroo_widget.py
index 13336da7e..2bdfde510 100644
--- a/tests/unit/test_xorq_buckaroo_widget.py
+++ b/tests/unit/test_xorq_buckaroo_widget.py
@@ -182,6 +182,58 @@ def test_empty_search_clears_filter(self):
         assert w.df_meta["filtered_rows"] == 5
 
 
+def _find_cc(column_config, col_name):
+    for entry in column_config:
+        if entry.get("col_name") == col_name:
+            return entry
+    raise AssertionError(f"col_name {col_name!r} not in column_config")
+
+
+class TestSearchHighlight:
+    """Equivalent of the polars-search highlight wiring from #758, ported
+    to the xorq backend. ibis ``StringValue.contains`` is a literal
+    substring match, so the search term flows to the JS displayer as
+    ``highlight_phrase`` (list), not ``highlight_regex``."""
+
+    def test_search_op_delivers_highlight_phrase_into_displayer_args(self):
+        """End-to-end through XorqBuckarooWidget — a `search` op should
+        plumb its term into ``displayer_args.highlight_phrase`` for every
+        ibis-String column and skip non-string columns."""
+        w = XorqBuckarooWidget(_searchable_expr())
+        state = w.buckaroo_state.copy()
+        state["quick_command_args"] = {"search": ["admin"]}
+        w.buckaroo_state = state
+
+        cc = w.df_display_args["main"]["df_viewer_config"]["column_config"]
+        # 'a' is name (string)
+        a_args = _find_cc(cc, "a")["displayer_args"]
+        assert a_args["displayer"] == "string"
+        assert a_args["highlight_phrase"] == ["admin"]
+        # 'b' is role (string)
+        b_args = _find_cc(cc, "b")["displayer_args"]
+        assert b_args["displayer"] == "string"
+        assert b_args["highlight_phrase"] == ["admin"]
+        # 'c' is score (integer) — no highlight
+        c_args = _find_cc(cc, "c")["displayer_args"]
+        assert "highlight_phrase" not in c_args
+
+    def test_empty_search_drops_highlight_from_displayer_args(self):
+        """Clearing the search box (``""``) should remove the highlight
+        from displayer_args, matching the filter going back to no-op."""
+        w = XorqBuckarooWidget(_searchable_expr())
+        state = w.buckaroo_state.copy()
+        state["quick_command_args"] = {"search": ["admin"]}
+        w.buckaroo_state = state
+
+        state = w.buckaroo_state.copy()
+        state["quick_command_args"] = {"search": [""]}
+        w.buckaroo_state = state
+
+        cc = w.df_display_args["main"]["df_viewer_config"]["column_config"]
+        a_args = _find_cc(cc, "a")["displayer_args"]
+        assert "highlight_phrase" not in a_args
+
+
 def _paginated_expr():
     return xo.memtable(
         {"a": [3, 1, 4, 1, 5, 9, 2, 6, 5, 3], "b": ["p", "q", "r", "s", "t", "u", "v", "w", "x", "y"]})