diff --git a/buckaroo/customizations/pandas_commands.py b/buckaroo/customizations/pandas_commands.py
index 242c184ad..3a06c2427 100644
--- a/buckaroo/customizations/pandas_commands.py
+++ b/buckaroo/customizations/pandas_commands.py
@@ -473,16 +473,29 @@ class Search(Command):
     command_pattern = [[3, 'term', 'type', 'string']]
     quick_args_pattern = [[3, 'term', 'type', 'string']]
 
-    @staticmethod 
+    @staticmethod
     def transform(df, col, val):
+        """Filter df with search_df_str and report highlight metadata.
+
+        Returns df unchanged when val is None or the empty string.
+        Otherwise returns SDResult(filtered, sd_updates), where sd_updates
+        maps every string/object-dtype column of df to
+        {'highlight_phrase': [val]}.
+        """
-        #print("search_df", val)
         if val is None or val == "":
-            #print("no search term set")
             return df
-        return search_df_str(df, val)
+        filtered = search_df_str(df, val)
+        # search_df_str uses literal `Series.str.find`, so expose the term
+        # as highlight_phrase (list) — not highlight_regex — for matching
+        # filter semantics on the JS-side string displayer.
+        str_cols = list(df.select_dtypes("string").columns)
+        str_cols.extend(df.select_dtypes("object").columns)
+        sd_updates = {c: {'highlight_phrase': [val]} for c in str_cols}
+        return SDResult(filtered, sd_updates)
 
 
-    @staticmethod 
+    @staticmethod
     def transform_to_py(df, col, val):
+        """Return generated Python source that replays this search; val is
+        embedded via repr() so quotes/backslashes cannot break the code."""
         return f"""    from buckaroo.customizations.pandas_commands import search_df_str
-    return search_df_str(df, '{val}')"""
+    return search_df_str(df, {val!r})"""
diff --git a/tests/unit/commands/pandas_commands_test.py b/tests/unit/commands/pandas_commands_test.py
index 0d0fc09b9..e6d2142fd 100644
--- a/tests/unit/commands/pandas_commands_test.py
+++ b/tests/unit/commands/pandas_commands_test.py
@@ -378,12 +378,15 @@ def test_search_none_needle():
 
 
 def test_search_with_match():
-    """Test Search filters rows containing the needle."""
+    """Test Search filters rows containing the needle. Search returns
+    SDResult(filtered_df, sd_updates) so the JS side can highlight the
+    search term — unwrap the df for the length assertion."""
     base_df = pd.DataFrame({
         'name': pd.Series(['Alice', 'Bob', 'Charlie'], dtype='object'),
         'b': [1, 2, 3]})
     result = Search.transform(base_df.copy(), 'name', 'Bob')
-    assert len(result) == 1
+    assert len(result.df) == 1
+    assert result.sd_updates['name'] == {'highlight_phrase': ['Bob']}
 
 
 def test_search_to_py():
diff --git a/tests/unit/dataflow/autocleaning_pd_test.py b/tests/unit/dataflow/autocleaning_pd_test.py
index fe2547f5e..b0467dad2 100644
--- a/tests/unit/dataflow/autocleaning_pd_test.py
+++ b/tests/unit/dataflow/autocleaning_pd_test.py
@@ -8,10 +8,7 @@ from buckaroo.dataflow.autocleaning import AutocleaningConfig
 from buckaroo.dataflow.autocleaning import PandasAutocleaning, generate_quick_ops
 from buckaroo.jlisp.lisp_utils import (s, sA, sQ)
 
-from buckaroo.customizations.pandas_commands import (
-    Command,
-    SafeInt, DropCol, FillNA, GroupBy, NoOp, Search, OnlyOutliers
-)
+from buckaroo.customizations.pandas_commands import (Command, SafeInt, DropCol, FillNA, GroupBy, NoOp, Search, OnlyOutliers)
 from buckaroo.customizations.pd_autoclean_conf import (NoCleaningConf)
 from buckaroo.dataflow.dataflow import CustomizableDataflow
 
@@ -624,3 +621,29 @@ def test_two_arg_quick_command():
 
 
 
+
+
+class SearchConf(AutocleaningConfig):
+    autocleaning_analysis_klasses = []
+    command_klasses = [Search]
+    name = ""
+
+
+def test_search_threads_highlight_phrase_into_cleaning_sd_under_rename():
+    """Search plumbs its search term into cleaning_sd as highlight_phrase on
+    every string/object column. The rest of the sd is keyed by buckaroo's
+    internal a/b/c names, so autocleaning rewrites the op-supplied keys to
+    match — otherwise the entries would sit alongside as orphans without
+    a `_type` and trip the styling fallback. pandas Search uses literal
+    str.find, so highlight_phrase (not _regex) matches the filter semantics."""
+    ac = PandasAutocleaning([SearchConf])
+    # 'businessname' (object) becomes 'a', 'rating' (int) becomes 'b'.
+    df = pd.DataFrame({'businessname': ['pizza', 'sushi'], 'rating': [5, 4]})
+    search_op = [{'symbol': 'search'}, s('df'), 'col', 'pizza']
+
+    _cleaned, cleaning_sd, _gen, _ops = ac.handle_ops_and_clean(
+        df, cleaning_method='', quick_command_args={}, existing_operations=[search_op])
+
+    assert cleaning_sd.get('a', {}).get('highlight_phrase') == ['pizza']
+    assert 'businessname' not in cleaning_sd
+    assert 'highlight_phrase' not in cleaning_sd.get('b', {})
diff --git a/tests/unit/dataflow/customizable_dataflow_test.py b/tests/unit/dataflow/customizable_dataflow_test.py
index 5f207cfc2..394cb69ac 100644
--- a/tests/unit/dataflow/customizable_dataflow_test.py
+++ b/tests/unit/dataflow/customizable_dataflow_test.py
@@ -581,3 +581,38 @@ class VCBuckarooWidget(BuckarooWidget):
         [sQ('search'), s('df'), "col", "needle"]]
     assert len(vcb.dataflow.processed_df) == 2
     assert vcb.df_meta['filtered_rows'] == 2
+
+
+def _find_cc(column_config, col_name):
+    for entry in column_config:
+        if entry.get('col_name') == col_name:
+            return entry
+    raise AssertionError(f"col_name {col_name!r} not in column_config")
+
+
+def test_search_op_delivers_highlight_phrase_into_displayer_args():
+    """End-to-end: a `search` operation on the widget should plumb its
+    search term into `displayer_args.highlight_phrase` for every string
+    column in the final df_viewer_config that gets sent to the JS side.
+    pandas Search uses literal str.find, so the search term flows as
+    highlight_phrase (not highlight_regex)."""
+    df = pd.DataFrame({
+        'businessname': ['pizza', 'sushi', 'taco'],
+        'comments': ['area code', 'no match', 'area zone'],
+        'rating': [5, 4, 3]})
+
+    class VCBuckarooWidget(BuckarooInfiniteWidget):
+        autoclean_conf = tuple([NoCleaningConf])
+
+    w = VCBuckarooWidget(df, debug=False)
+    w.dataflow.operations = [[{'symbol': 'search'}, s('df'), 'col', 'area']]
+
+    cc = w.df_display_args['main']['df_viewer_config']['column_config']
+    a_args = _find_cc(cc, 'a')['displayer_args']
+    assert a_args['displayer'] == 'string'
+    assert a_args['highlight_phrase'] == ['area']
+    b_args = _find_cc(cc, 'b')['displayer_args']
+    assert b_args['displayer'] == 'string'
+    assert b_args['highlight_phrase'] == ['area']
+    c_args = _find_cc(cc, 'c')['displayer_args']
+    assert 'highlight_phrase' not in c_args