-
Notifications
You must be signed in to change notification settings - Fork 69
feat: Implement single-column sorting for interactive table widget #2255
Changes from 1 commit
65d90c2
75174e3
a31771a
021d35a
f5420d2
8fac06c
0e40d69
b4dcce7
0680139
688ec48
b0f051c
eb2f648
e4e302c
7f747b7
07634b9
9669a39
6abc1d6
580b492
96e49eb
a708c57
25a6ade
a04c92b
7d48abd
f9bac38
7b87d2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,8 +16,9 @@ | |
|
|
||
| from importlib import resources | ||
| import functools | ||
| import logging | ||
| import math | ||
| from typing import Any, Dict, Iterator, List, Optional, Type | ||
| from typing import Any, Dict, Iterator, List, Optional, Tuple, Type | ||
| import uuid | ||
|
|
||
| import pandas as pd | ||
|
|
@@ -57,6 +58,8 @@ class TableWidget(WIDGET_BASE): | |
| page_size = traitlets.Int(0).tag(sync=True) | ||
| row_count = traitlets.Int(0).tag(sync=True) | ||
| table_html = traitlets.Unicode().tag(sync=True) | ||
| sort_column = traitlets.Unicode("").tag(sync=True) | ||
| sort_ascending = traitlets.Bool(True).tag(sync=True) | ||
| _initial_load_complete = traitlets.Bool(False).tag(sync=True) | ||
| _batches: Optional[blocks.PandasBatches] = None | ||
| _error_message = traitlets.Unicode(allow_none=True, default_value=None).tag( | ||
|
|
@@ -83,6 +86,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): | |
| self._all_data_loaded = False | ||
| self._batch_iter: Optional[Iterator[pd.DataFrame]] = None | ||
| self._cached_batches: List[pd.DataFrame] = [] | ||
| self._last_sort_state: Optional[Tuple[str, bool]] = None | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can guess based on context what these mean, but I think a frozen dataclass would be much easier to understand at a glance. |
||
|
|
||
| # respect display options for initial page size | ||
| initial_page_size = bigframes.options.display.max_rows | ||
|
|
@@ -215,6 +219,27 @@ def _set_table_html(self) -> None: | |
| ) | ||
| return | ||
|
|
||
| # Apply sorting if a column is selected | ||
| df_to_display = self._dataframe | ||
| if self.sort_column: | ||
| try: | ||
| df_to_display = df_to_display.sort_values( | ||
| by=self.sort_column, ascending=self.sort_ascending | ||
| ) | ||
| except KeyError: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm afraid of this. KeyError is a relatively common error in Python. Just looking at this code, it's not clear to me that this would always be the case of a missing column. Please check for the column presence explicitly, instead.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should trigger this error to be honest. The user only allows the click on a sortable column name to trigger this part of code. Theoretically, we do not even need to catch this bug, because this exception should never be hit in our design. I will remove this try catch block. |
||
| logging.warning( | ||
| f"Attempted to sort by unknown column: {self.sort_column}" | ||
| ) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we catching this exception and dropping it? If we get to this state something bad has happened and the user should know about it.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is my plan:
|
||
|
|
||
| # Reset batches when sorting changes | ||
| if self._last_sort_state != (self.sort_column, self.sort_ascending): | ||
| self._batches = df_to_display._to_pandas_batches(page_size=self.page_size) | ||
| self._cached_batches = [] | ||
| self._batch_iter = None | ||
| self._all_data_loaded = False | ||
| self._last_sort_state = (self.sort_column, self.sort_ascending) | ||
| self.page = 0 # Reset to first page | ||
|
|
||
| start = self.page * self.page_size | ||
| end = start + self.page_size | ||
|
|
||
|
|
@@ -235,6 +260,11 @@ def _set_table_html(self) -> None: | |
| table_id=f"table-{self._table_id}", | ||
| ) | ||
|
|
||
| @traitlets.observe("sort_column", "sort_ascending") | ||
| def _sort_changed(self, _change: Dict[str, Any]): | ||
| """Handler for when sorting parameters change from the frontend.""" | ||
| self._set_table_html() | ||
|
|
||
| @traitlets.observe("page") | ||
| def _page_changed(self, _change: Dict[str, Any]) -> None: | ||
| """Handler for when the page number is changed from the frontend.""" | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -19,6 +19,8 @@ const ModelProperty = { | |||
| PAGE_SIZE: "page_size", | ||||
| ROW_COUNT: "row_count", | ||||
| TABLE_HTML: "table_html", | ||||
| SORT_COLUMN: "sort_column", | ||||
| SORT_ASCENDING: "sort_ascending", | ||||
| }; | ||||
|
|
||||
| const Event = { | ||||
|
|
@@ -124,6 +126,36 @@ function render({ model, el }) { | |||
| // Note: Using innerHTML is safe here because the content is generated | ||||
| // by a trusted backend (DataFrame.to_html). | ||||
| tableContainer.innerHTML = model.get(ModelProperty.TABLE_HTML); | ||||
|
|
||||
| // Add click handlers to column headers for sorting | ||||
| const headers = tableContainer.querySelectorAll("th"); | ||||
| headers.forEach((header) => { | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is quite a bit of logic. I would like to get JavaScript-level unit tests setup before merging this PR.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've set up a comprehensive JavaScript unit testing environment and implemented tests for the Testing Framework & Environment: Test Coverage: The
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! Could you also add a workflow for this to our GitHub Actions folder? https://github.com/googleapis/python-bigquery-dataframes/tree/main/.github/workflows Something like this: |
||||
| const columnName = header.textContent.trim(); | ||||
| if (columnName) { | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not all data types are sortable. See the orderable property in our dtypes.
We should not add the handler or arrow to columns that we can't sort.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is the plan:
|
||||
| header.style.cursor = "pointer"; | ||||
| header.addEventListener(Event.CLICK, () => { | ||||
| const currentSortColumn = model.get(ModelProperty.SORT_COLUMN); | ||||
| const currentSortAscending = model.get(ModelProperty.SORT_ASCENDING); | ||||
|
|
||||
| if (currentSortColumn === columnName) { | ||||
| // Toggle sort direction | ||||
| model.set(ModelProperty.SORT_ASCENDING, !currentSortAscending); | ||||
| } else { | ||||
| // New column, default to ascending | ||||
| model.set(ModelProperty.SORT_COLUMN, columnName); | ||||
| model.set(ModelProperty.SORT_ASCENDING, true); | ||||
| } | ||||
| model.save_changes(); | ||||
| }); | ||||
|
|
||||
| // Add visual indicator for sorted column | ||||
| if (model.get(ModelProperty.SORT_COLUMN) === columnName) { | ||||
| const arrow = model.get(ModelProperty.SORT_ASCENDING) ? " ▲" : " ▼"; | ||||
| header.textContent = columnName + arrow; | ||||
| } | ||||
| } | ||||
| }); | ||||
|
|
||||
| updateButtonStates(); | ||||
| } | ||||
|
|
||||
|
|
||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Have we considered having multiple columns as a possibility? I think a single column is a good starting point, but I think it's an alternative worth considering, especially when a particular column contains lots of duplicate values, like a "date" column.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree that multi-column sorting is particularly valuable when a column has many duplicate values. I would like to get the single column sorting checked in first as a PR. Then check in a second PR for multi-column sorting. This current PR is already complex enough. I prefer two separate PRs as enhancements.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, separate PR makes sense to me, thanks.