-
Notifications
You must be signed in to change notification settings - Fork 69
feat: Implement single-column sorting for interactive table widget #2255
Changes from 7 commits
65d90c2
75174e3
a31771a
021d35a
f5420d2
8fac06c
0e40d69
b4dcce7
0680139
688ec48
b0f051c
eb2f648
e4e302c
7f747b7
07634b9
9669a39
6abc1d6
580b492
96e49eb
a708c57
25a6ade
a04c92b
7d48abd
f9bac38
7b87d2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,7 @@ | |
| from importlib import resources | ||
| import functools | ||
| import math | ||
| from typing import Any, Dict, Iterator, List, Optional, Type | ||
| from typing import Any, Dict, Iterator, List, Optional, Tuple, Type | ||
| import uuid | ||
|
|
||
| import pandas as pd | ||
|
|
@@ -26,6 +26,7 @@ | |
| from bigframes.core import blocks | ||
| import bigframes.dataframe | ||
| import bigframes.display.html | ||
| import bigframes.dtypes as dtypes | ||
|
|
||
| # anywidget and traitlets are optional dependencies. We don't want the import of | ||
| # this module to fail if they aren't installed, though. Instead, we try to | ||
|
|
@@ -61,6 +62,9 @@ class TableWidget(WIDGET_BASE): | |
| allow_none=True, | ||
| ).tag(sync=True) | ||
| table_html = traitlets.Unicode().tag(sync=True) | ||
| sort_column = traitlets.Unicode("").tag(sync=True) | ||
| sort_ascending = traitlets.Bool(True).tag(sync=True) | ||
| orderable_columns = traitlets.List(traitlets.Unicode(), []).tag(sync=True) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the general case, column names could be any "Hashable" value, including integers import bigframes.pandas as bpd
import pandas as pd
bpd.options.bigquery.project = "swast-scratch"
df = bpd.DataFrame([[0, 1], [2, 3]], columns=pd.Index([1, 2]))
print(df)
print(df[1])
print(df[2])Also, another very important case is the MultiIndex: df = bpd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two',
'two'],
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
'baz': [1, 2, 3, 4, 5, 6],
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
pdf = df.pivot(index='foo', columns='bar', values=['baz', 'zoo'])
print(pdf.columns)
print(pdf[("baz", "A")])This isn't needed for SQL Cell, but we do need to make sure we function correctly in the general case. Could you test with these types of DataFrames? If it's not feasible to support these in this PR, please avoid doing the sorting feature for those DataFrames for now and create a bug and a TODO to expand our support for any column label.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I disable these two cases for now and filed b/463754889 |
||
| _initial_load_complete = traitlets.Bool(False).tag(sync=True) | ||
| _batches: Optional[blocks.PandasBatches] = None | ||
| _error_message = traitlets.Unicode(allow_none=True, default_value=None).tag( | ||
|
|
@@ -87,15 +91,20 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): | |
| self._all_data_loaded = False | ||
| self._batch_iter: Optional[Iterator[pd.DataFrame]] = None | ||
| self._cached_batches: List[pd.DataFrame] = [] | ||
| self._last_sort_state: Optional[Tuple[str, bool]] = None | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can guess based on context what these mean, but I think a frozen dataclass would be much easier to understand at a glance. |
||
|
|
||
| # respect display options for initial page size | ||
| initial_page_size = bigframes.options.display.max_rows | ||
|
|
||
| # set traitlets properties that trigger observers | ||
| self.page_size = initial_page_size | ||
| self.orderable_columns = [ | ||
| col | ||
| for col in dataframe.columns | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should iterate through dtypes directly to get both the type and the column name in one call. Calling Aside: could you file an issue to check our limitations on the number of columns from a ux perspective? I imagine we'll need to solve that at some point soon-ish.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Complexity is reduced based on the suggestion |
||
| if dtypes.is_orderable(dataframe.dtypes[col]) | ||
| ] | ||
|
|
||
| # len(dataframe) is expensive, since it will trigger a | ||
| # SELECT COUNT(*) query. It is a must have however. | ||
| # obtain the row counts | ||
| # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()` | ||
| # before we get here so that the count might already be cached. | ||
| self._reset_batches_for_new_page_size() | ||
|
|
@@ -113,6 +122,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): | |
| self.row_count = self._batches.total_rows | ||
|
|
||
| # get the initial page | ||
| self._get_next_batch() | ||
| self._set_table_html() | ||
|
|
||
| # Signals to the frontend that the initial data load is complete. | ||
|
|
@@ -235,6 +245,27 @@ def _set_table_html(self) -> None: | |
| ) | ||
| return | ||
|
|
||
| # Apply sorting if a column is selected | ||
| df_to_display = self._dataframe | ||
| if self.sort_column: | ||
| try: | ||
| df_to_display = df_to_display.sort_values( | ||
| by=self.sort_column, ascending=self.sort_ascending | ||
| ) | ||
| except KeyError: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm afraid of this. KeyError is a relatively common error in Python. Just looking at this code, it's not clear to me that this would always be the case of a missing column. Please check for the column presence explicitly, instead.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should trigger this error to be honest. The user only allows the click on a sortable column name to trigger this part of code. Theoretically, we do not even need to catch this bug, because this exception should never be hit in our design. I will remove this try catch block. |
||
| self._error_message = f"Column '{self.sort_column}' not found. Please select a valid column to sort by." | ||
| # Revert to unsorted state if sorting fails | ||
| self.sort_column = "" | ||
|
|
||
| # Reset batches when sorting changes | ||
| if self._last_sort_state != (self.sort_column, self.sort_ascending): | ||
| self._batches = df_to_display._to_pandas_batches(page_size=self.page_size) | ||
| self._cached_batches = [] | ||
| self._batch_iter = None | ||
| self._all_data_loaded = False | ||
| self._last_sort_state = (self.sort_column, self.sort_ascending) | ||
| self.page = 0 # Reset to first page | ||
|
|
||
| start = self.page * self.page_size | ||
| end = start + self.page_size | ||
|
|
||
|
|
@@ -270,8 +301,14 @@ def _set_table_html(self) -> None: | |
| self.table_html = bigframes.display.html.render_html( | ||
| dataframe=page_data, | ||
| table_id=f"table-{self._table_id}", | ||
| orderable_columns=self.orderable_columns, | ||
| ) | ||
|
|
||
| @traitlets.observe("sort_column", "sort_ascending") | ||
| def _sort_changed(self, _change: Dict[str, Any]): | ||
| """Handler for when sorting parameters change from the frontend.""" | ||
| self._set_table_html() | ||
|
|
||
| @traitlets.observe("page") | ||
| def _page_changed(self, _change: Dict[str, Any]) -> None: | ||
| """Handler for when the page number is changed from the frontend.""" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,10 @@ const ModelProperty = { | |
| PAGE_SIZE: "page_size", | ||
| ROW_COUNT: "row_count", | ||
| TABLE_HTML: "table_html", | ||
| SORT_COLUMN: "sort_column", | ||
| SORT_ASCENDING: "sort_ascending", | ||
| ERROR_MESSAGE: "error_message", | ||
| ORDERABLE_COLUMNS: "orderable_columns", | ||
| }; | ||
|
|
||
| const Event = { | ||
|
|
@@ -38,7 +42,17 @@ function render({ model, el }) { | |
| // Main container with a unique class for CSS scoping | ||
| el.classList.add("bigframes-widget"); | ||
|
|
||
| // Structure | ||
| // Add error message container at the top | ||
| const errorContainer = document.createElement("div"); | ||
| errorContainer.classList.add("error-message"); | ||
| errorContainer.style.display = "none"; | ||
| errorContainer.style.color = "red"; | ||
| errorContainer.style.padding = "8px"; | ||
| errorContainer.style.marginBottom = "8px"; | ||
| errorContainer.style.border = "1px solid red"; | ||
| errorContainer.style.borderRadius = "4px"; | ||
| errorContainer.style.backgroundColor = "#ffebee"; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you move these styles to the CSS file? |
||
|
|
||
| const tableContainer = document.createElement("div"); | ||
| const footer = document.createElement("div"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could use |
||
|
|
||
|
|
@@ -126,14 +140,93 @@ function render({ model, el }) { | |
| } | ||
| } | ||
|
|
||
| /** Updates the HTML in the table container and refreshes button states. */ | ||
| function handleTableHTMLChange() { | ||
| // Note: Using innerHTML is safe here because the content is generated | ||
| // by a trusted backend (DataFrame.to_html). | ||
| tableContainer.innerHTML = model.get(ModelProperty.TABLE_HTML); | ||
|
|
||
| // Get sortable columns from backend | ||
| const sortableColumns = model.get(ModelProperty.ORDERABLE_COLUMNS); | ||
| const currentSortColumn = model.get(ModelProperty.SORT_COLUMN); | ||
| const currentSortAscending = model.get(ModelProperty.SORT_ASCENDING); | ||
|
|
||
| // Add click handlers to column headers for sorting | ||
| const headers = tableContainer.querySelectorAll("th"); | ||
| headers.forEach((header) => { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is quite a bit of logic. I would like to get JavaScript-level unit tests setup before merging this PR.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've set up a comprehensive JavaScript unit testing environment and implemented tests for the Testing Framework & Environment: Test Coverage: The
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! Could you also add a workflow for this to our GitHub Actions folder? https://github.com/googleapis/python-bigquery-dataframes/tree/main/.github/workflows Something like this: |
||
| const headerDiv = header.querySelector("div"); | ||
| const columnName = headerDiv.textContent.trim(); | ||
|
|
||
| // Only add sorting UI for sortable columns | ||
| if (columnName && sortableColumns.includes(columnName)) { | ||
| header.style.cursor = "pointer"; | ||
|
|
||
| // Create a span for the indicator | ||
| const indicatorSpan = document.createElement("span"); | ||
| indicatorSpan.classList.add("sort-indicator"); | ||
| indicatorSpan.style.paddingLeft = "5px"; | ||
|
|
||
| // Determine sort indicator and initial visibility | ||
| let indicator = "●"; // Default: unsorted (dot) | ||
| if (currentSortColumn === columnName) { | ||
| indicator = currentSortAscending ? "▲" : "▼"; | ||
| indicatorSpan.style.visibility = "visible"; // Sorted arrows always visible | ||
| } else { | ||
| indicatorSpan.style.visibility = "hidden"; // Unsorted dot hidden by default | ||
| } | ||
| indicatorSpan.textContent = indicator; | ||
|
|
||
| // Add indicator to the header, replacing the old one if it exists | ||
| const existingIndicator = headerDiv.querySelector(".sort-indicator"); | ||
| if (existingIndicator) { | ||
| headerDiv.removeChild(existingIndicator); | ||
| } | ||
| headerDiv.appendChild(indicatorSpan); | ||
|
|
||
| // Add hover effects for unsorted columns only | ||
| header.addEventListener("mouseover", () => { | ||
| if (currentSortColumn !== columnName) { | ||
| indicatorSpan.style.visibility = "visible"; | ||
| } | ||
| }); | ||
| header.addEventListener("mouseout", () => { | ||
| if (currentSortColumn !== columnName) { | ||
| indicatorSpan.style.visibility = "hidden"; | ||
| } | ||
| }); | ||
|
|
||
| // Add click handler for three-state toggle | ||
| header.addEventListener(Event.CLICK, () => { | ||
| if (currentSortColumn === columnName) { | ||
| if (currentSortAscending) { | ||
| // Currently ascending → switch to descending | ||
| model.set(ModelProperty.SORT_ASCENDING, false); | ||
| } else { | ||
| // Currently descending → clear sort (back to unsorted) | ||
| model.set(ModelProperty.SORT_COLUMN, ""); | ||
| model.set(ModelProperty.SORT_ASCENDING, true); | ||
| } | ||
| } else { | ||
| // Not currently sorted → sort ascending | ||
| model.set(ModelProperty.SORT_COLUMN, columnName); | ||
| model.set(ModelProperty.SORT_ASCENDING, true); | ||
| } | ||
| model.save_changes(); | ||
| }); | ||
| } | ||
| }); | ||
|
|
||
| updateButtonStates(); | ||
| } | ||
|
|
||
| // Add error message handler | ||
| function handleErrorMessageChange() { | ||
| const errorMsg = model.get(ModelProperty.ERROR_MESSAGE); | ||
| if (errorMsg) { | ||
| errorContainer.textContent = errorMsg; | ||
| errorContainer.style.display = "block"; | ||
| } else { | ||
| errorContainer.style.display = "none"; | ||
| } | ||
| } | ||
|
|
||
| // Add event listeners | ||
| prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1)); | ||
| nextPage.addEventListener(Event.CLICK, () => handlePageChange(1)); | ||
|
|
@@ -145,6 +238,7 @@ function render({ model, el }) { | |
| }); | ||
| model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange); | ||
| model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates); | ||
| model.on(`change:${ModelProperty.ERROR_MESSAGE}`, handleErrorMessageChange); | ||
| model.on(`change:_initial_load_complete`, (val) => { | ||
| if (val) { | ||
| updateButtonStates(); | ||
|
|
@@ -163,11 +257,13 @@ function render({ model, el }) { | |
| footer.appendChild(paginationContainer); | ||
| footer.appendChild(pageSizeContainer); | ||
|
|
||
| el.appendChild(errorContainer); | ||
| el.appendChild(tableContainer); | ||
| el.appendChild(footer); | ||
|
|
||
| // Initial render | ||
| handleTableHTMLChange(); | ||
| handleErrorMessageChange(); | ||
| } | ||
|
|
||
| export default { render }; | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Have we considered having multiple columns as a possibility? I think a single column is a good starting point, but I think it's an alternative worth considering, especially when a particular column contains lots of duplicate values, like a "date" column.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree that multi-column sorting is particularly valuable when a column has many duplicate values. I would like to get the single column sorting checked in first as a PR. Then check in a second PR for multi-column sorting. This current PR is already complex enough. I prefer two separate PRs as enhancements.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, separate PR makes sense to me, thanks.