From 7de4cdd622995167086581ea0adaad3b501f33e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Wed, 3 Jun 2026 21:27:55 +0000
Subject: [PATCH] fix(bigframes): avoid exceptions for unnamed JSON columns in
 SQL Cell outputs

---
 packages/bigframes/bigframes/core/indexers.py | 123 +++++++++-
 packages/bigframes/bigframes/dataframe.py     |  22 +-
 packages/bigframes/bigframes/series.py        |   1 -
 .../generative_ai/ai_functions.ipynb          |  44 ++--
 .../bigframes/tests/unit/test_iloc_setitem.py | 212 ++++++++++++++++++
 5 files changed, 374 insertions(+), 28 deletions(-)
 create mode 100644 packages/bigframes/tests/unit/test_iloc_setitem.py

diff --git a/packages/bigframes/bigframes/core/indexers.py b/packages/bigframes/bigframes/core/indexers.py
index c7cfc4f52ade..b3f8e4ce5639 100644
--- a/packages/bigframes/bigframes/core/indexers.py
+++ b/packages/bigframes/bigframes/core/indexers.py
@@ -28,6 +28,7 @@
 import bigframes.core.guid as guid
 import bigframes.core.indexes as indexes
 import bigframes.core.scalar
+import bigframes.core.validations as validations
 import bigframes.core.window_spec as windows
 import bigframes.dataframe
 import bigframes.dtypes
@@ -102,6 +103,18 @@ def __getitem__(
 
         Other key types are not yet supported.
         """
+        requires_ordering = True
+        if (
+            isinstance(key, slice)
+            and (key.start is None or key.start == 0)
+            and (key.step is None or key.step == 1)
+            and key.stop is None
+        ):
+            requires_ordering = False
+
+        if requires_ordering:
+            validations.enforce_ordered(self._series, "iloc")
+
         return _iloc_getitem_series_or_dataframe(self._series, key)
 
 
@@ -244,8 +257,113 @@ def __getitem__(self, key) -> Union[bigframes.dataframe.DataFrame, pd.Series]:
 
         Other key types are not yet supported.
         """
+        requires_ordering = True
+        if isinstance(key, tuple):
+            if len(key) > 0:
+                row_indexer = key[0]
+                if (
+                    isinstance(row_indexer, slice)
+                    and (row_indexer.start is None or row_indexer.start == 0)
+                    and (row_indexer.step is None or row_indexer.step == 1)
+                    and row_indexer.stop is None
+                ):
+                    requires_ordering = False
+        else:
+            if (
+                isinstance(key, slice)
+                and (key.start is None or key.start == 0)
+                and (key.step is None or key.step == 1)
+                and key.stop is None
+            ):
+                requires_ordering = False
+
+        if requires_ordering:
+            validations.enforce_ordered(self._dataframe, "iloc")
+
         return _iloc_getitem_series_or_dataframe(self._dataframe, key)
 
+    def __setitem__(
+        self,
+        key: Tuple[
+            slice, Union[int, typing.Sequence[int], slice, typing.Sequence[bool]]
+        ],
+        value: Union[
+            bigframes.dataframe.SingleItemValue, bigframes.dataframe.DataFrame
+        ],
+    ):
+        """Assign ``value`` to whole columns selected by position.
+
+        Only ``df.iloc[:, col_indexer] = value`` is supported; the frame is
+        updated in place. ``col_indexer`` may be an int, a slice, or a list-like
+        of ints or of booleans; negative ints count from the last column.
+
+        Raises:
+            NotImplementedError: ``key`` is not ``(full row slice, col_indexer)``.
+            TypeError: ``col_indexer`` (or one of its items) has a bad type.
+            IndexError: An integer position is out of bounds.
+            ValueError: A boolean mask's length differs from the column count.
+        """
+        if not (
+            isinstance(key, tuple)
+            and len(key) == 2
+            and isinstance(key[0], slice)
+            and (key[0].start is None or key[0].start == 0)
+            and (key[0].step is None or key[0].step == 1)
+            and key[0].stop is None
+        ):
+            raise NotImplementedError(
+                "Only DataFrame.iloc[:, col_indexer] = value is supported."
+            )
+
+        col_indexer = key[1]
+        n_cols = len(self._dataframe.columns)
+        # bool is a subclass of int, so it must be rejected before the int branch.
+        if isinstance(col_indexer, bool):
+            raise TypeError(
+                "pos must be integer or slice or list-like of integers/booleans"
+            )
+
+        if isinstance(col_indexer, int):
+            col_offset = col_indexer + n_cols if col_indexer < 0 else col_indexer
+            if not 0 <= col_offset < n_cols:
+                raise IndexError("single positional indexer is out-of-bounds")
+            # Column labels may be non-strings (e.g. unnamed columns), which
+            # cannot be expanded into ``assign(**kwargs)`` keyword names.
+            self._dataframe[self._dataframe.columns[col_offset]] = value
+            return
+
+        if isinstance(col_indexer, slice):
+            col_offsets = list(range(*col_indexer.indices(n_cols)))
+        elif pd.api.types.is_list_like(col_indexer):
+            items = list(col_indexer)
+            if items and all(isinstance(item, bool) for item in items):
+                if len(items) != n_cols:
+                    raise ValueError(
+                        f"Boolean index has wrong length: {len(items)} instead of {n_cols}"
+                    )
+                col_offsets = [pos for pos, keep in enumerate(items) if keep]
+            else:
+                col_offsets = []
+                for idx in items:
+                    # Mixed lists containing bools are not a valid integer list.
+                    if isinstance(idx, bool) or not isinstance(idx, int):
+                        raise TypeError("pos list must contain only integers")
+                    if idx < 0:
+                        idx += n_cols
+                    if not 0 <= idx < n_cols:
+                        raise IndexError("positional indexer is out-of-bounds")
+                    col_offsets.append(idx)
+        else:
+            raise TypeError(
+                "pos must be integer or slice or list-like of integers/booleans"
+            )
+
+        col_labels = [self._dataframe.columns[idx] for idx in col_offsets]
+        if not col_labels:
+            return  # Nothing selected (empty slice/list or all-False mask).
+        df = self._dataframe._assign_multi_items(col_labels, value)
+        self._dataframe._set_block(df._get_block())
+
 
 class IatDataFrameIndexer:
     def __init__(self, dataframe: bigframes.dataframe.DataFrame):
@@ -470,8 +588,11 @@ def _iloc_getitem_series_or_dataframe(
 
         # len(key) == 2
         df = typing.cast(bigframes.dataframe.DataFrame, series_or_dataframe)
-        if isinstance(key[1], int):
+        if isinstance(key[0], int) and isinstance(key[1], int):
             return df.iat[key]
+        elif isinstance(key[1], int):
+            col_label = df.columns[key[1]]
+            return df[col_label].iloc[key[0]]
         elif isinstance(key[1], list):
             columns = df.columns[key[1]]
             return _iloc_getitem_series_or_dataframe(df[columns], key[0])
diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py
index 6b7922fe9753..14e064b4c942 100644
--- a/packages/bigframes/bigframes/dataframe.py
+++ b/packages/bigframes/bigframes/dataframe.py
@@ -317,7 +317,6 @@ def loc(self) -> indexers.LocDataFrameIndexer:
         return indexers.LocDataFrameIndexer(self)
 
     @property
-    @validations.requires_ordering()
     def iloc(self) -> indexers.ILocDataFrameIndexer:
         return indexers.ILocDataFrameIndexer(self)
 
@@ -821,22 +820,25 @@ def __repr__(self) -> str:
 
     def _get_display_df(self) -> DataFrame:
         """Process ObjectRef and JSON/nested JSON columns for display."""
+        import bigframes.bigquery as bbq
+
         df = self
         # Arrow/Pandas to_pandas_batches does not support raw JSON/nested JSON
         # columns. Pre-serialize them to string format to bypass this limit.
         # Using TO_JSON_STRING via SqlScalarOp handles complex nested STRUCT
-        # types correctly.
-        json_cols = [
-            col
-            for col in df.columns
-            if bigframes.dtypes.contains_db_dtypes_json_dtype(df[col].dtype)
+        # types correctly. Scan dtypes by offset (labels may be duplicated or
+        # non-string) and copy before assigning: iloc setitem is in place.
+        json_col_indexes = [
+            col_index
+            for col_index, dtype in enumerate(df.dtypes)
+            if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype)
         ]
-        if json_cols:
-            op = ops.SqlScalarOp(
-                _output_type=bigframes.dtypes.STRING_DTYPE,
-                sql_template="TO_JSON_STRING({0})",
+        if json_col_indexes:
+            df = self.copy()
+            df.iloc[:, json_col_indexes] = cast(
+                DataFrame,
+                df.iloc[:, json_col_indexes].apply(bbq.to_json_string),  # type: ignore
             )
-            df = df.assign(**{col: df[col]._apply_unary_op(op) for col in json_cols})
         return df
 
     def _repr_mimebundle_(self, include=None, exclude=None):
diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py
index 60acad0c301f..f4985010b925 100644
--- a/packages/bigframes/bigframes/series.py
+++ b/packages/bigframes/bigframes/series.py
@@ -243,7 +243,6 @@ def loc(self) -> bigframes.core.indexers.LocSeriesIndexer:
         return bigframes.core.indexers.LocSeriesIndexer(self)
 
     @property
-    @validations.requires_ordering()
     def iloc(self) -> bigframes.core.indexers.IlocSeriesIndexer:
         return bigframes.core.indexers.IlocSeriesIndexer(self)
 
diff --git a/packages/bigframes/notebooks/generative_ai/ai_functions.ipynb b/packages/bigframes/notebooks/generative_ai/ai_functions.ipynb
index 13234414df5e..dbb044d3943f 100644
--- a/packages/bigframes/notebooks/generative_ai/ai_functions.ipynb
+++ b/packages/bigframes/notebooks/generative_ai/ai_functions.ipynb
@@ -71,14 +71,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "c9f924aa",
    "metadata": {},
    "outputs": [],
    "source": [
-    "import bigframes.pandas as bpd \n",
+    "import bigframes.pandas as bpd\n",
     "\n",
-    "PROJECT_ID = \"\" # @param {type:\"string\"}\n",
+    "PROJECT_ID = \"\" # @param {type:\"string\"}\n",
     "\n",
     "bpd.options.bigquery.project = PROJECT_ID\n",
     "bpd.options.bigquery.ordering_mode = \"partial\"\n",
@@ -105,16 +105,24 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/global_session.py:103: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
-      "  _global_session = bigframes.session.connect(\n"
+      "/usr/local/google/home/swast/src/github.com/googleapis/google-cloud-python/packages/bigframes/bigframes/core/global_session.py:113: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
+      "  _global_session = bigframes.session.connect(\n",
+      "/usr/local/google/home/swast/src/github.com/googleapis/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
+      "instead of using `db_dtypes` in the future when available in pandas\n",
+      "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
+      "  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
      ]
     },
     {
      "data": {
+      "text/html": [
+       "<pre>0    {\"result\":\"Salad\",\"full_response\":{\"candidates...\n",
+       "1    {\"result\":\"Hotdog\",\"full_response\":{\"candidate...</pre>"
+      ],
       "text/plain": [
-       "0    {'result': 'Salad\\n', 'full_response': '{\"cand...\n",
-       "1    {'result': 'Sausageroll\\n', 'full_response': '...\n",
-       "dtype: struct<result: string, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]"
+       "0    {\"result\":\"Salad\",\"full_response\":{\"candidates...\n",
+       "1    {\"result\":\"Hotdog\",\"full_response\":{\"candidate...\n",
+       "Name: 0, dtype: string"
       ]
      },
      "execution_count": 3,
@@ -156,9 +164,13 @@
    "outputs": [
     {
      "data": {
+      "text/html": [
+       "<pre>0    <NA>\n",
+       "1    <NA></pre>"
+      ],
       "text/plain": [
-       "0     Lettuce\n",
-       "1    The food\n",
+       "0    <NA>\n",
+       "1    <NA>\n",
        "Name: result, dtype: string"
       ]
      },
@@ -327,7 +339,7 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>tiger</td>\n",
-       "      <td>8.0</td>\n",
+       "      <td>7.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -342,7 +354,7 @@
       "text/plain": [
        "      animals  relative_weight\n",
        "1      spider              1.0\n",
-       "0       tiger              8.0\n",
+       "0       tiger              7.0\n",
        "2  blue whale             10.0\n",
        "\n",
        "[3 rows x 2 columns]"
@@ -465,7 +477,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "id": "2e66110a",
    "metadata": {},
    "outputs": [
@@ -518,7 +530,7 @@
        "[2 rows x 2 columns]"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -533,7 +545,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "venv (3.10.17)",
+   "display_name": "venv",
    "language": "python",
    "name": "python3"
   },
@@ -547,7 +559,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.17"
+   "version": "3.14.3"
   }
  },
  "nbformat": 4,
diff --git a/packages/bigframes/tests/unit/test_iloc_setitem.py b/packages/bigframes/tests/unit/test_iloc_setitem.py
new file mode 100644
index 000000000000..66b9bf9e3086
--- /dev/null
+++ b/packages/bigframes/tests/unit/test_iloc_setitem.py
@@ -0,0 +1,212 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Generator
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import bigframes
+import bigframes.pandas as bpd
+from bigframes.testing.utils import assert_frame_equal, assert_series_equal
+
+pytest.importorskip("polars")
+
+
+@pytest.fixture(scope="module", autouse=True)
+def session() -> Generator[bigframes.Session, None, None]:
+    """Install a ``polars_session.TestSession`` as the global session."""
+    from bigframes.core import global_session
+    from bigframes.testing import polars_session
+    test_session = polars_session.TestSession()
+    with global_session._GlobalSessionContext(test_session):
+        yield test_session
+
+
+@pytest.fixture
+def sample_df() -> bpd.DataFrame:
+    """Return a 3x3 integer frame with columns ``A``, ``B`` and ``C``."""
+    columns = {
+        "A": [1, 2, 3],
+        "B": [4, 5, 6],
+        "C": [7, 8, 9],
+    }
+    local_frame = pd.DataFrame(columns)
+    return bpd.read_pandas(local_frame)
+
+
+def test_iloc_setitem_single_integer(sample_df):
+    """A scalar assigned to one positive column offset matches pandas."""
+    actual = sample_df.copy()
+    expected = sample_df.to_pandas()
+
+    actual.iloc[:, 1] = 99
+    expected.iloc[:, 1] = 99
+    assert_frame_equal(actual.to_pandas(), expected)
+
+
+def test_iloc_setitem_single_integer_negative(sample_df):
+    """A scalar assigned through a negative column offset matches pandas."""
+    actual = sample_df.copy()
+    expected = sample_df.to_pandas()
+
+    actual.iloc[:, -1] = 99
+    expected.iloc[:, -1] = 99
+    assert_frame_equal(actual.to_pandas(), expected)
+
+
+def test_iloc_setitem_list_integer(sample_df):
+    """A list of column offsets with a per-column value list matches pandas."""
+    actual = sample_df.copy()
+    expected = sample_df.to_pandas()
+
+    actual.iloc[:, [0, 2]] = [99, 88]
+    expected.iloc[:, [0, 2]] = [99, 88]
+    assert_frame_equal(actual.to_pandas(), expected)
+
+
+def test_iloc_setitem_slice(sample_df):
+    """A scalar assigned through a column slice matches pandas."""
+    actual = sample_df.copy()
+    expected = sample_df.to_pandas()
+
+    actual.iloc[:, 0:2] = 99
+    expected.iloc[:, 0:2] = 99
+    assert_frame_equal(actual.to_pandas(), expected)
+
+
+def test_iloc_setitem_boolean_mask(sample_df):
+    """A boolean list selects the columns to overwrite with a scalar."""
+    actual = sample_df.copy()
+    expected = sample_df.to_pandas()
+    keep = [True, False, True]
+
+    actual.iloc[:, keep] = 99
+    expected.iloc[:, np.array(keep)] = 99
+    assert_frame_equal(actual.to_pandas(), expected)
+
+
+def test_iloc_setitem_dataframe(sample_df):
+    """A DataFrame value assigned to the column slice ``1:3`` matches pandas."""
+    actual = sample_df.copy()
+    expected = sample_df.to_pandas()
+    replacement = bpd.DataFrame({"B": [99, 88, 77], "C": [66, 55, 44]})
+
+    actual.iloc[:, 1:3] = replacement
+    expected.iloc[:, 1:3] = replacement.to_pandas()
+    assert_frame_equal(actual.to_pandas(), expected)
+
+
+def test_iloc_getitem_single_integer(sample_df):
+    """``iloc[:, int]`` yields the same column Series as pandas."""
+    expected_frame = sample_df.to_pandas()
+
+    actual = sample_df.iloc[:, 1].to_pandas()
+    expected = expected_frame.iloc[:, 1]
+
+    assert_series_equal(actual, expected)
+
+
+def test_iloc_getitem_unordered(sample_df):
+    """Only full-row-slice ``iloc`` keys are allowed when ordering is ambiguous."""
+    session = sample_df._session
+    original_strictly_ordered = session._strictly_ordered
+    original_allow_ambiguity = session._allow_ambiguity
+    try:
+        session._strictly_ordered = False
+        session._allow_ambiguity = True
+
+        import unittest.mock as mock
+
+        with (
+            mock.patch.object(
+                type(sample_df._block.expr),
+                "order_ambiguous",
+                new_callable=mock.PropertyMock,
+            ) as mock_ambiguous,
+            mock.patch.object(
+                type(sample_df._block),
+                "explicitly_ordered",
+                new_callable=mock.PropertyMock,
+            ) as mock_explicit,
+        ):
+            mock_ambiguous.return_value = True
+            mock_explicit.return_value = False
+
+            # 1. Column indexing only - should NOT raise
+            try:
+                sample_df.iloc[:, 1]
+            except bigframes.exceptions.OrderRequiredError:
+                pytest.fail("iloc[:, col] raised OrderRequiredError unexpectedly!")
+
+            # 1b. Column indexing with slice(0, None) (NOT exactly `:` but fine) - should NOT raise
+            try:
+                sample_df.iloc[slice(0, None), 1]
+            except bigframes.exceptions.OrderRequiredError:
+                pytest.fail("iloc[0:, col] raised OrderRequiredError unexpectedly!")
+
+            # 1c. Column indexing with slice(None, None, 1) (NOT exactly `:` but fine) - should NOT raise
+            try:
+                sample_df.iloc[slice(None, None, 1), 1]
+            except bigframes.exceptions.OrderRequiredError:
+                pytest.fail("iloc[::1, col] raised OrderRequiredError unexpectedly!")
+
+            # 1d. Column indexing with slice(1, None) (row subset) - should RAISE
+            with pytest.raises(bigframes.exceptions.OrderRequiredError):
+                sample_df.iloc[slice(1, None), 1]
+
+            # 1e. Column indexing with slice(None, 2) (row subset) - should RAISE
+            with pytest.raises(bigframes.exceptions.OrderRequiredError):
+                sample_df.iloc[slice(None, 2), 1]
+
+            # 2. Column setitem only - should NOT raise
+            try:
+                bf_df = sample_df.copy()
+                bf_df.iloc[:, 1] = 99
+            except bigframes.exceptions.OrderRequiredError:
+                pytest.fail(
+                    "iloc[:, col] = val raised OrderRequiredError unexpectedly!"
+                )
+
+            # 3. Row indexing - should RAISE
+            with pytest.raises(bigframes.exceptions.OrderRequiredError):
+                sample_df.iloc[1, :]
+
+            # 4. Single indexer (row indexing) - should RAISE
+            with pytest.raises(bigframes.exceptions.OrderRequiredError):
+                sample_df.iloc[1]
+
+    finally:
+        session._strictly_ordered = original_strictly_ordered
+        session._allow_ambiguity = original_allow_ambiguity
+
+
+def test_iloc_setitem_errors(sample_df):
+    """Out-of-range or unsupported keys raise the expected exception types."""
+    frame = sample_df.copy()
+    every_row = slice(None)
+    rejected = [
+        # Out of bounds
+        (IndexError, (every_row, 3)),
+        (IndexError, (every_row, -4)),
+        # Invalid key type (not slice(None) for rows)
+        (NotImplementedError, (0, 1)),
+        # Invalid col indexer type
+        (TypeError, (every_row, "B")),
+    ]
+
+    for expected_error, key in rejected:
+        with pytest.raises(expected_error):
+            frame.iloc[key] = 99