
Commit f11a100

Merge branch 'main' into df-where-callable
2 parents 81481e6 + b692713 commit f11a100

File tree: 65 files changed (+1973, -253 lines)


bigframes/_config/display_options.py

Lines changed: 7 additions & 1 deletion
@@ -26,8 +26,12 @@
 class DisplayOptions:
     __doc__ = vendored_pandas_config.display_options_doc
 
+    # Options borrowed from pandas.
     max_columns: int = 20
-    max_rows: int = 25
+    max_rows: int = 10
+    precision: int = 6
+
+    # Options unique to BigQuery DataFrames.
     progress_bar: Optional[str] = "auto"
     repr_mode: Literal["head", "deferred", "anywidget"] = "head"
 
@@ -52,6 +56,8 @@ def pandas_repr(display_options: DisplayOptions):
         display_options.max_columns,
         "display.max_rows",
         display_options.max_rows,
+        "display.precision",
+        display_options.precision,
         "display.show_dimensions",
         True,
     ) as pandas_context:
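
Note: the new precision option mirrors pandas' display.precision and is threaded into the pandas.option_context used for repr. A minimal sketch of the user-facing effect, assuming the option is surfaced as bigframes.pandas.options.display.precision (the exact attribute path is an assumption here, and creating a DataFrame still requires a configured session):

    import bigframes.pandas as bpd

    # Assumption: display options are exposed under bpd.options.display.
    bpd.options.display.precision = 2   # option added in this commit
    bpd.options.display.max_rows = 10   # new default, down from 25

    df = bpd.DataFrame({"x": [1.23456789, 2.3456789]})
    # repr() delegates to pandas with display.precision=2, so floats
    # render with two decimal places instead of six.
    print(df)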

bigframes/blob/_functions.py

Lines changed: 6 additions & 2 deletions
@@ -473,7 +473,9 @@ def pdf_extract_func(src_obj_ref_rt: str) -> str:
     return result_json
 
 
-pdf_extract_def = FunctionDef(pdf_extract_func, ["pypdf", "requests", "pypdf[crypto]"])
+pdf_extract_def = FunctionDef(
+    pdf_extract_func, ["pypdf>=5.3.1,<6.0.0", "requests", "cryptography==43.0.3"]
+)
 
 
 # Extracts text from a PDF url and chunks it simultaneously
@@ -527,4 +529,6 @@ def pdf_chunk_func(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> str:
     return result_json
 
 
-pdf_chunk_def = FunctionDef(pdf_chunk_func, ["pypdf", "requests", "pypdf[crypto]"])
+pdf_chunk_def = FunctionDef(
+    pdf_chunk_func, ["pypdf>=5.3.1,<6.0.0", "requests", "cryptography==43.0.3"]
+)

bigframes/core/blocks.py

Lines changed: 25 additions & 9 deletions
@@ -387,25 +387,39 @@ def reversed(self) -> Block:
             index_labels=self.index.names,
         )
 
-    def reset_index(self, drop: bool = True) -> Block:
+    def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
         """Reset the index of the block, promoting the old index to a value column.
 
         Arguments:
+            level: the label or index level of the index levels to remove.
            name: this is the column id for the new value id derived from the old index
 
        Returns:
            A new Block because dropping index columns can break references
            from Index classes that point to this block.
        """
+        if level:
+            # preserve original order, not user provided order
+            level_ids: Sequence[str] = [
+                id for id in self.index_columns if id in self.index.resolve_level(level)
+            ]
+        else:
+            level_ids = self.index_columns
+
         expr = self._expr
-        if (
+        if set(self.index_columns) > set(level_ids):
+            new_index_cols = [col for col in self.index_columns if col not in level_ids]
+            new_index_labels = [self.col_id_to_index_name[id] for id in new_index_cols]
+        elif (
             self.session._default_index_type
             == bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
         ):
             expr, new_index_col_id = expr.promote_offsets()
             new_index_cols = [new_index_col_id]
+            new_index_labels = [None]
         elif self.session._default_index_type == bigframes.enums.DefaultIndexKind.NULL:
             new_index_cols = []
+            new_index_labels = []
         else:
             raise ValueError(
                 f"Unrecognized default index kind: {self.session._default_index_type}"
@@ -415,22 +429,23 @@ def reset_index(self, drop: bool = True) -> Block:
             # Even though the index might be part of the ordering, keep that
             # ordering expression as reset_index shouldn't change the row
             # order.
-            expr = expr.drop_columns(self.index_columns)
+            expr = expr.drop_columns(level_ids)
             return Block(
                 expr,
                 index_columns=new_index_cols,
+                index_labels=new_index_labels,
                 column_labels=self.column_labels,
             )
         else:
             # Add index names to column index
-            index_labels = self.index.names
             column_labels_modified = self.column_labels
-            for level, label in enumerate(index_labels):
+            for position, level_id in enumerate(level_ids):
+                label = self.col_id_to_index_name[level_id]
                 if label is None:
-                    if "index" not in self.column_labels and len(index_labels) <= 1:
+                    if "index" not in self.column_labels and self.index.nlevels <= 1:
                         label = "index"
                     else:
-                        label = f"level_{level}"
+                        label = f"level_{self.index_columns.index(level_id)}"
 
                 if label in self.column_labels:
                     raise ValueError(f"cannot insert {label}, already exists")
@@ -439,11 +454,12 @@ def reset_index(self, drop: bool = True) -> Block:
                 label = tuple(label if i == 0 else "" for i in range(nlevels))
                 # Create index copy with label inserted
                 # See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html
-                column_labels_modified = column_labels_modified.insert(level, label)
+                column_labels_modified = column_labels_modified.insert(position, label)
 
             return Block(
-                expr,
+                expr.select_columns((*new_index_cols, *level_ids, *self.value_columns)),
                 index_columns=new_index_cols,
+                index_labels=new_index_labels,
                 column_labels=column_labels_modified,
             )
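
Note: Block.reset_index now accepts a level argument so that only the requested index levels are demoted to value columns, matching pandas.DataFrame.reset_index(level=...). A minimal user-facing sketch, assuming the DataFrame layer forwards level to this Block method:

    import bigframes.pandas as bpd

    df = bpd.DataFrame(
        {"a": [1, 1, 2], "b": ["x", "y", "z"], "v": [10, 20, 30]}
    ).set_index(["a", "b"])

    # Drop only the "a" level back into the columns; "b" stays as the index.
    partial = df.reset_index(level="a")

    # Passing no level still resets every index level, as before.
    full = df.reset_index()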

bigframes/core/compile/polars/compiler.py

Lines changed: 4 additions & 0 deletions
@@ -198,6 +198,10 @@ def _(self, op: ops.ScalarOp, l_input: pl.Expr, r_input: pl.Expr) -> pl.Expr:
     def _(self, op: ops.ScalarOp, l_input: pl.Expr, r_input: pl.Expr) -> pl.Expr:
         return l_input | r_input
 
+    @compile_op.register(bool_ops.XorOp)
+    def _(self, op: ops.ScalarOp, l_input: pl.Expr, r_input: pl.Expr) -> pl.Expr:
+        return l_input ^ r_input
+
     @compile_op.register(num_ops.AddOp)
     def _(self, op: ops.ScalarOp, l_input: pl.Expr, r_input: pl.Expr) -> pl.Expr:
         return l_input + r_input
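
Note: with XorOp registered, boolean exclusive-or between Series can now be compiled on the Polars local backend as well. A short sketch of the operation this handler covers (engine selection is implicit; treat it as illustrative):

    import bigframes.pandas as bpd

    s1 = bpd.Series([True, True, False, False])
    s2 = bpd.Series([True, False, True, False])

    # Element-wise exclusive or; compiles to `l_input ^ r_input` in Polars.
    result = s1 ^ s2  # -> False, True, True, False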

bigframes/core/compile/sqlglot/aggregate_compiler.py

Lines changed: 19 additions & 1 deletion
@@ -15,7 +15,7 @@
 
 import sqlglot.expressions as sge
 
-from bigframes.core import expression
+from bigframes.core import expression, window_spec
 from bigframes.core.compile.sqlglot.aggregations import (
     binary_compiler,
     nullary_compiler,
@@ -56,3 +56,21 @@ def compile_aggregate(
         return binary_compiler.compile(aggregate.op, left, right)
     else:
         raise ValueError(f"Unexpected aggregation: {aggregate}")
+
+
+def compile_analytic(
+    aggregate: expression.Aggregation,
+    window: window_spec.WindowSpec,
+) -> sge.Expression:
+    if isinstance(aggregate, expression.NullaryAggregation):
+        return nullary_compiler.compile(aggregate.op)
+    if isinstance(aggregate, expression.UnaryAggregation):
+        column = typed_expr.TypedExpr(
+            scalar_compiler.compile_scalar_expression(aggregate.arg),
+            aggregate.arg.output_type,
+        )
+        return unary_compiler.compile(aggregate.op, column, window)
+    elif isinstance(aggregate, expression.BinaryAggregation):
+        raise NotImplementedError("binary analytic operations not yet supported")
+    else:
+        raise ValueError(f"Unexpected analytic operation: {aggregate}")
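
Note: compile_analytic mirrors compile_aggregate but applies the aggregation over a window rather than a GROUP BY; it is the entry point called by the new compile_window node handler below. A hedged sketch of the kind of user expression that reaches this path, assuming cumulative ops compile as unary aggregations over an unbounded window:

    import bigframes.pandas as bpd

    s = bpd.Series([1, 2, None, 4])

    # A cumulative sum is a unary aggregation evaluated over an expanding
    # window, so it is compiled via compile_analytic rather than compile_aggregate.
    s.cumsum()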

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 14 additions & 1 deletion
@@ -18,6 +18,7 @@
 
 import sqlglot.expressions as sge
 
+from bigframes import dtypes
 from bigframes.core import window_spec
 import bigframes.core.compile.sqlglot.aggregations.op_registration as reg
 from bigframes.core.compile.sqlglot.aggregations.windows import apply_window_if_present
@@ -36,14 +37,26 @@ def compile(
     return UNARY_OP_REGISTRATION[op](op, column, window=window)
 
 
+@UNARY_OP_REGISTRATION.register(agg_ops.CountOp)
+def _(
+    op: agg_ops.CountOp,
+    column: typed_expr.TypedExpr,
+    window: typing.Optional[window_spec.WindowSpec] = None,
+) -> sge.Expression:
+    return apply_window_if_present(sge.func("COUNT", column.expr), window)
+
+
 @UNARY_OP_REGISTRATION.register(agg_ops.SumOp)
 def _(
     op: agg_ops.SumOp,
     column: typed_expr.TypedExpr,
     window: typing.Optional[window_spec.WindowSpec] = None,
 ) -> sge.Expression:
+    expr = column.expr
+    if column.dtype == dtypes.BOOL_DTYPE:
+        expr = sge.Cast(this=column.expr, to="INT64")
     # Will be null if all inputs are null. Pandas defaults to zero sum though.
-    expr = apply_window_if_present(sge.func("SUM", column.expr), window)
+    expr = apply_window_if_present(sge.func("SUM", expr), window)
    return sge.func("IFNULL", expr, ir._literal(0, column.dtype))
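
Note: the SumOp change casts boolean inputs to INT64 before summing, since BigQuery's SUM does not accept BOOL; CountOp is a straightforward COUNT with an optional window. A minimal sketch of what that enables at the user level (the SQL shown in comments is an expectation derived from the diff, not captured output):

    import bigframes.pandas as bpd

    flags = bpd.Series([True, False, True, None])

    # Sums of boolean Series now compile to roughly
    #   IFNULL(SUM(CAST(col AS INT64)), 0)
    # instead of handing a BOOL input to SUM.
    flags.sum()

    # COUNT ignores NULLs, so this counts the three non-null flags.
    flags.count()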

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,75 @@ def compile_aggregate(
298298

299299
return child.aggregate(aggregations, by_cols, tuple(dropna_cols))
300300

301+
@_compile_node.register
302+
def compile_window(
303+
self, node: nodes.WindowOpNode, child: ir.SQLGlotIR
304+
) -> ir.SQLGlotIR:
305+
window_spec = node.window_spec
306+
if node.expression.op.order_independent and window_spec.is_unbounded:
307+
# notably percentile_cont does not support ordering clause
308+
window_spec = window_spec.without_order()
309+
310+
window_op = aggregate_compiler.compile_analytic(node.expression, window_spec)
311+
312+
inputs: tuple[sge.Expression, ...] = tuple(
313+
scalar_compiler.compile_scalar_expression(expression.DerefOp(column))
314+
for column in node.expression.column_references
315+
)
316+
317+
clauses: list[tuple[sge.Expression, sge.Expression]] = []
318+
if node.expression.op.skips_nulls and not node.never_skip_nulls:
319+
for column in inputs:
320+
clauses.append((sge.Is(this=column, expression=sge.Null()), sge.Null()))
321+
322+
if window_spec.min_periods and len(inputs) > 0:
323+
if node.expression.op.skips_nulls:
324+
# Most operations do not count NULL values towards min_periods
325+
not_null_columns = [
326+
sge.Not(this=sge.Is(this=column, expression=sge.Null()))
327+
for column in inputs
328+
]
329+
# All inputs must be non-null for observation to count
330+
if not not_null_columns:
331+
is_observation_expr: sge.Expression = sge.convert(True)
332+
else:
333+
is_observation_expr = not_null_columns[0]
334+
for expr in not_null_columns[1:]:
335+
is_observation_expr = sge.And(
336+
this=is_observation_expr, expression=expr
337+
)
338+
is_observation = ir._cast(is_observation_expr, "INT64")
339+
observation_count = windows.apply_window_if_present(
340+
sge.func("SUM", is_observation), window_spec
341+
)
342+
else:
343+
# Operations like count treat even NULLs as valid observations
344+
# for the sake of min_periods notnull is just used to convert
345+
# null values to non-null (FALSE) values to be counted.
346+
is_observation = ir._cast(
347+
sge.Not(this=sge.Is(this=inputs[0], expression=sge.Null())),
348+
"INT64",
349+
)
350+
observation_count = windows.apply_window_if_present(
351+
sge.func("COUNT", is_observation), window_spec
352+
)
353+
354+
clauses.append(
355+
(
356+
observation_count < sge.convert(window_spec.min_periods),
357+
sge.Null(),
358+
)
359+
)
360+
if clauses:
361+
when_expressions = [sge.When(this=cond, true=res) for cond, res in clauses]
362+
window_op = sge.Case(ifs=when_expressions, default=window_op)
363+
364+
# TODO: check if we can directly window the expression.
365+
return child.window(
366+
window_op=window_op,
367+
output_column_id=node.output_name.sql,
368+
)
369+
301370

302371
def _replace_unsupported_ops(node: nodes.BigFrameNode):
303372
node = nodes.bottom_up(node, rewrite.rewrite_slice)
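
Note: compile_window wraps the compiled analytic expression in CASE clauses so NULL inputs and min_periods behave like pandas: for null-skipping ops, rows with NULL inputs produce NULL, and rows whose window holds fewer than min_periods observations also produce NULL. A user-level sketch of the semantics this targets, assuming the rolling API mirrors pandas and forwards min_periods:

    import bigframes.pandas as bpd

    s = bpd.Series([1.0, None, 3.0, 4.0, 5.0])

    # Window of 3 with min_periods=2: the first row has only one observation
    # in its window, so it becomes NULL; the NULL input itself is not counted
    # toward min_periods because SumOp skips nulls.
    s.rolling(window=3, min_periods=2).sum()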

bigframes/core/compile/sqlglot/expressions/binary_compiler.py

Lines changed: 47 additions & 7 deletions
@@ -73,6 +73,51 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
     )
 
 
+@BINARY_OP_REGISTRATION.register(ops.div_op)
+def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
+    left_expr = left.expr
+    if left.dtype == dtypes.BOOL_DTYPE:
+        left_expr = sge.Cast(this=left_expr, to="INT64")
+    right_expr = right.expr
+    if right.dtype == dtypes.BOOL_DTYPE:
+        right_expr = sge.Cast(this=right_expr, to="INT64")
+
+    result = sge.func("IEEE_DIVIDE", left_expr, right_expr)
+    if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
+        return sge.Cast(this=sge.Floor(this=result), to="INT64")
+    else:
+        return result
+
+
+@BINARY_OP_REGISTRATION.register(ops.ge_op)
+def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
+    return sge.GTE(this=left.expr, expression=right.expr)
+
+
+@BINARY_OP_REGISTRATION.register(ops.JSONSet)
+def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
+    return sge.func("JSON_SET", left.expr, sge.convert(op.json_path), right.expr)
+
+
+@BINARY_OP_REGISTRATION.register(ops.mul_op)
+def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
+    left_expr = left.expr
+    if left.dtype == dtypes.BOOL_DTYPE:
+        left_expr = sge.Cast(this=left_expr, to="INT64")
+    right_expr = right.expr
+    if right.dtype == dtypes.BOOL_DTYPE:
+        right_expr = sge.Cast(this=right_expr, to="INT64")
+
+    result = sge.Mul(this=left_expr, expression=right_expr)
+
+    if (dtypes.is_numeric(left.dtype) and right.dtype == dtypes.TIMEDELTA_DTYPE) or (
+        left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype)
+    ):
+        return sge.Cast(this=sge.Floor(this=result), to="INT64")
+    else:
+        return result
+
+
 @BINARY_OP_REGISTRATION.register(ops.sub_op)
 def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
     if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype):
@@ -115,11 +160,6 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
     )
 
 
-@BINARY_OP_REGISTRATION.register(ops.ge_op)
+@BINARY_OP_REGISTRATION.register(ops.obj_make_ref_op)
 def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
-    return sge.GTE(this=left.expr, expression=right.expr)
-
-
-@BINARY_OP_REGISTRATION.register(ops.JSONSet)
-def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
-    return sge.func("JSON_SET", left.expr, sge.convert(op.json_path), right.expr)
+    return sge.func("OBJ.MAKE_REF", left.expr, right.expr)
