Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 83b042d

Browse files
authored
Merge branch 'main' into shuowei-anywidget-nested-strcut-array
2 parents d2710c2 + 83b83ea commit 83b042d

22 files changed

Lines changed: 372 additions & 167 deletions

File tree

bigframes/bigquery/_operations/ml.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from __future__ import annotations
1616

17-
from typing import cast, Mapping, Optional, Union
17+
from typing import cast, List, Mapping, Optional, Union
1818

1919
import bigframes_vendored.constants
2020
import google.cloud.bigquery
@@ -431,3 +431,92 @@ def transform(
431431
return bpd.read_gbq_query(sql)
432432
else:
433433
return session.read_gbq_query(sql)
434+
435+
436+
@log_adapter.method_logger(custom_base_name="bigquery_ml")
def generate_text(
    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
    input_: Union[pd.DataFrame, dataframe.DataFrame, str],
    *,
    temperature: Optional[float] = None,
    max_output_tokens: Optional[int] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    flatten_json_output: Optional[bool] = None,
    stop_sequences: Optional[List[str]] = None,
    ground_with_google_search: Optional[bool] = None,
    request_type: Optional[str] = None,
) -> dataframe.DataFrame:
    """Generates text using a BigQuery ML model.

    See the `BigQuery ML GENERATE_TEXT function syntax
    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-text>`_
    for additional reference.

    Args:
        model (bigframes.ml.base.BaseEstimator or str):
            The model to use for text generation.
        input_ (Union[bigframes.pandas.DataFrame, str]):
            The DataFrame or query to use for text generation.
        temperature (float, optional):
            A FLOAT64 value in the range ``[0.0, 1.0]`` that controls the
            degree of randomness in token selection. A lower temperature
            works well for prompts that expect a more deterministic and less
            open-ended or creative response, while a higher temperature can
            lead to more diverse or creative results. A temperature of ``0``
            is deterministic, meaning that the highest probability response
            is always selected.
        max_output_tokens (int, optional):
            An INT64 value that sets the maximum number of tokens in the
            generated text.
        top_k (int, optional):
            An INT64 value that changes how the model selects tokens for
            output. A ``top_k`` of ``1`` means the next selected token is
            the most probable among all tokens in the model's vocabulary. A
            ``top_k`` of ``3`` means that the next token is selected from
            among the three most probable tokens by using temperature. The
            default value is ``40``.
        top_p (float, optional):
            A FLOAT64 value that changes how the model selects tokens for
            output. Tokens are selected from most probable to least probable
            until the sum of their probabilities equals the ``top_p`` value.
            For example, if tokens A, B, and C have a probability of 0.3,
            0.2, and 0.1 and the ``top_p`` value is ``0.5``, then the model
            will select either A or B as the next token by using temperature.
            The default value is ``0.95``.
        flatten_json_output (bool, optional):
            A BOOL value that determines the content of the generated JSON
            column.
        stop_sequences (List[str], optional):
            An ARRAY<STRING> value that contains the stop sequences for the
            model.
        ground_with_google_search (bool, optional):
            A BOOL value that determines whether to ground the model with
            Google Search.
        request_type (str, optional):
            A STRING value that contains the request type for the model.

    Returns:
        bigframes.pandas.DataFrame:
            The generated text.
    """
    import bigframes.pandas as bpd

    # Resolve the fully-qualified model name and the session (if any)
    # associated with the input, then render the input as a SQL table
    # expression for the ML.GENERATE_TEXT call.
    model_name, session = _get_model_name_and_session(model, input_)
    sql = bigframes.core.sql.ml.generate_text(
        model_name=model_name,
        table=_to_sql(input_),
        temperature=temperature,
        max_output_tokens=max_output_tokens,
        top_k=top_k,
        top_p=top_p,
        flatten_json_output=flatten_json_output,
        stop_sequences=stop_sequences,
        ground_with_google_search=ground_with_google_search,
        request_type=request_type,
    )

    # Run on the input's own session when available; otherwise fall back to
    # the global pandas-compatible session.
    run_query = bpd.read_gbq_query if session is None else session.read_gbq_query
    return run_query(sql)

bigframes/bigquery/ml.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
create_model,
2424
evaluate,
2525
explain_predict,
26+
generate_text,
2627
global_explain,
2728
predict,
2829
transform,
@@ -35,4 +36,5 @@
3536
"explain_predict",
3637
"global_explain",
3738
"transform",
39+
"generate_text",
3840
]

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ def _(
527527
else:
528528
result = apply_window_if_present(result, window)
529529

530-
if op.should_floor_result:
530+
if op.should_floor_result or column.dtype == dtypes.TIMEDELTA_DTYPE:
531531
result = sge.Cast(this=sge.func("FLOOR", result), to="INT64")
532532
return result
533533

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@
4141
def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
4242
"""Compiles a BigFrameNode according to the request into SQL using SQLGlot."""
4343

44-
# Generator for unique identifiers.
45-
uid_gen = guid.SequentialUIDGenerator()
4644
output_names = tuple((expression.DerefOp(id), id.sql) for id in request.node.ids)
4745
result_node = nodes.ResultNode(
4846
request.node,
@@ -61,22 +59,16 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
6159
)
6260
if request.sort_rows:
6361
result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node))
64-
result_node = _remap_variables(result_node, uid_gen)
65-
result_node = typing.cast(
66-
nodes.ResultNode, rewrite.defer_selection(result_node)
67-
)
68-
sql = _compile_result_node(result_node, uid_gen)
62+
sql = _compile_result_node(result_node)
6963
return configs.CompileResult(
7064
sql, result_node.schema.to_bigquery(), result_node.order_by
7165
)
7266

7367
ordering: typing.Optional[bf_ordering.RowOrdering] = result_node.order_by
7468
result_node = dataclasses.replace(result_node, order_by=None)
7569
result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node))
70+
sql = _compile_result_node(result_node)
7671

77-
result_node = _remap_variables(result_node, uid_gen)
78-
result_node = typing.cast(nodes.ResultNode, rewrite.defer_selection(result_node))
79-
sql = _compile_result_node(result_node, uid_gen)
8072
# Return the ordering iff no extra columns are needed to define the row order
8173
if ordering is not None:
8274
output_order = (
@@ -97,11 +89,16 @@ def _remap_variables(
9789
return typing.cast(nodes.ResultNode, result_node)
9890

9991

100-
def _compile_result_node(
101-
root: nodes.ResultNode, uid_gen: guid.SequentialUIDGenerator
102-
) -> str:
92+
def _compile_result_node(root: nodes.ResultNode) -> str:
93+
# Create UIDs to standardize variable names and ensure consistent compilation
94+
# of nodes using the same generator.
95+
uid_gen = guid.SequentialUIDGenerator()
96+
root = _remap_variables(root, uid_gen)
97+
root = typing.cast(nodes.ResultNode, rewrite.defer_selection(root))
98+
10399
# Have to bind schema as the final step before compilation.
104100
root = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(root))
101+
105102
selected_cols: tuple[tuple[str, sge.Expression], ...] = tuple(
106103
(name, scalar_compiler.scalar_op_compiler.compile_expression(ref))
107104
for ref, name in root.output_cols
@@ -127,7 +124,6 @@ def _compile_result_node(
127124
return sqlglot_ir.sql
128125

129126

130-
@functools.lru_cache(maxsize=5000)
131127
def compile_node(
132128
node: nodes.BigFrameNode, uid_gen: guid.SequentialUIDGenerator
133129
) -> ir.SQLGlotIR:
@@ -266,10 +262,16 @@ def compile_concat(node: nodes.ConcatNode, *children: ir.SQLGlotIR) -> ir.SQLGlo
266262
assert len(children) >= 1
267263
uid_gen = children[0].uid_gen
268264

269-
output_ids = [id.sql for id in node.output_ids]
265+
# BigQuery `UNION` query takes the column names from the first `SELECT` clause.
266+
default_output_ids = [field.id.sql for field in node.child_nodes[0].fields]
267+
output_aliases = [
268+
(default_output_id, output_id.sql)
269+
for default_output_id, output_id in zip(default_output_ids, node.output_ids)
270+
]
271+
270272
return ir.SQLGlotIR.from_union(
271273
[child.expr for child in children],
272-
output_ids=output_ids,
274+
output_aliases=output_aliases,
273275
uid_gen=uid_gen,
274276
)
275277

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ def _cast_to_json(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression:
252252
sg_expr = expr.expr
253253

254254
if from_type == dtypes.STRING_DTYPE:
255-
func_name = "PARSE_JSON_IN_SAFE" if op.safe else "PARSE_JSON"
255+
func_name = "SAFE.PARSE_JSON" if op.safe else "PARSE_JSON"
256256
return sge.func(func_name, sg_expr)
257257
if from_type in (dtypes.INT_DTYPE, dtypes.BOOL_DTYPE, dtypes.FLOAT_DTYPE):
258258
sg_expr = sge.Cast(this=sg_expr, to="STRING")

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,9 @@ def from_query_string(
170170
cls,
171171
query_string: str,
172172
) -> SQLGlotIR:
173-
"""Builds a SQLGlot expression from a query string"""
173+
"""Builds a SQLGlot expression from a query string. Wrapping the query
174+
in a CTE can avoid the query parsing issue for unsupported syntax in
175+
SQLGlot."""
174176
uid_gen: guid.SequentialUIDGenerator = guid.SequentialUIDGenerator()
175177
cte_name = sge.to_identifier(
176178
next(uid_gen.get_uid_stream("bfcte_")), quoted=cls.quoted
@@ -187,7 +189,7 @@ def from_query_string(
187189
def from_union(
188190
cls,
189191
selects: typing.Sequence[sge.Select],
190-
output_ids: typing.Sequence[str],
192+
output_aliases: typing.Sequence[typing.Tuple[str, str]],
191193
uid_gen: guid.SequentialUIDGenerator,
192194
) -> SQLGlotIR:
193195
"""Builds a SQLGlot expression by unioning of multiple select expressions."""
@@ -196,46 +198,36 @@ def from_union(
196198
), f"At least two select expressions must be provided, but got {selects}."
197199

198200
existing_ctes: list[sge.CTE] = []
199-
union_selects: list[sge.Expression] = []
201+
union_selects: list[sge.Select] = []
200202
for select in selects:
201203
assert isinstance(
202204
select, sge.Select
203205
), f"All provided expressions must be of type sge.Select, but got {type(select)}"
204206

205207
select_expr = select.copy()
206208
select_expr, select_ctes = _pop_query_ctes(select_expr)
207-
existing_ctes = [*existing_ctes, *select_ctes]
208-
209-
new_cte_name = sge.to_identifier(
210-
next(uid_gen.get_uid_stream("bfcte_")), quoted=cls.quoted
211-
)
212-
new_cte = sge.CTE(
213-
this=select_expr,
214-
alias=new_cte_name,
209+
existing_ctes = _merge_ctes(existing_ctes, select_ctes)
210+
union_selects.append(select_expr)
211+
212+
union_expr: sge.Query = union_selects[0].subquery()
213+
for select in union_selects[1:]:
214+
union_expr = sge.Union(
215+
this=union_expr,
216+
expression=select.subquery(),
217+
distinct=False,
218+
copy=False,
215219
)
216-
existing_ctes = [*existing_ctes, new_cte]
217220

218-
selections = [
219-
sge.Alias(
220-
this=sge.to_identifier(expr.alias_or_name, quoted=cls.quoted),
221-
alias=sge.to_identifier(output_id, quoted=cls.quoted),
222-
)
223-
for expr, output_id in zip(select_expr.expressions, output_ids)
224-
]
225-
union_selects.append(
226-
sge.Select().select(*selections).from_(sge.Table(this=new_cte_name))
221+
selections = [
222+
sge.Alias(
223+
this=sge.to_identifier(old_name, quoted=cls.quoted),
224+
alias=sge.to_identifier(new_name, quoted=cls.quoted),
227225
)
228-
229-
union_expr = typing.cast(
230-
sge.Select,
231-
functools.reduce(
232-
lambda x, y: sge.Union(
233-
this=x, expression=y, distinct=False, copy=False
234-
),
235-
union_selects,
236-
),
226+
for old_name, new_name in output_aliases
227+
]
228+
final_select_expr = (
229+
sge.Select().select(*selections).from_(union_expr.subquery())
237230
)
238-
final_select_expr = sge.Select().select(sge.Star()).from_(union_expr.subquery())
239231
final_select_expr = _set_query_ctes(final_select_expr, existing_ctes)
240232
return cls(expr=final_select_expr, uid_gen=uid_gen)
241233

@@ -345,7 +337,7 @@ def join(
345337

346338
left_select, left_ctes = _pop_query_ctes(left_select)
347339
right_select, right_ctes = _pop_query_ctes(right_select)
348-
merged_ctes = [*left_ctes, *right_ctes]
340+
merged_ctes = _merge_ctes(left_ctes, right_ctes)
349341

350342
join_on = _and(
351343
tuple(
@@ -382,7 +374,7 @@ def isin_join(
382374

383375
left_select, left_ctes = _pop_query_ctes(left_select)
384376
right_select, right_ctes = _pop_query_ctes(right_select)
385-
merged_ctes = [*left_ctes, *right_ctes]
377+
merged_ctes = _merge_ctes(left_ctes, right_ctes)
386378

387379
left_condition = typed_expr.TypedExpr(
388380
sge.Column(this=conditions[0].expr, table=left_cte_name),
@@ -835,6 +827,15 @@ def _set_query_ctes(
835827
return new_expr
836828

837829

830+
def _merge_ctes(ctes1: list[sge.CTE], ctes2: list[sge.CTE]) -> list[sge.CTE]:
    """Concatenates two CTE lists, keeping only the first CTE for each alias.

    CTEs from ``ctes1`` take precedence over same-aliased CTEs in ``ctes2``,
    and the original encounter order is preserved.
    """
    merged: dict = {}
    for cte in (*ctes1, *ctes2):
        # setdefault keeps the first occurrence, matching "ctes1 wins" and
        # de-duplicating repeated aliases within either list.
        merged.setdefault(cte.alias, cte)
    return list(merged.values())
837+
838+
838839
def _pop_query_ctes(
839840
expr: sge.Select,
840841
) -> tuple[sge.Select, list[sge.CTE]]:

0 commit comments

Comments
 (0)