From 11a8b5eaf279e7b7f2614ec264090b0cce9d8ba4 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Sat, 7 Feb 2026 06:47:31 +0000 Subject: [PATCH] refactor: use direct API for audio transcription --- .../multimodal/multimodal_dataframe.ipynb | 95 +++++++++++-------- 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index 0822ee4c2d..3746694634 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -91,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1451,25 +1451,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 6. Audio transcribe function" + "### 6. Audio transcribe" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 10, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - } - ], + "outputs": [], "source": [ "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\n", "df = bpd.from_glob_path(audio_gcs_path, name=\"audio\")" @@ -1477,18 +1466,14 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" @@ -1496,54 +1481,82 @@ }, { "data": { + "text/html": [ + "
0    Now, as all books, not primarily intended as p...
" + ], "text/plain": [ "0 Now, as all books, not primarily intended as p...\n", "Name: transcribed_content, dtype: string" ] }, - "execution_count": 22, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "transcribed_series = df['audio'].blob.audio_transcribe(model_name=\"gemini-2.0-flash-001\", verbose=False)\n", + "import bigframes.bigquery as bbq\n", + "import bigframes.operations as ops\n", + "\n", + "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", + "# Here's how to perform the same operation directly:\n", + "\n", + "audio_series = df['audio']\n", + "prompt_text = (\n", + " \"**Task:** Transcribe the provided audio. **Instructions:** - Your response \"\n", + " \"must contain only the verbatim transcription of the audio. - Do not include \"\n", + " \"any introductory text, summaries, or conversational filler in your response. \"\n", + " \"The output should begin directly with the first word of the audio.\"\n", + ")\n", + "\n", + "# Convert the audio series to the runtime representation required by the model.\n", + "# This involves fetching metadata and getting a signed access URL.\n", + "audio_metadata = audio_series._apply_unary_op(ops.obj_fetch_metadata_op)\n", + "audio_runtime = audio_metadata._apply_unary_op(ops.ObjGetAccessUrl(mode=\"R\"))\n", + "\n", + "transcribed_results = bbq.ai.generate(\n", + " prompt=(prompt_text, audio_runtime),\n", + " endpoint=\"gemini-2.0-flash-001\",\n", + " model_params={\"generationConfig\": {\"temperature\": 0.0}},\n", + ")\n", + "\n", + "transcribed_series = transcribed_results.struct.field(\"result\").rename(\"transcribed_content\")\n", "transcribed_series" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 12, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, { "data": { + "text/html": [ + "
0    {'status': '', 'content': 'Now, as all books, ...
" + ], "text/plain": [ "0 {'status': '', 'content': 'Now, as all books, ...\n", "Name: transcription_results, dtype: struct[pyarrow]" ] }, - "execution_count": 23, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "transcribed_series_verbose = df['audio'].blob.audio_transcribe(model_name=\"gemini-2.0-flash-001\", verbose=True)\n", + "# To get verbose results (including status), we can extract both fields from the result struct.\n", + "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", + "transcribed_status_series = transcribed_results.struct.field(\"status\")\n", + "\n", + "transcribed_series_verbose = bpd.DataFrame(\n", + " {\n", + " \"status\": transcribed_status_series,\n", + " \"content\": transcribed_content_series,\n", + " }\n", + ")\n", + "# Package as a struct for consistent display\n", + "transcribed_series_verbose = bbq.struct(transcribed_series_verbose).rename(\"transcription_results\")\n", "transcribed_series_verbose" ] } @@ -1567,7 +1580,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.18" + "version": "3.13.0" } }, "nbformat": 4,