 from google.cloud import bigquery_storage_v1
 import google.cloud.bigquery
 import google.cloud.bigquery as bigquery
+import google.cloud.bigquery.table
 from google.cloud.bigquery_storage_v1 import types as bq_storage_types
 import pandas
 import pyarrow as pa
@@ -1004,7 +1005,7 @@ def read_gbq_query(
             configuration=configuration,
         )
         query_job_for_metrics = query_job
-        rows = None
+        rows: Optional[google.cloud.bigquery.table.RowIterator] = None
     else:
         job_config = typing.cast(
             bigquery.QueryJobConfig,
@@ -1037,8 +1038,8 @@ def read_gbq_query(
             query_job=query_job_for_metrics, row_iterator=rows
         )

-        # It's possible that there's no job and corresponding destination table.
-        # In this case, we must create a local node.
+        # It's possible that there's no job and therefore no corresponding
+        # destination table. In this case, we must create a local node.
         #
         # TODO(b/420984164): Tune the threshold for which we download to
         # local node. Likely there are a wide range of sizes in which it
@@ -1059,22 +1060,35 @@ def read_gbq_query(
                 columns=columns,
             )

-        # If there was no destination table and we've made it this far, that
-        # means the query must have been DDL or DML. Return some job metadata,
-        # instead.
-        if not destination:
+        # If the query was DDL or DML, return some job metadata. See
+        # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type
+        # for possible statement types. Note that destination table does exist
+        # for some DDL operations such as CREATE VIEW, but we don't want to
+        # read from that. See internal issue b/444282709.
+        if destination is None or (
+            query_job_for_metrics is not None
+            and query_job_for_metrics.statement_type != "SELECT"
+        ):
             return bf_read_gbq_query.create_dataframe_from_query_job_stats(
                 query_job_for_metrics,
                 session=self._session,
             )

+        # Speed up counts by getting counts from result metadata.
+        if rows is not None:
+            n_rows = rows.total_rows
+        elif query_job_for_metrics is not None:
+            n_rows = query_job_for_metrics.result().total_rows
+        else:
+            n_rows = None
+
         return self.read_gbq_table(
             f"{destination.project}.{destination.dataset_id}.{destination.table_id}",
             index_col=index_col,
             columns=columns,
             use_cache=configuration["query"]["useQueryCache"],
             force_total_order=force_total_order,
-            n_rows=query_job.result().total_rows,
+            n_rows=n_rows,
             # max_results and filters are omitted because they are already
             # handled by to_query(), above.
         )
0 commit comments