From 62eb3b8e3a4b18a82f78e4523f41148607b08697 Mon Sep 17 00:00:00 2001
From: xiedeyantu
Date: Thu, 9 Apr 2026 15:59:25 +0800
Subject: [PATCH] fix: DataFusion benchmark panicked: failed to cast '2013-07-01' to UInt16

---
 datafusion/core/benches/sql_planner.rs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs
index 59502da98790..fcc8da30fedd 100644
--- a/datafusion/core/benches/sql_planner.rs
+++ b/datafusion/core/benches/sql_planner.rs
@@ -130,7 +130,8 @@ fn register_clickbench_hits_table(rt: &Runtime) -> SessionContext {
         format!("{BENCHMARKS_PATH_2}{CLICKBENCH_DATA_PATH}")
     };
 
-    let sql = format!("CREATE EXTERNAL TABLE hits STORED AS PARQUET LOCATION '{path}'");
+    let sql =
+        format!("CREATE EXTERNAL TABLE hits_raw STORED AS PARQUET LOCATION '{path}'");
 
     // ClickBench partitioned dataset was written by an ancient version of pyarrow that
     // that wrote strings with the wrong logical type. To read it correctly, we must
@@ -139,6 +140,17 @@ fn register_clickbench_hits_table(rt: &Runtime) -> SessionContext {
         .unwrap();
     rt.block_on(ctx.sql(&sql)).unwrap();
 
+    // ClickBench stores EventDate as UInt16 (days since 1970-01-01). Create a view
+    // that exposes it as SQL DATE so that queries comparing it with date literals
+    // (e.g. "EventDate >= '2013-07-01'") work correctly during planning.
+    rt.block_on(ctx.sql(
+        "CREATE VIEW hits AS \
+         SELECT * EXCEPT (\"EventDate\"), \
+         CAST(CAST(\"EventDate\" AS INTEGER) AS DATE) AS \"EventDate\" \
+         FROM hits_raw",
+    ))
+    .unwrap();
+
     let count =
         rt.block_on(async { ctx.table("hits").await.unwrap().count().await.unwrap() });
     assert!(count > 0);