From f4270d1c22fb223e6345e99a55aab65c1cbfa9a0 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 13 Nov 2025 23:05:34 +0000 Subject: [PATCH] fix(file-based): Switch Excel parser from calamine to openpyxl engine Switch the Excel parser engine from calamine to openpyxl to prevent crashes when parsing Excel files with invalid date values. The calamine engine (Rust-based) panics when encountering date values that result in years outside Python's datetime range (1-9999), causing the entire sync to fail. The openpyxl engine (pure Python) handles these edge cases more gracefully, allowing syncs to complete even with data quality issues. This fixes crashes like: pyo3_runtime.PanicException: failed to construct date: PyErr { type: <class 'ValueError'>, value: ValueError('year 20225 is out of range') } Trade-off: openpyxl is slower than calamine, but reliability is more important than speed for production syncs. Fixes: airbytehq/oncall#10097 Co-Authored-By: unknown <> --- airbyte_cdk/sources/file_based/file_types/excel_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/file_based/file_types/excel_parser.py b/airbyte_cdk/sources/file_based/file_types/excel_parser.py index f99ca0180..d32505be5 100644 --- a/airbyte_cdk/sources/file_based/file_types/excel_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/excel_parser.py @@ -191,4 +191,4 @@ def open_and_parse_file(fp: Union[IOBase, str, Path]) -> pd.DataFrame: Returns: pd.DataFrame: Parsed data from the Excel file. """ - return pd.ExcelFile(fp, engine="calamine").parse() # type: ignore [arg-type, call-overload, no-any-return] + return pd.ExcelFile(fp, engine="openpyxl").parse() # type: ignore [arg-type, call-overload, no-any-return]