From 894c5a0b61a428e3c416ed799dbad2b04247c36d Mon Sep 17 00:00:00 2001 From: Alberto Daniel Badia Date: Sat, 24 Jan 2026 13:27:28 -0300 Subject: [PATCH 1/4] FIX: Reading API times as UTC causes wrong date --- src/marketdata/output_handlers/pandas.py | 7 +++-- src/marketdata/output_handlers/polars.py | 18 ++++++++---- src/marketdata/utils.py | 26 +++++++++-------- src/tests/test_options_expirations.py | 37 +++++++++++++++++++++++- src/tests/test_utils.py | 9 ++++-- 5 files changed, 74 insertions(+), 23 deletions(-) diff --git a/src/marketdata/output_handlers/pandas.py b/src/marketdata/output_handlers/pandas.py index 32199f8..01c5351 100644 --- a/src/marketdata/output_handlers/pandas.py +++ b/src/marketdata/output_handlers/pandas.py @@ -57,8 +57,11 @@ def _convert_timestamp_columns( continue try: if format_to_use == DateFormat.TIMESTAMP: - df[col] = pd.to_datetime(df[col], utc=True).dt.tz_convert( - default_tz + s = pd.to_datetime(df[col]) + df[col] = ( + s.dt.tz_localize(default_tz) + if s.dt.tz is None + else s.dt.tz_convert(default_tz) ) elif format_to_use == DateFormat.SPREADSHEET: df[col] = pd.to_datetime( diff --git a/src/marketdata/output_handlers/polars.py b/src/marketdata/output_handlers/polars.py index a0d0420..8378371 100644 --- a/src/marketdata/output_handlers/polars.py +++ b/src/marketdata/output_handlers/polars.py @@ -45,18 +45,24 @@ def _convert_timestamp_columns( format_to_use = date_format or DateFormat.UNIX default_tz = pytz.timezone("US/Eastern").zone + TZ_AWARE_REGEX = r"(Z|[+-]\d{2}:?\d{2})$" for col in df.columns: if col not in date_columns: continue try: if format_to_use == DateFormat.TIMESTAMP: - cleaned = pl.col(col).str.replace( - r"(Z|[+-]\d{2}:?\d{2})$", "", literal=False - ) + is_aware = pl.col(col).str.contains(TZ_AWARE_REGEX, literal=False) + cleaned = pl.col(col).str.replace(TZ_AWARE_REGEX, "", literal=False) + dt_expr = cleaned.str.strptime(pl.Datetime, strict=False) + df = df.with_columns( - cleaned.str.strptime(pl.Datetime, strict=False) - .dt.replace_time_zone("UTC") - .dt.convert_time_zone(default_tz) + pl.when(is_aware) + .then( + dt_expr.dt.replace_time_zone("UTC").dt.convert_time_zone( + default_tz + ) + ) + .otherwise(dt_expr.dt.replace_time_zone(default_tz)) .alias(col) ) elif format_to_use == DateFormat.SPREADSHEET: diff --git a/src/marketdata/utils.py b/src/marketdata/utils.py index 7c08f96..32a04ce 100644 --- a/src/marketdata/utils.py +++ b/src/marketdata/utils.py @@ -8,25 +8,27 @@ def format_timestamp(value: str | int | float | None) -> datetime.datetime: + default_tz = pytz.timezone("US/Eastern") + if isinstance(value, str): + if value.endswith("Z"): + value = value[:-1] + "+00:00" try: - return datetime.datetime.fromisoformat(value) - except: - pass - try: - value = float(value) - except: - raise ValueError("Unrecognized date format") + dt = datetime.datetime.fromisoformat(value) + return dt.astimezone(default_tz) if dt.tzinfo else dt + except ValueError: + try: + value = float(value) + except ValueError: + raise ValueError("Unrecognized date format") if isinstance(value, (int, float)): if 0 < value < 60000: return datetime.datetime(1899, 12, 30) + datetime.timedelta(days=value) try: - return datetime.datetime.fromtimestamp( - value, tz=pytz.timezone("US/Eastern") - ) - except: - raise ValueError("Unrecognized date format") + return datetime.datetime.fromtimestamp(value, tz=default_tz) + except (ValueError, OSError, OverflowError): + pass raise ValueError("Unrecognized date format") diff --git a/src/tests/test_options_expirations.py b/src/tests/test_options_expirations.py index a095f94..1018fa0 100644 --- a/src/tests/test_options_expirations.py +++ b/src/tests/test_options_expirations.py @@ -4,7 +4,13 @@ import pytz -from marketdata.input_types.base import OutputFormat +from marketdata.input_types.base import ( + DateFormat, + OutputFormat, + UserUniversalAPIParams, +) +from marketdata.output_handlers.pandas import PandasOutputHandler +from marketdata.output_handlers.polars import PolarsOutputHandler from marketdata.output_types.options_expirations import ( OptionsExpirations, OptionsExpirationsHumanReadable, @@ -194,3 +200,32 @@ def test_get_options_expirations_response_200_csv(respx_mock, client): symbol="AAPL", output_format=OutputFormat.CSV, filename="test.csv" ) assert pathlib.Path(output).read_text() == "AS RECEIVED FROM API" + + +def test_pandas_handler_date_only_localization(): + data = {"expirations": ["2026-02-20"]} + params = UserUniversalAPIParams(date_format=DateFormat.TIMESTAMP) + handler = PandasOutputHandler(data, OptionsExpirations, params) + + df = handler.get_result() + result_dt = df["expirations"].iloc[0] + + assert result_dt.year == 2026 + assert result_dt.month == 2 + assert result_dt.day == 20 + assert result_dt.hour == 0 + assert str(result_dt.tzinfo) in ["US/Eastern", "EDT", "EST"] + + +def test_polars_handler_date_only_localization(): + data = {"expirations": ["2026-02-20"]} + params = UserUniversalAPIParams(date_format=DateFormat.TIMESTAMP) + handler = PolarsOutputHandler(data, OptionsExpirations, params) + + df = handler.get_result() + result_dt = df["expirations"][0] + + assert result_dt.year == 2026 + assert result_dt.month == 2 + assert result_dt.day == 20 + assert result_dt.hour == 0 diff --git a/src/tests/test_utils.py b/src/tests/test_utils.py index 586528c..8f1a9d7 100644 --- a/src/tests/test_utils.py +++ b/src/tests/test_utils.py @@ -28,12 +28,17 @@ def test_format_timestamp(): ) with pytest.raises(ValueError): format_timestamp("2024-01-01 12:00:00.0:00:00") - with pytest.raises(ValueError): - format_timestamp(99999999999999) with pytest.raises(ValueError): format_timestamp(None) +def test_format_timestamp_date_only_localization(): + val = "2026-02-20" + dt = format_timestamp(val) + assert dt == datetime.datetime(2026, 2, 20, 0, 0, 0) + assert dt.tzinfo is None + + def test_check_is_date(): assert check_is_date("2024-01-01") == True assert check_is_date(datetime.date(2024, 1, 1)) == True From e0a5d568813063f2b8460a9868fda46cda39efee Mon Sep 17 00:00:00 2001 From: Alberto Daniel Badia Date: Sat, 24 Jan 2026 13:33:39 -0300 Subject: [PATCH 2/4] FIX: Missing edge cases tests --- src/tests/test_utils.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/tests/test_utils.py b/src/tests/test_utils.py index 8f1a9d7..54687c1 100644 --- a/src/tests/test_utils.py +++ b/src/tests/test_utils.py @@ -26,8 +26,25 @@ def test_format_timestamp(): assert format_timestamp(1714732800.0) == datetime.datetime.fromtimestamp( 1714732800, tz=pytz.timezone("US/Eastern") ) + # Test 'Z' suffix for Python < 3.11 compatibility + # Construct expected datetime using localize to avoid pytz LMT issues + expected_z = pytz.timezone("US/Eastern").localize( + datetime.datetime(2024, 1, 1, 7, 0, 0) + ) + assert format_timestamp("2024-01-01T12:00:00Z") == expected_z + with pytest.raises(ValueError): format_timestamp("2024-01-01 12:00:00.0:00:00") + # Coverage for line 21-23 (string that's not float) + with pytest.raises(ValueError): + format_timestamp("invalid-date") + # Test numeric exceptions (OSError/OverflowError) - coverage for line 30-31 + with pytest.raises(ValueError): + format_timestamp(99999999999999) + # Coverage for line 33 (final fallback) + with pytest.raises(ValueError): + # List is not str, int, float, or None + format_timestamp([]) with pytest.raises(ValueError): format_timestamp(None) From a810f841ad7065dd418e73cd8d6c99a1135a7edf Mon Sep 17 00:00:00 2001 From: MarketDataDev01 Date: Wed, 28 Jan 2026 15:17:59 -0300 Subject: [PATCH 3/4] Fix tz conversion for date-only values used in expiration to show 00:00 US/Eastern datetimes instead of previous day --- src/marketdata/output_handlers/pandas.py | 105 ++++++++++++++--- src/marketdata/output_handlers/polars.py | 66 ++++++++--- src/tests/test_options_expirations.py | 58 +++++++++ src/tests/test_output_handlers.py | 143 +++++++++++++++++++++++ 4 files changed, 339 insertions(+), 33 deletions(-) diff --git a/src/marketdata/output_handlers/pandas.py b/src/marketdata/output_handlers/pandas.py index 01c5351..fad434d 100644 --- a/src/marketdata/output_handlers/pandas.py +++ b/src/marketdata/output_handlers/pandas.py @@ -46,31 +46,108 @@ def _convert_timestamp_columns( date_format: DateFormat | None, ) -> pd.DataFrame: """Convert date/time columns to timezone-aware datetime objects.""" - if date_format == DateFormat.UNIX: - return df - + convert_numeric = date_format != DateFormat.UNIX format_to_use = date_format or DateFormat.UNIX default_tz = pytz.timezone("US/Eastern") + date_only_regex = r"^\d{4}-\d{2}-\d{2}$" for col in df.columns: if col not in date_columns: continue try: + series = df[col] + date_only_mask = series.astype("string").str.match( + date_only_regex, na=False + ) + updated_series = None + if date_only_mask.any(): + date_only = pd.to_datetime( + series[date_only_mask], format="%Y-%m-%d", errors="coerce" + ) + date_only = date_only.dt.tz_localize(default_tz) + updated_series = series.astype("object") + updated_series.loc[date_only_mask] = date_only + if format_to_use == DateFormat.TIMESTAMP: - s = pd.to_datetime(df[col]) - df[col] = ( - s.dt.tz_localize(default_tz) - if s.dt.tz is None - else s.dt.tz_convert(default_tz) + remaining = ( + series[~date_only_mask] if date_only_mask.any() else series ) + parsed = pd.to_datetime(remaining, errors="coerce") + if parsed.dt.tz is None: + parsed = parsed.dt.tz_localize(default_tz) + else: + parsed = parsed.dt.tz_convert(default_tz) + parsed_mask = parsed.notna() + if parsed_mask.any(): + if date_only_mask.any(): + updated_series.loc[remaining.index[parsed_mask]] = parsed[ + parsed_mask + ] + elif parsed_mask.all(): + df[col] = parsed + else: + updated_series = series.astype("object") + updated_series.loc[remaining.index[parsed_mask]] = parsed[ + parsed_mask + ] elif format_to_use == DateFormat.SPREADSHEET: - df[col] = pd.to_datetime( - df[col], unit="D", origin="1899-12-30", utc=True - ).dt.tz_convert(default_tz) + if convert_numeric: + remaining = ( + series[~date_only_mask] if date_only_mask.any() else series + ) + numeric = pd.to_numeric(remaining, errors="coerce") + numeric_mask = numeric.notna() + if numeric_mask.any(): + converted = pd.to_datetime( + numeric[numeric_mask], + unit="D", + origin="1899-12-30", + utc=True, + ).dt.tz_convert(default_tz) + if date_only_mask.any(): + updated_series.loc[remaining.index[numeric_mask]] = ( + converted + ) + elif numeric_mask.all(): + full_converted = pd.to_datetime( + numeric, + unit="D", + origin="1899-12-30", + utc=True, + ).dt.tz_convert(default_tz) + df[col] = full_converted + else: + updated_series = series.astype("object") + updated_series.loc[remaining.index[numeric_mask]] = ( + converted + ) else: - df[col] = pd.to_datetime(df[col], unit="s", utc=True).dt.tz_convert( - default_tz - ) + if convert_numeric: + remaining = ( + series[~date_only_mask] if date_only_mask.any() else series + ) + numeric = pd.to_numeric(remaining, errors="coerce") + numeric_mask = numeric.notna() + if numeric_mask.any(): + converted = pd.to_datetime( + numeric[numeric_mask], unit="s", utc=True + ).dt.tz_convert(default_tz) + if date_only_mask.any(): + updated_series.loc[remaining.index[numeric_mask]] = ( + converted + ) + elif numeric_mask.all(): + full_converted = pd.to_datetime( + numeric, unit="s", utc=True + ).dt.tz_convert(default_tz) + df[col] = full_converted + else: + updated_series = series.astype("object") + updated_series.loc[remaining.index[numeric_mask]] = ( + converted + ) + if updated_series is not None: + df[col] = updated_series except (ValueError, TypeError, AttributeError): pass diff --git a/src/marketdata/output_handlers/polars.py b/src/marketdata/output_handlers/polars.py index 8378371..3078e36 100644 --- a/src/marketdata/output_handlers/polars.py +++ b/src/marketdata/output_handlers/polars.py @@ -39,24 +39,28 @@ def _convert_timestamp_columns( date_format: DateFormat | None, ) -> pl.DataFrame: """Convert date/time columns to timezone-aware datetime objects.""" - if date_format == DateFormat.UNIX: - return df - + convert_numeric = date_format != DateFormat.UNIX format_to_use = date_format or DateFormat.UNIX default_tz = pytz.timezone("US/Eastern").zone + DATE_ONLY_REGEX = r"^\d{4}-\d{2}-\d{2}$" TZ_AWARE_REGEX = r"(Z|[+-]\d{2}:?\d{2})$" for col in df.columns: if col not in date_columns: continue try: + col_dtype = df.schema.get(col) if format_to_use == DateFormat.TIMESTAMP: - is_aware = pl.col(col).str.contains(TZ_AWARE_REGEX, literal=False) - cleaned = pl.col(col).str.replace(TZ_AWARE_REGEX, "", literal=False) + as_text = pl.col(col).cast(pl.Utf8, strict=False) + is_date_only = as_text.str.contains(DATE_ONLY_REGEX, literal=False) + is_aware = as_text.str.contains(TZ_AWARE_REGEX, literal=False) + cleaned = as_text.str.replace(TZ_AWARE_REGEX, "", literal=False) dt_expr = cleaned.str.strptime(pl.Datetime, strict=False) df = df.with_columns( - pl.when(is_aware) + pl.when(is_date_only) + .then(dt_expr.dt.replace_time_zone(default_tz)) + .when(is_aware) .then( dt_expr.dt.replace_time_zone("UTC").dt.convert_time_zone( default_tz @@ -66,23 +70,47 @@ def _convert_timestamp_columns( .alias(col) ) elif format_to_use == DateFormat.SPREADSHEET: - df = df.with_columns( - pl.from_epoch( - ((pl.col(col).cast(pl.Float64) - 25569) * 86400).cast( - pl.Int64 - ), - time_unit="s", + as_text = pl.col(col).cast(pl.Utf8, strict=False) + is_date_only = as_text.str.contains(DATE_ONLY_REGEX, literal=False) + date_only_expr = as_text.str.strptime( + pl.Datetime, format="%Y-%m-%d", strict=False + ).dt.replace_time_zone(default_tz) + numeric = pl.col(col).cast(pl.Float64, strict=False) + parsed = pl.when(is_date_only).then(date_only_expr) + if convert_numeric: + parsed = parsed.when(numeric.is_not_null()).then( + pl.from_epoch( + ((numeric - 25569) * 86400).cast(pl.Int64), + time_unit="s", + ) + .dt.replace_time_zone("UTC") + .dt.convert_time_zone(default_tz) ) - .dt.replace_time_zone("UTC") - .dt.convert_time_zone(default_tz) - .alias(col) + df = df.with_columns( + parsed.otherwise( + pl.lit(None, dtype=pl.Datetime("us", default_tz)) + ).alias(col) ) else: + if not convert_numeric and col_dtype != pl.Utf8: + continue + as_text = pl.col(col).cast(pl.Utf8, strict=False) + is_date_only = as_text.str.contains(DATE_ONLY_REGEX, literal=False) + date_only_expr = as_text.str.strptime( + pl.Datetime, format="%Y-%m-%d", strict=False + ).dt.replace_time_zone(default_tz) + numeric = pl.col(col).cast(pl.Float64, strict=False) + parsed = pl.when(is_date_only).then(date_only_expr) + if convert_numeric: + parsed = parsed.when(numeric.is_not_null()).then( + pl.from_epoch(numeric, time_unit="s") + .dt.replace_time_zone("UTC") + .dt.convert_time_zone(default_tz) + ) df = df.with_columns( - pl.from_epoch(pl.col(col), time_unit="s") - .dt.replace_time_zone("UTC") - .dt.convert_time_zone(default_tz) - .alias(col) + parsed.otherwise( + pl.lit(None, dtype=pl.Datetime("us", default_tz)) + ).alias(col) ) except (ValueError, TypeError, AttributeError, pl.exceptions.PolarsError): pass diff --git a/src/tests/test_options_expirations.py b/src/tests/test_options_expirations.py index 1018fa0..70a494c 100644 --- a/src/tests/test_options_expirations.py +++ b/src/tests/test_options_expirations.py @@ -217,6 +217,36 @@ def test_pandas_handler_date_only_localization(): assert str(result_dt.tzinfo) in ["US/Eastern", "EDT", "EST"] +def test_pandas_handler_date_only_with_default_format(): + data = {"expirations": ["2026-02-20"]} + params = UserUniversalAPIParams() + handler = PandasOutputHandler(data, OptionsExpirations, params) + + df = handler.get_result() + result_dt = df["expirations"].iloc[0] + + assert result_dt.year == 2026 + assert result_dt.month == 2 + assert result_dt.day == 20 + assert result_dt.hour == 0 + assert str(result_dt.tzinfo) in ["US/Eastern", "EDT", "EST"] + + +def test_pandas_handler_date_only_with_unix_format(): + data = {"expirations": ["2026-02-20"]} + params = UserUniversalAPIParams(date_format=DateFormat.UNIX) + handler = PandasOutputHandler(data, OptionsExpirations, params) + + df = handler.get_result() + result_dt = df["expirations"].iloc[0] + + assert result_dt.year == 2026 + assert result_dt.month == 2 + assert result_dt.day == 20 + assert result_dt.hour == 0 + assert str(result_dt.tzinfo) in ["US/Eastern", "EDT", "EST"] + + def test_polars_handler_date_only_localization(): data = {"expirations": ["2026-02-20"]} params = UserUniversalAPIParams(date_format=DateFormat.TIMESTAMP) @@ -229,3 +259,31 @@ def test_polars_handler_date_only_localization(): assert result_dt.month == 2 assert result_dt.day == 20 assert result_dt.hour == 0 + + +def test_polars_handler_date_only_with_default_format(): + data = {"expirations": ["2026-02-20"]} + params = UserUniversalAPIParams() + handler = PolarsOutputHandler(data, OptionsExpirations, params) + + df = handler.get_result() + result_dt = df["expirations"][0] + + assert result_dt.year == 2026 + assert result_dt.month == 2 + assert result_dt.day == 20 + assert result_dt.hour == 0 + + +def test_polars_handler_date_only_with_unix_format(): + data = {"expirations": ["2026-02-20"]} + params = UserUniversalAPIParams(date_format=DateFormat.UNIX) + handler = PolarsOutputHandler(data, OptionsExpirations, params) + + df = handler.get_result() + result_dt = df["expirations"][0] + + assert result_dt.year == 2026 + assert result_dt.month == 2 + assert result_dt.day == 20 + assert result_dt.hour == 0 diff --git a/src/tests/test_output_handlers.py b/src/tests/test_output_handlers.py index ea673a9..2472917 100644 --- a/src/tests/test_output_handlers.py +++ b/src/tests/test_output_handlers.py @@ -3,6 +3,7 @@ from typing import Union from unittest.mock import patch +import pandas as pd import polars as pl import pytest import pytz @@ -362,6 +363,132 @@ def test_pandas_output_handler_multiple_timestamp_columns(): assert hasattr(df[col].dtype, "tz") or "datetime" in str(df[col].dtype) +def test_pandas_output_handler_timestamp_date_only_and_parsed(): + """Test date-only parsing with mixed valid/invalid timestamps.""" + handler = PandasOutputHandler( + data={ + "updated": [ + "2026-02-20", + "2026-02-21T12:30:00", + "invalid", + ], + }, + output_schema=DummySchemaUpdated, + user_universal_params=_make_params(date_format=DateFormat.TIMESTAMP), + ) + df = handler.get_result() + assert df is not None + assert df["updated"].iloc[0].hour == 0 + assert df["updated"].iloc[1].tz is not None + assert df["updated"].iloc[2] == "invalid" + + +def test_pandas_output_handler_timestamp_partial_parse_without_date_only(): + """Test partial timestamp conversion without date-only values.""" + handler = PandasOutputHandler( + data={ + "updated": [ + "2026-02-21T12:30:00", + "invalid", + ], + }, + output_schema=DummySchemaUpdated, + user_universal_params=_make_params(date_format=DateFormat.TIMESTAMP), + ) + df = handler.get_result() + assert df is not None + assert df["updated"].iloc[0].tz is not None + assert df["updated"].iloc[1] == "invalid" + + +def test_pandas_output_handler_spreadsheet_date_only_and_numeric(): + """Test spreadsheet conversion with date-only and numeric values.""" + handler = PandasOutputHandler( + data={ + "updated": [ + "2026-02-20", + 45000, + ], + }, + output_schema=DummySchemaUpdated, + user_universal_params=_make_params(date_format=DateFormat.SPREADSHEET), + ) + df = handler.get_result() + assert df is not None + assert df["updated"].iloc[0].hour == 0 + assert df["updated"].iloc[1].tz is not None + + +def test_pandas_output_handler_spreadsheet_partial_numeric_no_date_only(): + """Test spreadsheet conversion with invalid non-date values.""" + handler = PandasOutputHandler( + data={ + "updated": [ + 45000, + "invalid", + ], + }, + output_schema=DummySchemaUpdated, + user_universal_params=_make_params(date_format=DateFormat.SPREADSHEET), + ) + df = handler.get_result() + assert df is not None + assert df["updated"].iloc[0].tz is not None + assert df["updated"].iloc[1] == "invalid" + + +def test_pandas_output_handler_unix_date_only_and_numeric(): + """Test unix conversion with date-only and numeric values.""" + handler = PandasOutputHandler( + data={ + "updated": [ + "2026-02-20", + 1765552906, + ], + }, + output_schema=DummySchemaUpdated, + user_universal_params=_make_params(), + ) + df = handler.get_result() + assert df is not None + assert df["updated"].iloc[0].hour == 0 + assert df["updated"].iloc[1].tz is not None + + +def test_pandas_output_handler_unix_partial_numeric_no_date_only(): + """Test unix conversion with invalid non-date values.""" + handler = PandasOutputHandler( + data={ + "updated": [ + 1765552906, + "invalid", + ], + }, + output_schema=DummySchemaUpdated, + user_universal_params=_make_params(), + ) + df = handler.get_result() + assert df is not None + assert df["updated"].iloc[0].tz is not None + assert df["updated"].iloc[1] == "invalid" + + +def test_pandas_output_handler_timestamp_conversion_exception(monkeypatch): + """Test that exceptions during conversion are handled gracefully.""" + def _raise(*args, **kwargs): + raise ValueError("boom") + + monkeypatch.setattr(pd.Series, "astype", _raise) + handler = PandasOutputHandler( + data={"updated": [1765552906]}, + output_schema=DummySchemaUpdated, + user_universal_params=_make_params(), + ) + df = handler.get_result() + assert df is not None + assert df["updated"].iloc[0] == 1765552906 + + def test_polars_output_handler_convert_timestamp_by_name(): """Test that timestamp columns are converted by column name.""" @@ -475,6 +602,22 @@ def test_polars_output_handler_timestamp_conversion_failure(): assert df["price"].dtype in [pl.Float64, pl.Float32] +def test_polars_output_handler_timestamp_conversion_exception(monkeypatch): + """Test that exceptions during conversion are handled gracefully.""" + def _raise(*args, **kwargs): + raise pl.exceptions.PolarsError("boom") + + monkeypatch.setattr(pl.DataFrame, "with_columns", _raise) + handler = PolarsOutputHandler( + data={"updated": [1765552906]}, + output_schema=DummySchemaUpdated, + user_universal_params=_make_params(), + ) + df = handler.get_result() + assert df is not None + assert df["updated"][0] == 1765552906 + + def test_pandas_output_handler_normalized_dataframe_fallback(): """Test that normalized dataframe fallback path is used when plain dataframe fails.""" handler = PandasOutputHandler( From f763f68a0a0388603fc5c3a2db5d92c52285bf52 Mon Sep 17 00:00:00 2001 From: santiago guichard Date: Fri, 13 Feb 2026 16:39:11 -0300 Subject: [PATCH 4/4] changed expiration to force dateformat=unix to get complete timestamp, removed previous pandas and polar logic to parse expiration dates, updated tests --- src/marketdata/output_handlers/pandas.py | 102 ++----------- src/marketdata/output_handlers/polars.py | 78 +++------- .../resources/options/expirations.py | 6 + ...ptions_expirations_human_response_200.json | 44 +++--- .../options_expirations_response_200.json | 44 +++--- src/tests/test_options_expirations.py | 133 +++------------- src/tests/test_output_handlers.py | 143 ------------------ 7 files changed, 106 insertions(+), 444 deletions(-) diff --git a/src/marketdata/output_handlers/pandas.py b/src/marketdata/output_handlers/pandas.py index fad434d..32199f8 100644 --- a/src/marketdata/output_handlers/pandas.py +++ b/src/marketdata/output_handlers/pandas.py @@ -46,108 +46,28 @@ def _convert_timestamp_columns( date_format: DateFormat | None, ) -> pd.DataFrame: """Convert date/time columns to timezone-aware datetime objects.""" - convert_numeric = date_format != DateFormat.UNIX + if date_format == DateFormat.UNIX: + return df + format_to_use = date_format or DateFormat.UNIX default_tz = pytz.timezone("US/Eastern") - date_only_regex = r"^\d{4}-\d{2}-\d{2}$" for col in df.columns: if col not in date_columns: continue try: - series = df[col] - date_only_mask = series.astype("string").str.match( - date_only_regex, na=False - ) - updated_series = None - if date_only_mask.any(): - date_only = pd.to_datetime( - series[date_only_mask], format="%Y-%m-%d", errors="coerce" - ) - date_only = date_only.dt.tz_localize(default_tz) - updated_series = series.astype("object") - updated_series.loc[date_only_mask] = date_only - if format_to_use == DateFormat.TIMESTAMP: - remaining = ( - series[~date_only_mask] if date_only_mask.any() else series + df[col] = pd.to_datetime(df[col], utc=True).dt.tz_convert( + default_tz ) - parsed = pd.to_datetime(remaining, errors="coerce") - if parsed.dt.tz is None: - parsed = parsed.dt.tz_localize(default_tz) - else: - parsed = parsed.dt.tz_convert(default_tz) - parsed_mask = parsed.notna() - if parsed_mask.any(): - if date_only_mask.any(): - updated_series.loc[remaining.index[parsed_mask]] = parsed[ - parsed_mask - ] - elif parsed_mask.all(): - df[col] = parsed - else: - updated_series = series.astype("object") - updated_series.loc[remaining.index[parsed_mask]] = parsed[ - parsed_mask - ] elif format_to_use == DateFormat.SPREADSHEET: - if convert_numeric: - remaining = ( - series[~date_only_mask] if date_only_mask.any() else series - ) - numeric = pd.to_numeric(remaining, errors="coerce") - numeric_mask = numeric.notna() - if numeric_mask.any(): - converted = pd.to_datetime( - numeric[numeric_mask], - unit="D", - origin="1899-12-30", - utc=True, - ).dt.tz_convert(default_tz) - if date_only_mask.any(): - updated_series.loc[remaining.index[numeric_mask]] = ( - converted - ) - elif numeric_mask.all(): - full_converted = pd.to_datetime( - numeric, - unit="D", - origin="1899-12-30", - utc=True, - ).dt.tz_convert(default_tz) - df[col] = full_converted - else: - updated_series = series.astype("object") - updated_series.loc[remaining.index[numeric_mask]] = ( - converted - ) + df[col] = pd.to_datetime( + df[col], unit="D", origin="1899-12-30", utc=True + ).dt.tz_convert(default_tz) else: - if convert_numeric: - remaining = ( - series[~date_only_mask] if date_only_mask.any() else series - ) - numeric = pd.to_numeric(remaining, errors="coerce") - numeric_mask = numeric.notna() - if numeric_mask.any(): - converted = pd.to_datetime( - numeric[numeric_mask], unit="s", utc=True - ).dt.tz_convert(default_tz) - if date_only_mask.any(): - updated_series.loc[remaining.index[numeric_mask]] = ( - converted - ) - elif numeric_mask.all(): - full_converted = pd.to_datetime( - numeric, unit="s", utc=True - ).dt.tz_convert(default_tz) - df[col] = full_converted - else: - updated_series = series.astype("object") - updated_series.loc[remaining.index[numeric_mask]] = ( - converted - ) - if updated_series is not None: - df[col] = updated_series + df[col] = pd.to_datetime(df[col], unit="s", utc=True).dt.tz_convert( + default_tz + ) except (ValueError, TypeError, AttributeError): pass diff --git a/src/marketdata/output_handlers/polars.py b/src/marketdata/output_handlers/polars.py index 3078e36..a0d0420 100644 --- a/src/marketdata/output_handlers/polars.py +++ b/src/marketdata/output_handlers/polars.py @@ -39,78 +39,44 @@ def _convert_timestamp_columns( date_format: DateFormat | None, ) -> pl.DataFrame: """Convert date/time columns to timezone-aware datetime objects.""" - convert_numeric = date_format != DateFormat.UNIX + if date_format == DateFormat.UNIX: + return df + format_to_use = date_format or DateFormat.UNIX default_tz = pytz.timezone("US/Eastern").zone - DATE_ONLY_REGEX = r"^\d{4}-\d{2}-\d{2}$" - TZ_AWARE_REGEX = r"(Z|[+-]\d{2}:?\d{2})$" for col in df.columns: if col not in date_columns: continue try: - col_dtype = df.schema.get(col) if format_to_use == DateFormat.TIMESTAMP: - as_text = pl.col(col).cast(pl.Utf8, strict=False) - is_date_only = as_text.str.contains(DATE_ONLY_REGEX, literal=False) - is_aware = as_text.str.contains(TZ_AWARE_REGEX, literal=False) - cleaned = as_text.str.replace(TZ_AWARE_REGEX, "", literal=False) - dt_expr = cleaned.str.strptime(pl.Datetime, strict=False) - + cleaned = pl.col(col).str.replace( + r"(Z|[+-]\d{2}:?\d{2})$", "", literal=False + ) df = df.with_columns( - pl.when(is_date_only) - .then(dt_expr.dt.replace_time_zone(default_tz)) - .when(is_aware) - .then( - dt_expr.dt.replace_time_zone("UTC").dt.convert_time_zone( - default_tz - ) - ) - .otherwise(dt_expr.dt.replace_time_zone(default_tz)) + cleaned.str.strptime(pl.Datetime, strict=False) + .dt.replace_time_zone("UTC") + .dt.convert_time_zone(default_tz) .alias(col) ) elif format_to_use == DateFormat.SPREADSHEET: - as_text = pl.col(col).cast(pl.Utf8, strict=False) - is_date_only = as_text.str.contains(DATE_ONLY_REGEX, literal=False) - date_only_expr = as_text.str.strptime( - pl.Datetime, format="%Y-%m-%d", strict=False - ).dt.replace_time_zone(default_tz) - numeric = pl.col(col).cast(pl.Float64, strict=False) - parsed = pl.when(is_date_only).then(date_only_expr) - if convert_numeric: - parsed = parsed.when(numeric.is_not_null()).then( - pl.from_epoch( - ((numeric - 25569) * 86400).cast(pl.Int64), - time_unit="s", - ) - .dt.replace_time_zone("UTC") - .dt.convert_time_zone(default_tz) - ) df = df.with_columns( - parsed.otherwise( - pl.lit(None, dtype=pl.Datetime("us", default_tz)) - ).alias(col) + pl.from_epoch( + ((pl.col(col).cast(pl.Float64) - 25569) * 86400).cast( + pl.Int64 + ), + time_unit="s", + ) + .dt.replace_time_zone("UTC") + .dt.convert_time_zone(default_tz) + .alias(col) ) else: - if not convert_numeric and col_dtype != pl.Utf8: - continue - as_text = pl.col(col).cast(pl.Utf8, strict=False) - is_date_only = as_text.str.contains(DATE_ONLY_REGEX, literal=False) - date_only_expr = as_text.str.strptime( - pl.Datetime, format="%Y-%m-%d", strict=False - ).dt.replace_time_zone(default_tz) - numeric = pl.col(col).cast(pl.Float64, strict=False) - parsed = pl.when(is_date_only).then(date_only_expr) - if convert_numeric: - parsed = parsed.when(numeric.is_not_null()).then( - pl.from_epoch(numeric, time_unit="s") - .dt.replace_time_zone("UTC") - .dt.convert_time_zone(default_tz) - ) df = df.with_columns( - parsed.otherwise( - pl.lit(None, dtype=pl.Datetime("us", default_tz)) - ).alias(col) + pl.from_epoch(pl.col(col), time_unit="s") + .dt.replace_time_zone("UTC") + .dt.convert_time_zone(default_tz) + .alias(col) ) except (ValueError, TypeError, AttributeError, pl.exceptions.PolarsError): pass diff --git a/src/marketdata/resources/options/expirations.py b/src/marketdata/resources/options/expirations.py index 88dc7c7..baab437 100644 --- a/src/marketdata/resources/options/expirations.py +++ b/src/marketdata/resources/options/expirations.py @@ -39,6 +39,11 @@ def expirations( self.client.default_params, user_universal_params ) + # Force dateformat=unix so the API returns unix timestamps instead of + # date-only strings (yyyy-mm-dd) which caused off-by-one-day errors + # when converted to datetime objects with timezone info. + user_universal_params.date_format = None + url = self._build_url( path=f"options/expirations/{symbol}/", user_universal_params=user_universal_params, @@ -46,6 +51,7 @@ def expirations( extra_params=kwargs, excluded_params=["symbol"], ) + url += "&dateformat=unix" self.logger.debug("Fetching options expirations...") response = self.client._make_request(method="GET", url=url) diff --git a/src/tests/data/options_expirations_human_response_200.json b/src/tests/data/options_expirations_human_response_200.json index ae5bb10..2481f8b 100644 --- a/src/tests/data/options_expirations_human_response_200.json +++ b/src/tests/data/options_expirations_human_response_200.json @@ -1,27 +1,27 @@ { "Expirations": [ - "2025-12-12", - "2025-12-19", - "2025-12-26", - "2026-01-02", - "2026-01-09", - "2026-01-16", - "2026-01-23", - "2026-01-30", - "2026-02-20", - "2026-03-20", - "2026-04-17", - "2026-05-15", - "2026-06-18", - "2026-07-17", - "2026-08-21", - "2026-09-18", - "2026-12-18", - "2027-01-15", - "2027-06-17", - "2027-12-17", - "2028-01-21", - "2028-03-17" + 1765515600, + 1766120400, + 1766725200, + 1767330000, + 1767934800, + 1768539600, + 1769144400, + 1769749200, + 1771563600, + 1773979200, + 1776398400, + 1778817600, + 1781755200, + 1784260800, + 1787284800, + 1789704000, + 1797570000, + 1799989200, + 1813204800, + 1829019600, + 1832043600, + 1836878400 ], "Date": 1765561297 } \ No newline at end of file diff --git a/src/tests/data/options_expirations_response_200.json b/src/tests/data/options_expirations_response_200.json index dce4a70..73a2596 100644 --- a/src/tests/data/options_expirations_response_200.json +++ b/src/tests/data/options_expirations_response_200.json @@ -1,28 +1,28 @@ { "s": "ok", "expirations": [ - "2025-12-05", - "2025-12-12", - "2025-12-19", - "2025-12-26", - "2026-01-02", - "2026-01-09", - "2026-01-16", - "2026-01-23", - "2026-02-20", - "2026-03-20", - "2026-04-17", - "2026-05-15", - "2026-06-18", - "2026-07-17", - "2026-08-21", - "2026-09-18", - "2026-12-18", - "2027-01-15", - "2027-06-17", - "2027-12-17", - "2028-01-21", - "2028-03-17" + 1764910800, + 1765515600, + 1766120400, + 1766725200, + 1767330000, + 1767934800, + 1768539600, + 1769144400, + 1771563600, + 1773979200, + 1776398400, + 1778817600, + 1781755200, + 1784260800, + 1787284800, + 1789704000, + 1797570000, + 1799989200, + 1813204800, + 1829019600, + 1832043600, + 1836878400 ], "updated": 1764941963 } \ No newline at end of file diff --git a/src/tests/test_options_expirations.py b/src/tests/test_options_expirations.py index 70a494c..f0dc705 100644 --- a/src/tests/test_options_expirations.py +++ b/src/tests/test_options_expirations.py @@ -5,29 +5,27 @@ import pytz from marketdata.input_types.base import ( - DateFormat, OutputFormat, - UserUniversalAPIParams, ) -from marketdata.output_handlers.pandas import PandasOutputHandler -from marketdata.output_handlers.polars import PolarsOutputHandler from marketdata.output_types.options_expirations import ( OptionsExpirations, OptionsExpirationsHumanReadable, ) from marketdata.sdk_error import MarketDataClientErrorResult +ET = pytz.timezone("US/Eastern") + def test_options_expirations_str(): timestamp = int( datetime.datetime( - 2025, 1, 1, 0, 0, 0, 0, pytz.timezone("US/Eastern") + 2025, 1, 1, 0, 0, 0, 0, ET ).timestamp() ) instance = OptionsExpirations( s="ok", - expirations=["2025-01-01"], + expirations=[timestamp], updated=timestamp, ) @@ -37,7 +35,7 @@ def test_options_expirations_str(): def test_options_expirations_human_readable_str(): timestamp = int( datetime.datetime( - 2025, 1, 1, 0, 0, 0, 0, pytz.timezone("US/Eastern") + 2025, 1, 1, 0, 0, 0, 0, ET ).timestamp() ) instance = OptionsExpirationsHumanReadable( @@ -60,13 +58,14 @@ def test_get_options_expirations_response_200_internal(load_json, respx_mock, cl ) assert expirations.s == "ok" assert len(expirations.expirations) == 22 - # Date strings are parsed as naive datetimes by format_timestamp - assert expirations.expirations[0] == datetime.datetime(2025, 12, 5, 0, 0) - # API returns UTC, convert to US/Eastern for comparison - expected = datetime.datetime( - 2025, 12, 5, 13, 39, 23, tzinfo=datetime.timezone.utc - ).astimezone(pytz.timezone("US/Eastern")) - assert expirations.updated.astimezone(pytz.timezone("US/Eastern")) == expected + # Unix timestamps are converted to US/Eastern datetimes + assert expirations.expirations[0] == datetime.datetime.fromtimestamp( + 1764910800, tz=ET + ) + assert expirations.expirations[0].date() == datetime.date(2025, 12, 5) + assert expirations.updated == datetime.datetime.fromtimestamp( + 1764941963, tz=ET + ) def test_get_options_expirations_response_200_json(load_json, respx_mock, client): @@ -92,13 +91,14 @@ def test_get_options_expirations_human_response_200(load_json, respx_mock, clien expirations = client.options.expirations( symbol="AAPL", output_format=OutputFormat.INTERNAL, use_human_readable=True ) - # Date strings are parsed as naive datetimes by format_timestamp - assert expirations.Expirations[0] == datetime.datetime(2025, 12, 12, 0, 0) - # API returns UTC, convert to US/Eastern for comparison - expected = datetime.datetime( - 2025, 12, 12, 17, 41, 37, tzinfo=datetime.timezone.utc - ).astimezone(pytz.timezone("US/Eastern")) - assert expirations.Date.astimezone(pytz.timezone("US/Eastern")) == expected + # Unix timestamps are converted to US/Eastern datetimes + assert expirations.Expirations[0] == datetime.datetime.fromtimestamp( + 1765515600, tz=ET + ) + assert expirations.Expirations[0].date() == datetime.date(2025, 12, 12) + assert expirations.Date == datetime.datetime.fromtimestamp( + 1765561297, tz=ET + ) def test_get_options_expirations_response_200_dataframe_pandas( @@ -123,7 +123,7 @@ def test_get_options_expirations_response_200_dataframe_pandas( assert "s" not in expirations.columns assert len(expirations) == 22 assert expirations["updated"].iloc[0] == datetime.datetime.fromtimestamp( - 1764941963, tz=pytz.timezone("US/Eastern") + 1764941963, tz=ET ) @@ -148,7 +148,7 @@ def test_get_options_expirations_response_200_dataframe_polars( assert "s" not in expirations.columns assert len(expirations) == 22 assert expirations["updated"][0] == datetime.datetime.fromtimestamp( - 1764941963, tz=pytz.timezone("US/Eastern") + 1764941963, tz=ET ) @@ -200,90 +200,3 @@ def test_get_options_expirations_response_200_csv(respx_mock, client): symbol="AAPL", output_format=OutputFormat.CSV, filename="test.csv" ) assert pathlib.Path(output).read_text() == "AS RECEIVED FROM API" - - -def test_pandas_handler_date_only_localization(): - data = {"expirations": ["2026-02-20"]} - params = UserUniversalAPIParams(date_format=DateFormat.TIMESTAMP) - handler = PandasOutputHandler(data, OptionsExpirations, params) - - df = handler.get_result() - result_dt = df["expirations"].iloc[0] - - assert result_dt.year == 2026 - assert result_dt.month == 2 - assert result_dt.day == 20 - assert result_dt.hour == 0 - assert str(result_dt.tzinfo) in ["US/Eastern", "EDT", "EST"] - - -def test_pandas_handler_date_only_with_default_format(): - data = {"expirations": ["2026-02-20"]} - params = UserUniversalAPIParams() - handler = PandasOutputHandler(data, OptionsExpirations, params) - - df = handler.get_result() - result_dt = df["expirations"].iloc[0] - - assert result_dt.year == 2026 - assert result_dt.month == 2 - assert result_dt.day == 20 - assert result_dt.hour == 0 - assert str(result_dt.tzinfo) in ["US/Eastern", "EDT", "EST"] - - -def test_pandas_handler_date_only_with_unix_format(): - data = {"expirations": ["2026-02-20"]} - params = UserUniversalAPIParams(date_format=DateFormat.UNIX) - handler = PandasOutputHandler(data, OptionsExpirations, params) - - df = handler.get_result() - result_dt = df["expirations"].iloc[0] - - assert result_dt.year == 2026 - assert result_dt.month == 2 - assert result_dt.day == 20 - assert result_dt.hour == 0 - assert str(result_dt.tzinfo) in ["US/Eastern", "EDT", "EST"] - - -def test_polars_handler_date_only_localization(): - data = {"expirations": ["2026-02-20"]} - params = UserUniversalAPIParams(date_format=DateFormat.TIMESTAMP) - handler = PolarsOutputHandler(data, OptionsExpirations, params) - - df = handler.get_result() - result_dt = df["expirations"][0] - - assert result_dt.year == 2026 - assert result_dt.month == 2 - assert result_dt.day == 20 - assert result_dt.hour == 0 - - -def test_polars_handler_date_only_with_default_format(): - data = {"expirations": ["2026-02-20"]} - params = UserUniversalAPIParams() - handler = PolarsOutputHandler(data, OptionsExpirations, params) - - df = handler.get_result() - result_dt = df["expirations"][0] - - assert result_dt.year == 2026 - assert result_dt.month == 2 - assert result_dt.day == 20 - assert result_dt.hour == 0 - - -def test_polars_handler_date_only_with_unix_format(): - data = {"expirations": ["2026-02-20"]} - params = UserUniversalAPIParams(date_format=DateFormat.UNIX) - handler = PolarsOutputHandler(data, OptionsExpirations, params) - - df = handler.get_result() - result_dt = df["expirations"][0] - - assert result_dt.year == 2026 - assert result_dt.month == 2 - assert result_dt.day == 20 - assert result_dt.hour == 0 diff --git a/src/tests/test_output_handlers.py b/src/tests/test_output_handlers.py index 2472917..ea673a9 100644 --- a/src/tests/test_output_handlers.py +++ b/src/tests/test_output_handlers.py @@ -3,7 +3,6 @@ from typing import Union from unittest.mock import patch -import pandas as pd import polars as pl import pytest import pytz @@ -363,132 +362,6 @@ def test_pandas_output_handler_multiple_timestamp_columns(): assert hasattr(df[col].dtype, "tz") or "datetime" in str(df[col].dtype) -def test_pandas_output_handler_timestamp_date_only_and_parsed(): - """Test date-only parsing with mixed valid/invalid timestamps.""" - handler = PandasOutputHandler( - data={ - "updated": [ - "2026-02-20", - "2026-02-21T12:30:00", - "invalid", - ], - }, - output_schema=DummySchemaUpdated, - user_universal_params=_make_params(date_format=DateFormat.TIMESTAMP), - ) - df = handler.get_result() - assert df is not None - assert df["updated"].iloc[0].hour == 0 - assert df["updated"].iloc[1].tz is not None - assert df["updated"].iloc[2] == "invalid" - - -def test_pandas_output_handler_timestamp_partial_parse_without_date_only(): - """Test partial timestamp conversion without date-only values.""" - handler = PandasOutputHandler( - data={ - "updated": [ - "2026-02-21T12:30:00", - "invalid", - ], - }, - output_schema=DummySchemaUpdated, - user_universal_params=_make_params(date_format=DateFormat.TIMESTAMP), - ) - df = handler.get_result() - assert df is not None - assert df["updated"].iloc[0].tz is not None - assert df["updated"].iloc[1] == "invalid" - - -def test_pandas_output_handler_spreadsheet_date_only_and_numeric(): - """Test spreadsheet conversion with date-only and numeric values.""" - handler = PandasOutputHandler( - data={ - "updated": [ - "2026-02-20", - 45000, - ], - }, - output_schema=DummySchemaUpdated, - user_universal_params=_make_params(date_format=DateFormat.SPREADSHEET), - ) - df = handler.get_result() - assert df is not None - assert df["updated"].iloc[0].hour == 0 - assert df["updated"].iloc[1].tz is not None - - -def test_pandas_output_handler_spreadsheet_partial_numeric_no_date_only(): - """Test spreadsheet conversion with invalid non-date values.""" - handler = PandasOutputHandler( - data={ - "updated": [ - 45000, - "invalid", - ], - }, - output_schema=DummySchemaUpdated, - user_universal_params=_make_params(date_format=DateFormat.SPREADSHEET), - ) - df = handler.get_result() - assert df is not None - assert df["updated"].iloc[0].tz is not None - assert df["updated"].iloc[1] == "invalid" - - -def test_pandas_output_handler_unix_date_only_and_numeric(): - """Test unix conversion with date-only and numeric values.""" - handler = PandasOutputHandler( - data={ - "updated": [ - "2026-02-20", - 1765552906, - ], - }, - output_schema=DummySchemaUpdated, - user_universal_params=_make_params(), - ) - df = handler.get_result() - assert df is not None - assert df["updated"].iloc[0].hour == 0 - assert df["updated"].iloc[1].tz is not None - - -def test_pandas_output_handler_unix_partial_numeric_no_date_only(): - """Test unix conversion with invalid non-date values.""" - handler = PandasOutputHandler( - data={ - "updated": [ - 1765552906, - "invalid", - ], - }, - output_schema=DummySchemaUpdated, - user_universal_params=_make_params(), - ) - df = handler.get_result() - assert df is not None - assert df["updated"].iloc[0].tz is not None - assert df["updated"].iloc[1] == "invalid" - - -def test_pandas_output_handler_timestamp_conversion_exception(monkeypatch): - """Test that exceptions during conversion are handled gracefully.""" - def _raise(*args, **kwargs): - raise ValueError("boom") - - monkeypatch.setattr(pd.Series, "astype", _raise) - handler = PandasOutputHandler( - data={"updated": [1765552906]}, - output_schema=DummySchemaUpdated, - user_universal_params=_make_params(), - ) - df = handler.get_result() - assert df is not None - assert df["updated"].iloc[0] == 1765552906 - - def test_polars_output_handler_convert_timestamp_by_name(): """Test that timestamp columns are converted by column name.""" @@ -602,22 +475,6 @@ def test_polars_output_handler_timestamp_conversion_failure(): assert df["price"].dtype in [pl.Float64, pl.Float32] -def test_polars_output_handler_timestamp_conversion_exception(monkeypatch): - """Test that exceptions during conversion are handled gracefully.""" - def _raise(*args, **kwargs): - raise pl.exceptions.PolarsError("boom") - - monkeypatch.setattr(pl.DataFrame, "with_columns", _raise) - handler = PolarsOutputHandler( - data={"updated": [1765552906]}, - output_schema=DummySchemaUpdated, - user_universal_params=_make_params(), - ) - df = handler.get_result() - assert df is not None - assert df["updated"][0] == 1765552906 - - def test_pandas_output_handler_normalized_dataframe_fallback(): """Test that normalized dataframe fallback path is used when plain dataframe fails.""" handler = PandasOutputHandler(