Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/how_tos/map_time_config.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ dst_table_name = "ev_charging_datetime"
hours_per_year = 12 * 7 * 24
num_time_arrays = 3
df = pd.DataFrame({
"id": np.concat([np.repeat(i, hours_per_year) for i in range(1, 1 + num_time_arrays)]),
"id": np.concatenate([np.repeat(i, hours_per_year) for i in range(1, 1 + num_time_arrays)]),
"month": np.tile(np.repeat(range(1, 13), 7 * 24), num_time_arrays),
"day_of_week": np.tile(np.tile(np.repeat(range(7), 24), 12), num_time_arrays),
"hour": np.tile(np.tile(range(24), 12 * 7), num_time_arrays),
Expand Down
6 changes: 2 additions & 4 deletions src/chronify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
AnnualTimeRange,
DatetimeRange,
DatetimeRangeWithTZColumn,
IndexTimeRangeNTZ,
IndexTimeRangeTZ,
IndexTimeRange,
IndexTimeRangeWithTZColumn,
RepresentativePeriodTimeNTZ,
RepresentativePeriodTimeTZ,
Expand All @@ -42,9 +41,8 @@
"CsvTableSchema",
"DatetimeRange",
"DatetimeRangeWithTZColumn",
"IndexTimeRange",
"IndexTimeRangeWithTZColumn",
"IndexTimeRangeNTZ",
"IndexTimeRangeTZ",
"InvalidOperation",
"InvalidParameter",
"InvalidTable",
Expand Down
147 changes: 85 additions & 62 deletions src/chronify/datetime_range_generator.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from datetime import datetime, tzinfo
from typing import Generator, Optional
from zoneinfo import ZoneInfo
from itertools import chain
from calendar import isleap

import pandas as pd

from chronify.time import (
LeapDayAdjustmentType,
TimeDataType,
)
from chronify.time_configs import DatetimeRanges, DatetimeRange, DatetimeRangeWithTZColumn
from chronify.time_utils import adjust_timestamp_by_dst_offset, get_tzname
from chronify.time_utils import get_tzname
from chronify.time_range_generator_base import TimeRangeGeneratorBase
from chronify.exceptions import InvalidValue

Expand All @@ -25,51 +26,52 @@ def __init__(
self._model = model
self._adjustment = leap_day_adjustment or LeapDayAdjustmentType.NONE

def _iter_timestamps(
self, start: Optional[datetime] = None
) -> Generator[datetime, None, None]:
"""
def _list_timestamps(self, start: Optional[datetime] = None) -> list[datetime]:
"""Return all timestamps as a list.
If start is supplied, it overrides self._model.start.
"""
if start is None:
start = self._model.start
tz = start.tzinfo

for i in range(self._model.length):
if not tz:
cur = adjust_timestamp_by_dst_offset(
start + i * self._model.resolution, self._model.resolution
)
else:
# always step in standard time
cur_utc = start.astimezone(ZoneInfo("UTC")) + i * self._model.resolution
cur = adjust_timestamp_by_dst_offset(
cur_utc.astimezone(tz), self._model.resolution
)

is_leap_year = (
pd.Timestamp(f"{cur.year}-01-01") + pd.Timedelta(days=365)
).year == cur.year
if not is_leap_year:
yield pd.Timestamp(cur)
continue

month = cur.month
day = cur.day
if not (
self._adjustment == LeapDayAdjustmentType.DROP_FEB29 and month == 2 and day == 29
):
if not (
self._adjustment == LeapDayAdjustmentType.DROP_DEC31
and month == 12
and day == 31
):
if not (
self._adjustment == LeapDayAdjustmentType.DROP_JAN1
and month == 1
and day == 1
):
yield pd.Timestamp(cur)

timestamps = pd.date_range(
start=start,
periods=self._model.length,
freq=self._model.resolution,
).tolist()

match self._adjustment:
case LeapDayAdjustmentType.DROP_FEB29:
timestamps = [
ts
for ts in timestamps
if not (isleap(ts.year) and ts.month == 2 and ts.day == 29)
]
case LeapDayAdjustmentType.DROP_DEC31:
timestamps = [
ts
for ts in timestamps
if not (isleap(ts.year) and ts.month == 12 and ts.day == 31)
]
case LeapDayAdjustmentType.DROP_JAN1:
timestamps = [
ts
for ts in timestamps
if not (isleap(ts.year) and ts.month == 1 and ts.day == 1)
]
case _:
pass

return timestamps # type: ignore

def _iter_timestamps(
self, start: Optional[datetime] = None
) -> Generator[datetime, None, None]:
"""Generator from pd.date_range().
Note: established time libraries already handle historical changes in a time zone's UTC offset when converting to UTC
(e.g. Algeria (Africa/Algiers) changed from UTC+0 to UTC+1 on April 25, 1980)
"""
for ts in self._list_timestamps(start=start):
yield ts

def list_time_columns(self) -> list[str]:
return self._model.list_time_columns()
Expand All @@ -93,7 +95,7 @@ def __init__(
assert isinstance(self._model, DatetimeRange)

def list_timestamps(self) -> list[datetime]:
return list(self._iter_timestamps())
return self._list_timestamps() # list(self._iter_timestamps())


class DatetimeRangeGeneratorExternalTimeZone(DatetimeRangeGeneratorBase):
Expand All @@ -117,43 +119,64 @@ def __init__(
)
raise InvalidValue(msg)

def _list_timestamps(self, time_zone: Optional[tzinfo]) -> list[datetime]:
"""always return tz-naive timestamps relative to input time_zone"""
if self._model.start_time_is_tz_naive():
if time_zone:
def _list_timestamps_by_time_zone(self, time_zone: Optional[tzinfo]) -> list[datetime]:
"""return timestamps for a given time_zone expected in the dataframe
The dtype of the returned timestamps matches that in the dataframe, i.e. self._model.dtype
(e.g., if time_zone is None, return tz-naive timestamps; otherwise return tz-aware timestamps).
"""
match (self._model.start_time_is_tz_naive(), self._model.dtype):
case (True, TimeDataType.TIMESTAMP_NTZ):
# aligned_in_local_standard_time of the time zone,
# all time zones must have the same tz-naive timestamps
# timestamps must represent local standard time zone, not local prevailing time zone with DST
start = self._model.start
case (True, TimeDataType.TIMESTAMP_TZ):
# aligned_in_local_standard_time of the time zone,
# all time zones have different tz-aware timestamps that are aligned when adjusted to local standard time zone
start = self._model.start.replace(tzinfo=time_zone)
else:
start = None
else:
if time_zone:
start = self._model.start.astimezone(time_zone)
else:
start = self._model.start.replace(tzinfo=None)
timestamps = list(self._iter_timestamps(start=start))
return [x.replace(tzinfo=None) for x in timestamps]
case (False, TimeDataType.TIMESTAMP_NTZ):
# aligned_in_absolute_time,
# all time zones have different tz-naive timestamps that are aligned when localized to the time zone
if time_zone:
start = self._model.start.astimezone(time_zone).replace(tzinfo=None)
else:
start = self._model.start.replace(tzinfo=None)
case (False, TimeDataType.TIMESTAMP_TZ):
# aligned_in_absolute_time, all time zones have the same tz-aware timestamps
start = self._model.start
case _:
msg = f"Unsupported combination of start_time_is_tz_naive and dtype: {self._model}"
raise InvalidValue(msg)
return self._list_timestamps(start=start) # list(self._iter_timestamps(start=start))

def list_timestamps(self) -> list[datetime]:
"""return ordered timestamps across all time zones in the order of the time zones."""
"""return ordered tz-naive timestamps across all time zones in the order of the time zones."""
dct = self.list_timestamps_by_time_zone()
return list(chain(*dct.values()))

def list_timestamps_by_time_zone(self) -> dict[str, list[datetime]]:
"""for each time zone, returns full timestamp iteration (duplicates allowed)"""
"""for each time zone, returns full timestamp iteration
(duplicates allowed)"""
dct = {}
for tz in self._model.get_time_zones():
tz_name = get_tzname(tz)
dct[tz_name] = self._list_timestamps(tz)

dct[tz_name] = self._list_timestamps_by_time_zone(time_zone=tz)
return dct

def list_distinct_timestamps_by_time_zone_from_dataframe(
self, df: pd.DataFrame
) -> dict[str, list[datetime]]:
"""
from the dataframe, for each time zone, returns distinct timestamps
"""
tz_col = self._model.get_time_zone_column()
t_col = self._model.time_column
df[t_col] = pd.to_datetime(df[t_col])
df2 = df[[tz_col, t_col]].drop_duplicates()
dct = {}
for tz_name in sorted(df2[tz_col].unique()):
dct[tz_name] = sorted(df2.loc[df2[tz_col] == tz_name, t_col].tolist())
timestamps = sorted(df2.loc[df2[tz_col] == tz_name, t_col].tolist())
# if timestamps[0].tzinfo:
# timestamps = [x.astimezone(tz_name).replace(tzinfo=None) for x in timestamps]
Comment on lines +179 to +180
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment appears to contain commented-out code.

Suggested change
# if timestamps[0].tzinfo:
# timestamps = [x.astimezone(tz_name).replace(tzinfo=None) for x in timestamps]

Copilot uses AI. Check for mistakes.
dct[tz_name] = timestamps
return dct
20 changes: 11 additions & 9 deletions src/chronify/sqlalchemy/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
import pandas as pd
from numpy.dtypes import DateTime64DType, ObjectDType
from pandas import DatetimeTZDtype
from chronify.time import TimeDataType
from sqlalchemy import Connection, Engine, Selectable, text

from chronify.exceptions import InvalidOperation, InvalidParameter
from chronify.time_configs import DatetimeRangeBase, DatetimeRange, TimeBaseModel
from chronify.time_configs import DatetimeRangeBase, TimeBaseModel
from chronify.utils.path_utils import check_overwrite, delete_if_exists, to_path

# Copied from Pandas/Polars
Expand All @@ -35,7 +36,7 @@ def read_database(
df = conn.execute(query).cursor.fetch_df() # type: ignore
case "sqlite":
df = pd.read_sql(query, conn, params=params)
if isinstance(config, DatetimeRange):
if isinstance(config, DatetimeRangeBase):
_convert_database_output_for_datetime(df, config)
case "hive":
df = _read_from_hive(query, conn, config, params)
Expand Down Expand Up @@ -81,17 +82,16 @@ def _check_one_config_per_datetime_column(configs: Sequence[TimeBaseModel]) -> N


def _convert_database_input_for_datetime(
df: pd.DataFrame, config: DatetimeRange, copied: bool
df: pd.DataFrame, config: DatetimeRangeBase, copied: bool
) -> tuple[pd.DataFrame, bool]:
if config.start_time_is_tz_naive():
if config.dtype == TimeDataType.TIMESTAMP_NTZ:
return df, copied

if copied:
df2 = df
else:
df2 = df.copy()
copied = True

if isinstance(df2[config.time_column].dtype, DatetimeTZDtype):
df2[config.time_column] = df2[config.time_column].dt.tz_convert("UTC")
else:
Expand All @@ -100,9 +100,9 @@ def _convert_database_input_for_datetime(
return df2, copied


def _convert_database_output_for_datetime(df: pd.DataFrame, config: DatetimeRange) -> None:
def _convert_database_output_for_datetime(df: pd.DataFrame, config: DatetimeRangeBase) -> None:
if config.time_column in df.columns:
if not config.start_time_is_tz_naive():
if config.dtype == TimeDataType.TIMESTAMP_TZ:
if isinstance(df[config.time_column].dtype, ObjectDType):
df[config.time_column] = pd.to_datetime(df[config.time_column], utc=True)
else:
Expand All @@ -120,6 +120,7 @@ def _write_to_duckdb(
) -> None:
assert conn._dbapi_connection is not None
assert conn._dbapi_connection.driver_connection is not None

match if_table_exists:
case "append":
query = f"INSERT INTO {table_name} SELECT * FROM df"
Expand All @@ -131,6 +132,7 @@ def _write_to_duckdb(
case _:
msg = f"{if_table_exists=}"
raise InvalidOperation(msg)

conn._dbapi_connection.driver_connection.sql(query)


Expand Down Expand Up @@ -190,7 +192,7 @@ def _read_from_hive(
) -> pd.DataFrame:
df = pd.read_sql_query(query, conn, params=params)
if (
isinstance(config, DatetimeRange)
isinstance(config, DatetimeRangeBase)
and config.time_column in df.columns
and not config.start_time_is_tz_naive()
):
Expand All @@ -210,7 +212,7 @@ def _write_to_sqlite(
_check_one_config_per_datetime_column(configs)
copied = False
for config in configs:
if isinstance(config, DatetimeRange):
if isinstance(config, DatetimeRangeBase):
df, copied = _convert_database_input_for_datetime(df, config, copied)
df.to_sql(table_name, conn, if_exists=if_table_exists, index=False)

Expand Down
Loading
Loading