From ddb36d950dc77c2e6ecd546f37dd92fdc0ef0ca3 Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Fri, 8 May 2026 14:37:25 +0200 Subject: [PATCH 1/4] Read fractions of seconds from SAS datasets --- pyreadstat/_readstat_parser.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyreadstat/_readstat_parser.pyx b/pyreadstat/_readstat_parser.pyx index 12dd1e3..42a0390 100644 --- a/pyreadstat/_readstat_parser.pyx +++ b/pyreadstat/_readstat_parser.pyx @@ -253,7 +253,8 @@ cdef object transform_datetime(py_datetime_format var_format, double tstamp, py_ # tstamp in seconds days = (floor(tstamp / 86400)) secs = (tstamp % 86400) - tdelta = timedelta_new(days, secs, 0) + usecs = (round(tstamp % 1 * 1e6)) + tdelta = timedelta_new(days, secs, usecs) #tdelta = timedelta(seconds=tstamp) mydat = origin + tdelta return mydat @@ -270,7 +271,8 @@ cdef object transform_datetime(py_datetime_format var_format, double tstamp, py_ # tstamp in seconds days = (floor(tstamp / 86400)) secs = (tstamp % 86400) - tdelta = timedelta_new(days, secs, 0) + usecs = (round(tstamp % 1 * 1e6)) + tdelta = timedelta_new(days, secs, usecs) #tdelta = timedelta(seconds=tstamp) mydat = origin + tdelta return mydat.time() From b2fed946446e0434d73ecd1495d4b4786cbe7629 Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Wed, 13 May 2026 16:33:29 +0200 Subject: [PATCH 2/4] Add a test for a SAS file with fractional seconds time --- test_data/basic/fractional_seconds.csv | 101 ++++++++++++++++++++ test_data/basic/fractional_seconds.sas7bdat | Bin 0 -> 131072 bytes tests/test_narwhalified.py | 22 ++++- 3 files changed, 122 insertions(+), 1 deletion(-) create mode 100755 test_data/basic/fractional_seconds.csv create mode 100755 test_data/basic/fractional_seconds.sas7bdat diff --git a/test_data/basic/fractional_seconds.csv b/test_data/basic/fractional_seconds.csv new file mode 100755 index 0000000..512d867 --- /dev/null +++ b/test_data/basic/fractional_seconds.csv @@ -0,0 +1,101 @@ +date,dtime,time +1993-06-10,1993-06-10T02:04:01.122463,02:04:01.122463 +2147-07-18,2147-07-18T02:16:02.883684,02:16:02.883684 +1739-04-22,1739-04-22T13:32:08.170115,13:32:08.170115 +2187-12-07,2187-12-07T15:41:41.567238,15:41:41.567238 +2278-04-20,2278-04-20T12:41:18.331215,12:41:18.331215 +2181-09-03,2181-09-03T16:20:48.795826,16:20:48.795826 +1992-01-21,1992-01-21T04:27:25.154369,04:27:25.154369 +1829-09-11,1829-09-11T10:47:37.282617,10:47:37.282618 +2294-10-29,2294-10-29T14:41:18.574982,14:41:18.574982 +1920-04-17,1920-04-17T16:31:07.566722,16:31:07.566723 +1868-07-28,1868-07-28T06:16:21.620745,06:16:21.620745 +2208-03-23,2208-03-23T05:13:20.026692,05:13:20.026692 +1929-05-15,1929-05-15T18:58:37.253180,18:58:37.253180 +1998-08-26,1998-08-26T02:03:58.743517,02:03:58.743517 +2122-11-24,2122-11-24T23:01:29.367663,23:01:29.367663 +1854-08-25,1854-08-25T01:56:49.077793,01:56:49.077793 +1936-01-09,1936-01-09T15:41:42.922773,15:41:42.922773 +1967-11-09,1967-11-09T15:23:02.071943,15:23:02.071943 +1703-11-02,1703-11-02T14:27:03.782796,14:27:03.782796 +1750-11-28,1750-11-28T08:52:49.219013,08:52:49.219013 +1734-07-09,1734-07-09T23:47:14.951282,23:47:14.951282 +2157-07-31,2157-07-31T21:52:15.157284,21:52:15.157284 +1950-01-07,1950-01-07T14:53:48.730921,14:53:48.730921 +1930-05-11,1930-05-11T03:24:48.220010,03:24:48.220011 +1772-06-29,1772-06-29T11:31:57.032263,11:31:57.032263 +2207-11-02,2207-11-02T16:23:11.818488,16:23:11.818488 +2275-05-17,2275-05-17T19:13:17.300388,19:13:17.300388 +1720-01-04,1720-01-04T17:34:54.322509,17:34:54.322509 +2273-01-24,2273-01-24T14:14:38.193558,14:14:38.193558 +1834-11-01,1834-11-01T06:04:17.738995,06:04:17.738995 +2266-10-02,2266-10-02T10:05:04.330017,10:05:04.330017 +1983-11-26,1983-11-26T12:30:57.127726,12:30:57.127726 +1967-11-22,1967-11-22T19:10:31.922508,19:10:31.922508 +1772-12-29,1772-12-29T17:17:40.509609,17:17:40.509609 +1920-08-05,1920-08-05T09:14:58.541695,09:14:58.541695 +2237-04-05,2237-04-05T11:10:54.366266,11:10:54.366266 +2285-04-22,2285-04-22T07:20:52.103394,07:20:52.103394 +1790-03-02,1790-03-02T11:20:35.978824,11:20:35.978824 +2222-09-28,2222-09-28T23:32:00.371672,23:32:00.371672 +2020-06-02,2020-06-02T11:24:03.381682,11:24:03.381682 +1942-09-06,1942-09-06T22:41:22.091431,22:41:22.091431 +1734-04-01,1734-04-01T02:06:18.583556,02:06:18.583556 +1710-04-28,1710-04-28T10:05:23.561800,10:05:23.561800 +1920-02-28,1920-02-28T14:46:28.076923,14:46:28.076923 +2059-01-28,2059-01-28T10:47:03.890320,10:47:03.890320 +1739-04-24,1739-04-24T22:59:28.010889,22:59:28.010889 +1911-07-10,1911-07-10T08:28:09.542829,08:28:09.542830 +2044-05-13,2044-05-13T15:42:33.791598,15:42:33.791598 +2281-02-02,2281-02-02T05:26:26.257008,05:26:26.257008 +1968-03-27,1968-03-27T22:34:47.154648,22:34:47.154648 +2237-11-09,2237-11-09T06:38:59.895151,06:38:59.895151 +1891-11-11,1891-11-11T03:51:21.527200,03:51:21.527201 +1769-05-08,1769-05-08T22:58:51.374555,22:58:51.374555 +2011-02-23,2011-02-23T20:37:24.734829,20:37:24.734829 +2211-07-10,2211-07-10T05:06:31.123780,05:06:31.123780 +2033-03-22,2033-03-22T06:28:12.726483,06:28:12.726483 +1824-10-31,1824-10-31T20:41:51.630083,20:41:51.630083 +1742-04-02,1742-04-02T22:15:15.392728,22:15:15.392728 +2286-01-29,2286-01-29T13:07:02.468262,13:07:02.468262 +1884-03-12,1884-03-12T07:45:14.166535,07:45:14.166534 +2096-04-26,2096-04-26T22:00:08.409092,22:00:08.409092 +1923-12-02,1923-12-02T13:15:39.641922,13:15:39.641922 +1723-06-12,1723-06-12T16:43:33.592489,16:43:33.592489 +2134-11-18,2134-11-18T06:18:01.839232,06:18:01.839232 +2269-02-20,2269-02-20T00:25:17.618843,00:25:17.618843 +1854-04-22,1854-04-22T05:25:48.059167,05:25:48.059166 +1968-07-13,1968-07-13T22:57:57.493756,22:57:57.493756 +1977-01-08,1977-01-08T09:30:29.495407,09:30:29.495407 +2010-12-19,2010-12-19T11:18:26.461924,11:18:26.461924 +2163-09-13,2163-09-13T18:33:24.278122,18:33:24.278122 +1730-01-15,1730-01-15T21:39:25.275543,21:39:25.275543 +1978-09-24,1978-09-24T23:11:39.162304,23:11:39.162304 +2224-02-08,2224-02-08T15:45:01.422703,15:45:01.422703 +1787-02-04,1787-02-04T11:01:01.320380,11:01:01.320380 +1964-01-18,1964-01-18T06:23:14.746125,06:23:14.746125 +1788-12-09,1788-12-09T09:30:16.346816,09:30:16.346816 +2290-09-07,2290-09-07T01:57:16.982105,01:57:16.982105 +2035-02-14,2035-02-14T23:34:56.107008,23:34:56.107008 +1905-02-18,1905-02-18T19:29:49.899169,19:29:49.899170 +2219-10-01,2219-10-01T20:28:45.250220,20:28:45.250220 +1781-08-02,1781-08-02T22:07:25.500849,22:07:25.500849 +1820-07-20,1820-07-20T13:07:18.717742,13:07:18.717742 +2171-03-31,2171-03-31T13:50:34.930294,13:50:34.930294 +2148-06-30,2148-06-30T07:32:48.692223,07:32:48.692223 +2093-03-04,2093-03-04T14:22:35.691149,14:22:35.691149 +2219-01-17,2219-01-17T20:32:47.025956,20:32:47.025956 +2032-06-27,2032-06-27T02:51:01.604809,02:51:01.604809 +1894-04-03,1894-04-03T00:15:54.122685,00:15:54.122684 +1960-02-06,1960-02-06T14:44:53.032016,14:44:53.032016 +1955-10-09,1955-10-09T18:14:44.323325,18:14:44.323324 +2195-06-21,2195-06-21T22:32:46.631441,22:32:46.631441 +1919-12-11,1919-12-11T19:11:02.762172,19:11:02.762173 +1741-05-21,1741-05-21T18:16:05.399772,18:16:05.399772 +1971-10-11,1971-10-11T00:14:23.048366,00:14:23.048366 +1810-04-22,1810-04-22T13:43:24.608671,13:43:24.608671 +2198-05-30,2198-05-30T12:31:55.632376,12:31:55.632376 +1774-04-27,1774-04-27T07:31:02.189986,07:31:02.189986 +2205-05-18,2205-05-18T10:39:57.032547,10:39:57.032547 +1875-08-15,1875-08-15T16:15:21.807336,16:15:21.807335 +2074-07-21,2074-07-21T08:35:02.723811,08:35:02.723811 diff --git a/test_data/basic/fractional_seconds.sas7bdat b/test_data/basic/fractional_seconds.sas7bdat new file mode 100755 index 0000000000000000000000000000000000000000..981a8a00eaa24006329db5aa61eb33ed1b315853 GIT binary patch literal 131072 zcmeI!c~n)^9tZGq0TB@t<7UnbnW#B{C_!oMbCa9_0dYtZ#7GeYmCO-AjT{q2LtQyl zT27#VC@8THDWV{gf(Xjw0HGrDbT9DEaIcuF*X#Z9-s+F-6morA%GR;UX ztc&RqR(*00bZa0SG_<0uX=z z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV= z5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHaf zKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb z2tWV=5P$##AOHafKmY;|fB*y_@DCSAPl?{7nWFLqA8Gk9UHQ(mjbVyq(iu4)8>ovO zsTjsmoe$t+e<9BwPvqla>ez^nZPoE3Wk>!TACDIDd|UTQJn^+yIIq5i5Hd``by9d{ zm)?XG8EljFmDwge5{QttjH@Dh-X4)W$e@8`X7#r#Elm{jRcCAImy%n1)w|S@MaHwk;!<1K1rC{mqP}}7=8YDpi0g^vM^0YQQVg(mI&gE^6EW{}wU)$O zHprdVy@mDex#+9jSDuM^rZAbzOA2#KdncV$bQYovF>gAqjNYotjn(?Gl3P?Zy6ceU zQfZumRuXz3z<*|hwj%BX@ygEnUCal$q|rO^eRpP_&gLH2?>ArG<+(JDLoqEdpE{&2 z@G-Y-?4nfz-sEqwlNkMuN-uF^HUbGAwv ze@`k&*%$ftyx*xc(n}j3c$!@+ z=k7O*t1$MsERElnqhY062cFuI%*j9e^qU82H^%z}xtKHXjNi9sS=~jSPD|34JdfUO zpr==Fx?{~~7Ue7E_3U1*7 zR2Ng}ySe7vr1u-ykKgAb+gfQp>(0=5rQq|^jWYiblrN_2vDUyzRrr&uadHQ?RQ`y}FQIro0#`6+(0a>RQ7IZ9IeLtx5s9zc_be&i5%QxBFSsFTHx}sPd-!iU=^slU$_SRHJ!8>0{{rRDE zU#(U`Zy$}9n`kR63T20NbMA?GI;oN-xY3!$jkVnF3n9)Ms-$}A+ekjE>Evktu#LUt z#z&FTdNDh5$Qs|gz1tVxXZ_wyu<_BCoSE25v~@~d!>mb^8$Ks!rCGJKj_zi4lsoBW z)3&vRThRTv=e~o|{tW%Nffj0AU)^O+D>tw03#&*Z-YUbovf?&iM(5!DBVF=M28(#ctLvf|L}%G6xW zSwB8uLGlB!KIpANa?RClT<_e6thFm29g_AlJ*Faay1fx|tVl}{6Opw%)bzGk&jkHU zbVoRLyWASVdX4a3y*y8PuFT{nVn4)Wi_r{5v0;hD_uNwlR|nlO5d_;3$p5 z^s1*rzdkYb%?u@%vi@{^l$Z3~BbG172H!(*#=GiRfAf|LUk;MyUlpn(3u}EmZ@#Og z@QZ6XzdANtyl>FGQtHq*Fec_+IhSPVena~$^}5XZg`|1~){i$l$7-MVc`|dPdVfy3 zL^do=Jm{fC*|O{N^CM%W_ks@3CPPCHxvy%x%MQLPPjM(#zZXMmh~|;YA zQhx`6>!^G9yC!?|lw5xDkwtg(q~~Sg;)zG{-kdiNb1adk?T=a@-M4s0BU$)cuEhj{ zcDDbrl#sKL(!OGHD`{NO(5w)bI_}1|JISee>ihNzCsul@$=g>PX8qpSL?Z(v2O3;S zm#tV+-`2N|d#|v6@2(fr^Iz9W<`k4IQTb^r#$EbgiE^Fv-4064C*S*M%hOMmuoF5t z#LD8;4ts7>oq&*l*tC0Gvz+lPG_Mvt{f9J?GUkqj^;Z79j(o!0X_xXrSubda$|AzP zxsHn&w?;c}nR=fTmQ%CMd;MY-R&ge8{MwjnB&{#Wts(15Ben(SG_hlzHC~-Jqg))n zD!q!#GVZFMH@JcQAQ{O!uxDpc~M} zM&&wrS9O#2EmLrh?4J{@FzZ^vo;u{=eM|qUI1X8HmsY;Bs@kujkgHBeWR;ma#XQ3( z$eG@~H%FOIWCtwyMEBS;X?$kjembQhCSpa#5zhU_jA8a&r1d53b!0)GigVvZDB0v< zk3JQd>iry+M#F3!M>OVSbLmd^t$gmQ_d{SU-Fook0n_MaZh*^IpSyLF^31Jsa>bR^wSl(=QBI?E5)? zt#s`AvKP`~&UKoUYWJhPA! zdO=r+g`qS(zg1rD6+SJtmOsfg|4B|$|G(@T_@>zrjeOg##W05I8}_g?*Ae#6E9=Ma zB&i8`sZMxOhVkS#C;tdy6BGZiKj|saoBr;UgeT$|#5EM&IpNqq-TSF~@%`ZAzt{9$ rc_-x3dnR6JAjbV($^6xLI{Zvz>bI<;Zt2UVtm)q$$Z< literal 0 HcmV?d00001 diff --git a/tests/test_narwhalified.py b/tests/test_narwhalified.py index 9721a1d..01cfb0c 100644 --- a/tests/test_narwhalified.py +++ b/tests/test_narwhalified.py @@ -171,6 +171,22 @@ def _prepare_data(self): self.df_sas_dates = df_dates2.to_native() #schema = {"date": nw.Date, "dtime": nw.Datetime("ns"), "time": nw.Time()} self.df_sas_dates2 = nw.concat([df_dates2, nw.from_dict({"date":[None], "dtime":[None], "time":[None]}, backend=backend)]).to_native() #, schema=schema + + # datetime and time variables with fractional seconds as well as unusual date, time and datetime formats + sas_fractional_seconds = os.path.join(self.basic_data_folder, "fractional_seconds.csv") + if backend == "polars": + kwds["try_parse_dates"] = True + df_fractional_seconds_raw = nw.read_csv(sas_fractional_seconds,backend=backend, **kwds) + df_fractional_seconds1 = df_fractional_seconds_raw.clone() + df_fractional_seconds1 = df_fractional_seconds1.to_native() + if backend == "pandas": + df_fractional_seconds1["date"] = pd.to_datetime(df_fractional_seconds1["date"]) + df_fractional_seconds1["date"] = df_fractional_seconds1["date"].apply(lambda x: x.date()) + df_fractional_seconds1["dtime"] = pd.to_datetime(df_fractional_seconds1["dtime"]) + df_fractional_seconds1["time"] = pd.to_datetime(df_fractional_seconds1["time"], format='%H:%M:%S.%f') + df_fractional_seconds1["time"] = df_fractional_seconds1["time"].apply(lambda x: x.time()) + self.df_sas_fractional_seconds = df_fractional_seconds1 + # character column with nan and object column with nan (object pyreadstat writer doesn't know what to do with) if backend == "pandas": self.df_charnan = pd.DataFrame([[0,np.nan,np.nan],[1,"test", timedelta]], columns = ["integer", "string", "object"]) @@ -574,7 +590,11 @@ def test_sas_dates_as_pandas(self): sas_file = os.path.join(self.basic_data_folder, "dates.sas7bdat") df_sas, meta = pyreadstat.read_sas7bdat(sas_file, dates_as_pandas_datetime=True, output_format=self.backend) self.assertTrue(df_sas.equals(self.df_sas_dates_as_pandas)) - + + def test_sas_fractional_seconds(self): + sas_file = os.path.join(self.basic_data_folder, "fractional_seconds.sas7bdat") + df_sas, meta = pyreadstat.read_sas7bdat(sas_file, output_format=self.backend) + self.assertTrue(df_sas.equals(self.df_sas_fractional_seconds)) def test_sas_user_missing(self): From 0818df5750c9b21983846f9361893142d5b51570 Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Thu, 14 May 2026 01:24:12 +0200 Subject: [PATCH 3/4] Prevent rounding errors when processing datetime to polars Datetime values in SAS, SPSS and STATA are stored as a floating point number. Any operation risks introducing a rounding error. Use integer math in order to preserve original interpretation. --- pyreadstat/_readstat_parser.pyx | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pyreadstat/_readstat_parser.pyx b/pyreadstat/_readstat_parser.pyx index 42a0390..24c030b 100644 --- a/pyreadstat/_readstat_parser.pyx +++ b/pyreadstat/_readstat_parser.pyx @@ -236,10 +236,10 @@ cdef object transform_datetime(py_datetime_format var_format, double tstamp, py_ # we want to return seconds from unix if file_format == FILE_FORMAT_STATA: # tstamp is in millisecons - return (tstamp/1000) - unix_to_origin_secs + return (tstamp/1000), unix_to_origin_secs else: # tstamp in seconds - return tstamp - unix_to_origin_secs + return tstamp, unix_to_origin_secs if file_format == FILE_FORMAT_STATA: # tstamp is in millisecons @@ -1107,7 +1107,16 @@ cdef object dict_to_dataframe(object dict_data, data_container dc): if var_format == DATE_FORMAT_DATE: date_cols.append(column) if datetime_cols: - data_frame = data_frame.with_columns(pl.from_epoch(pl.col(*datetime_cols), time_unit='s')) + data_frame = data_frame.with_columns( + [ + pl.from_epoch( + (pl.col(c).list.get(0) % 1 * 1e6).round().cast(pl.Int64) + ( + pl.col(c).list.get(0).floor() * 1e6).cast(pl.Int64) - ( + pl.col(c).list.get(1) * 1e6).cast(pl.Int64), + time_unit='us') + for c in datetime_cols if data_frame[c].len() > 0 + ] + ) if date_cols: data_frame = data_frame.with_columns(pl.from_epoch(pl.col(*date_cols), time_unit='d')) From 03e52af2ef26b3c7a81d30fa1adb0ecb761d9d2d Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Mon, 18 May 2026 17:30:38 +0200 Subject: [PATCH 4/4] Fetch `unix_to_origin_secs` from data container This prevents `transform_datetime()` from having to return a tuple. --- pyreadstat/_readstat_parser.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pyreadstat/_readstat_parser.pyx b/pyreadstat/_readstat_parser.pyx index 24c030b..802fd4c 100644 --- a/pyreadstat/_readstat_parser.pyx +++ b/pyreadstat/_readstat_parser.pyx @@ -233,13 +233,13 @@ cdef object transform_datetime(py_datetime_format var_format, double tstamp, py_ return mydat.date() elif var_format == DATE_FORMAT_DATETIME: if output_format == "polars": - # we want to return seconds from unix + # we want to return timestamp in seconds if file_format == FILE_FORMAT_STATA: # tstamp is in millisecons - return (tstamp/1000), unix_to_origin_secs + return (tstamp/1000) else: # tstamp in seconds - return tstamp, unix_to_origin_secs + return tstamp if file_format == FILE_FORMAT_STATA: # tstamp is in millisecons @@ -1110,9 +1110,9 @@ cdef object dict_to_dataframe(object dict_data, data_container dc): data_frame = data_frame.with_columns( [ pl.from_epoch( - (pl.col(c).list.get(0) % 1 * 1e6).round().cast(pl.Int64) + ( - pl.col(c).list.get(0).floor() * 1e6).cast(pl.Int64) - ( - pl.col(c).list.get(1) * 1e6).cast(pl.Int64), + (pl.col(c) % 1 * 1e6).round().cast(pl.Int64) + ( + pl.col(c).floor() * 1e6).cast(pl.Int64) - ( + pl.lit(dc.unix_to_origin_secs) * 1e6).cast(pl.Int64), time_unit='us') for c in datetime_cols if data_frame[c].len() > 0 ]