Skip to content

Commit 435d82a

Browse files
ntjohnson1 and claude committed
Add docstring examples for Scalar temporal functions
Add example usage to docstrings for Scalar temporal functions to improve documentation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1160d5a commit 435d82a

File tree

1 file changed

+212
-8
lines changed

1 file changed

+212
-8
lines changed

python/datafusion/functions.py

Lines changed: 212 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,20 @@ def now() -> Expr:
11621162
"""Returns the current timestamp in nanoseconds.
11631163
11641164
This will use the same value for all instances of now() in the same statement.
1165+
1166+
Examples:
1167+
---------
1168+
>>> ctx = dfn.SessionContext()
1169+
>>> df = ctx.from_pydict({"a": [1]})
1170+
>>> result = df.select(
1171+
... dfn.functions.now().alias("now")
1172+
... )
1173+
1174+
Use .value instead of .as_py() because nanosecond timestamps
1175+
require pandas to convert to Python datetime objects.
1176+
1177+
>>> result.collect_column("now")[0].value > 0
1178+
True
11651179
"""
11661180
return Expr(f.now())
11671181

@@ -1222,6 +1236,18 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
12221236
For usage of ``formatters`` see the ``strftime`` module of the Rust chrono crate.
12231237
12241238
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
1239+
1240+
Examples:
1241+
---------
1242+
>>> ctx = dfn.SessionContext()
1243+
>>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
1244+
>>> result = df.select(
1245+
... dfn.functions.to_timestamp(
1246+
... dfn.col("a")
1247+
... ).alias("ts")
1248+
... )
1249+
>>> str(result.collect_column("ts")[0].as_py())
1250+
'2021-01-01 00:00:00'
12251251
"""
12261252
return Expr(f.to_timestamp(arg.expr, *_unwrap_exprs(formatters)))
12271253

@@ -1230,6 +1256,18 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr:
12301256
"""Converts a string and optional formats to a ``Timestamp`` in milliseconds.
12311257
12321258
See :py:func:`to_timestamp` for a description on how to use formatters.
1259+
1260+
Examples:
1261+
---------
1262+
>>> ctx = dfn.SessionContext()
1263+
>>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
1264+
>>> result = df.select(
1265+
... dfn.functions.to_timestamp_millis(
1266+
... dfn.col("a")
1267+
... ).alias("ts")
1268+
... )
1269+
>>> str(result.collect_column("ts")[0].as_py())
1270+
'2021-01-01 00:00:00'
12331271
"""
12341272
return Expr(f.to_timestamp_millis(arg.expr, *_unwrap_exprs(formatters)))
12351273

@@ -1238,6 +1276,18 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr:
12381276
"""Converts a string and optional formats to a ``Timestamp`` in microseconds.
12391277
12401278
See :py:func:`to_timestamp` for a description on how to use formatters.
1279+
1280+
Examples:
1281+
---------
1282+
>>> ctx = dfn.SessionContext()
1283+
>>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
1284+
>>> result = df.select(
1285+
... dfn.functions.to_timestamp_micros(
1286+
... dfn.col("a")
1287+
... ).alias("ts")
1288+
... )
1289+
>>> str(result.collect_column("ts")[0].as_py())
1290+
'2021-01-01 00:00:00'
12411291
"""
12421292
return Expr(f.to_timestamp_micros(arg.expr, *_unwrap_exprs(formatters)))
12431293

@@ -1246,6 +1296,18 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr:
12461296
"""Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
12471297
12481298
See :py:func:`to_timestamp` for a description on how to use formatters.
1299+
1300+
Examples:
1301+
---------
1302+
>>> ctx = dfn.SessionContext()
1303+
>>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
1304+
>>> result = df.select(
1305+
... dfn.functions.to_timestamp_nanos(
1306+
... dfn.col("a")
1307+
... ).alias("ts")
1308+
... )
1309+
>>> str(result.collect_column("ts")[0].as_py())
1310+
'2021-01-01 00:00:00'
12491311
"""
12501312
return Expr(f.to_timestamp_nanos(arg.expr, *_unwrap_exprs(formatters)))
12511313

@@ -1254,69 +1316,198 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr:
12541316
"""Converts a string and optional formats to a ``Timestamp`` in seconds.
12551317
12561318
See :py:func:`to_timestamp` for a description on how to use formatters.
1319+
1320+
Examples:
1321+
---------
1322+
>>> ctx = dfn.SessionContext()
1323+
>>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
1324+
>>> result = df.select(
1325+
... dfn.functions.to_timestamp_seconds(
1326+
... dfn.col("a")
1327+
... ).alias("ts")
1328+
... )
1329+
>>> str(result.collect_column("ts")[0].as_py())
1330+
'2021-01-01 00:00:00'
12571331
"""
12581332
return Expr(f.to_timestamp_seconds(arg.expr, *_unwrap_exprs(formatters)))
12591333

12601334

12611335
def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
1262-
"""Converts a string and optional formats to a Unixtime."""
1336+
"""Converts a string and optional formats to a Unixtime.
1337+
1338+
Examples:
1339+
---------
1340+
>>> ctx = dfn.SessionContext()
1341+
>>> df = ctx.from_pydict({"a": ["1970-01-01T00:00:00"]})
1342+
>>> result = df.select(dfn.functions.to_unixtime(dfn.col("a")).alias("u"))
1343+
>>> result.collect_column("u")[0].as_py()
1344+
0
1345+
"""
12631346
return Expr(f.to_unixtime(string.expr, *_unwrap_exprs(format_arguments)))
12641347

12651348

12661349
def current_date() -> Expr:
1267-
"""Returns current UTC date as a Date32 value."""
1350+
"""Returns current UTC date as a Date32 value.
1351+
1352+
Examples:
1353+
---------
1354+
>>> ctx = dfn.SessionContext()
1355+
>>> df = ctx.from_pydict({"a": [1]})
1356+
>>> result = df.select(
1357+
... dfn.functions.current_date().alias("d")
1358+
... )
1359+
>>> result.collect_column("d")[0].as_py() is not None
1360+
True
1361+
"""
12681362
return Expr(f.current_date())
12691363

12701364

12711365
today = current_date
12721366

12731367

12741368
def current_time() -> Expr:
1275-
"""Returns current UTC time as a Time64 value."""
1369+
"""Returns current UTC time as a Time64 value.
1370+
1371+
Examples:
1372+
---------
1373+
>>> ctx = dfn.SessionContext()
1374+
>>> df = ctx.from_pydict({"a": [1]})
1375+
>>> result = df.select(
1376+
... dfn.functions.current_time().alias("t")
1377+
... )
1378+
1379+
Use .value instead of .as_py() because nanosecond time values
1380+
require pandas to convert to Python time objects.
1381+
1382+
>>> result.collect_column("t")[0].value > 0
1383+
True
1384+
"""
12761385
return Expr(f.current_time())
12771386

12781387

12791388
def datepart(part: Expr, date: Expr) -> Expr:
12801389
"""Return a specified part of a date.
12811390
12821391
This is an alias for :py:func:`date_part`.
1392+
1393+
Examples:
1394+
---------
1395+
>>> ctx = dfn.SessionContext()
1396+
>>> df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
1397+
>>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
1398+
>>> result = df.select(
1399+
... dfn.functions.datepart(dfn.lit("month"), dfn.col("a")).alias("m"))
1400+
>>> result.collect_column("m")[0].as_py()
1401+
7
12831402
"""
12841403
return date_part(part, date)
12851404

12861405

12871406
def date_part(part: Expr, date: Expr) -> Expr:
1288-
"""Extracts a subfield from the date."""
1407+
"""Extracts a subfield from the date.
1408+
1409+
Examples:
1410+
---------
1411+
>>> ctx = dfn.SessionContext()
1412+
>>> df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
1413+
>>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
1414+
>>> result = df.select(
1415+
... dfn.functions.date_part(dfn.lit("year"), dfn.col("a")).alias("y"))
1416+
>>> result.collect_column("y")[0].as_py()
1417+
2021
1418+
"""
12891419
return Expr(f.date_part(part.expr, date.expr))
12901420

12911421

12921422
def extract(part: Expr, date: Expr) -> Expr:
12931423
"""Extracts a subfield from the date.
12941424
12951425
This is an alias for :py:func:`date_part`.
1426+
1427+
Examples:
1428+
---------
1429+
>>> ctx = dfn.SessionContext()
1430+
>>> df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
1431+
>>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
1432+
>>> result = df.select(
1433+
... dfn.functions.extract(dfn.lit("day"), dfn.col("a")).alias("d"))
1434+
>>> result.collect_column("d")[0].as_py()
1435+
15
12961436
"""
12971437
return date_part(part, date)
12981438

12991439

13001440
def date_trunc(part: Expr, date: Expr) -> Expr:
1301-
"""Truncates the date to a specified level of precision."""
1441+
"""Truncates the date to a specified level of precision.
1442+
1443+
Examples:
1444+
---------
1445+
>>> ctx = dfn.SessionContext()
1446+
>>> df = ctx.from_pydict({"a": ["2021-07-15T12:34:56"]})
1447+
>>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
1448+
>>> result = df.select(
1449+
... dfn.functions.date_trunc(
1450+
... dfn.lit("month"), dfn.col("a")
1451+
... ).alias("t")
1452+
... )
1453+
>>> str(result.collect_column("t")[0].as_py())
1454+
'2021-07-01 00:00:00'
1455+
"""
13021456
return Expr(f.date_trunc(part.expr, date.expr))
13031457

13041458

13051459
def datetrunc(part: Expr, date: Expr) -> Expr:
13061460
"""Truncates the date to a specified level of precision.
13071461
13081462
This is an alias for :py:func:`date_trunc`.
1463+
1464+
Examples:
1465+
---------
1466+
>>> ctx = dfn.SessionContext()
1467+
>>> df = ctx.from_pydict({"a": ["2021-07-15T12:34:56"]})
1468+
>>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
1469+
>>> result = df.select(
1470+
... dfn.functions.datetrunc(
1471+
... dfn.lit("year"), dfn.col("a")
1472+
... ).alias("t")
1473+
... )
1474+
>>> str(result.collect_column("t")[0].as_py())
1475+
'2021-01-01 00:00:00'
13091476
"""
13101477
return date_trunc(part, date)
13111478

13121479

13131480
def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr:
1314-
"""Coerces an arbitrary timestamp to the start of the nearest specified interval."""
1481+
"""Coerces an arbitrary timestamp to the start of the nearest specified interval.
1482+
1483+
Examples:
1484+
---------
1485+
>>> ctx = dfn.SessionContext()
1486+
>>> result = ctx.sql(
1487+
... "SELECT date_bin(interval '1 day',"
1488+
... " timestamp '2021-07-15 12:34:56',"
1489+
... " timestamp '2021-01-01') as b"
1490+
... )
1491+
>>> str(result.collect_column("b")[0].as_py())
1492+
'2021-07-15 00:00:00'
1493+
"""
13151494
return Expr(f.date_bin(stride.expr, source.expr, origin.expr))
13161495

13171496

13181497
def make_date(year: Expr, month: Expr, day: Expr) -> Expr:
1319-
"""Make a date from year, month and day component parts."""
1498+
"""Make a date from year, month and day component parts.
1499+
1500+
Examples:
1501+
---------
1502+
>>> from datetime import date
1503+
>>> ctx = dfn.SessionContext()
1504+
>>> df = ctx.from_pydict({"y": [2024], "m": [1], "d": [15]})
1505+
>>> result = df.select(
1506+
... dfn.functions.make_date(dfn.col("y"), dfn.col("m"),
1507+
... dfn.col("d")).alias("dt"))
1508+
>>> result.collect_column("dt")[0].as_py()
1509+
datetime.date(2024, 1, 15)
1510+
"""
13201511
return Expr(f.make_date(year.expr, month.expr, day.expr))
13211512

13221513

@@ -1393,7 +1584,20 @@ def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr:
13931584

13941585

13951586
def from_unixtime(arg: Expr) -> Expr:
1396-
"""Converts an integer to RFC3339 timestamp format string."""
1587+
"""Converts an integer to RFC3339 timestamp format string.
1588+
1589+
Examples:
1590+
---------
1591+
>>> ctx = dfn.SessionContext()
1592+
>>> df = ctx.from_pydict({"a": [0]})
1593+
>>> result = df.select(
1594+
... dfn.functions.from_unixtime(
1595+
... dfn.col("a")
1596+
... ).alias("ts")
1597+
... )
1598+
>>> str(result.collect_column("ts")[0].as_py())
1599+
'1970-01-01 00:00:00'
1600+
"""
13971601
return Expr(f.from_unixtime(arg.expr))
13981602

13991603

0 commit comments

Comments
 (0)