@@ -1185,7 +1185,7 @@ def lpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr:
11851185
11861186 >>> result = df.select(
11871187 ... dfn.functions.lpad(
1188- ... dfn.col("a"), dfn.lit(10), dfn.lit(".")
1188+ ... dfn.col("a"), dfn.lit(10), characters=dfn.lit(".")
11891189 ... ).alias("lpad"))
11901190 >>> result.collect_column("lpad")[0].as_py()
11911191 '...the cat'
@@ -3372,7 +3372,9 @@ def approx_median(expression: Expr, filter: Expr | None = None) -> Expr:
33723372 >>> ctx = dfn.SessionContext()
33733373 >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
33743374 >>> result = df.aggregate(
3375- ... [], [dfn.functions.approx_median(dfn.col("a")).alias("v")])
3375+ ... [], [dfn.functions.approx_median(
3376+ ... dfn.col("a")
3377+ ... ).alias("v")])
33763378 >>> result.collect_column("v")[0].as_py()
33773379 2.0
33783380
@@ -3812,10 +3814,10 @@ def median(
38123814 >>> result = df.aggregate(
38133815 ... [], [dfn.functions.median(
38143816 ... dfn.col("a"), distinct=True,
3815- ... filter=dfn.col("a") > dfn.lit(0.0),
3817+ ... filter=dfn.col("a") < dfn.lit(3.0),
38163818 ... ).alias("v")])
38173819 >>> result.collect_column("v")[0].as_py()
3818- 2.0
3820+ 1.5
38193821 """
38203822 filter_raw = filter .expr if filter is not None else None
38213823 return Expr (f .median (expression .expr , distinct = distinct , filter = filter_raw ))
@@ -3976,12 +3978,11 @@ def var_pop(expression: Expr, filter: Expr | None = None) -> Expr:
39763978
39773979 Examples:
39783980 >>> ctx = dfn.SessionContext()
3979- >>> df = ctx.from_pydict({"a": [0.0, 2.0]})
3981+ >>> df = ctx.from_pydict({"a": [-1.0, 0.0, 2.0]})
39803982 >>> result = df.aggregate([], [dfn.functions.var_pop(dfn.col("a")).alias("v")])
39813983 >>> result.collect_column("v")[0].as_py()
3982- 1.0
3984+ 1.555...
39833985
3984- >>> df = ctx.from_pydict({"a": [-1.0, 0.0, 2.0]})
39853986 >>> result = df.aggregate(
39863987 ... [], [dfn.functions.var_pop(
39873988 ... dfn.col("a"),
@@ -4169,12 +4170,12 @@ def regr_intercept(
41694170
41704171 Examples:
41714172 >>> ctx = dfn.SessionContext()
4172- >>> df = ctx.from_pydict({"y": [2.0, 4.0, 6.0], "x": [1.0, 2.0, 3.0]})
4173+ >>> df = ctx.from_pydict({"y": [2.0, 4.0, 6.0], "x": [4.0, 16.0, 36.0]})
41734174 >>> result = df.aggregate(
41744175 ... [],
41754176 ... [dfn.functions.regr_intercept(dfn.col("y"), dfn.col("x")).alias("v")])
41764177 >>> result.collect_column("v")[0].as_py()
4177- 0.0
4178+ 1.714...
41784179
41794180 >>> result = df.aggregate(
41804181 ... [],
@@ -4183,7 +4184,7 @@ def regr_intercept(
41834184 ... filter=dfn.col("y") > dfn.lit(2.0)
41844185 ... ).alias("v")])
41854186 >>> result.collect_column("v")[0].as_py()
4186- 0.0
4187+ 2.4
41874188 """
41884189 filter_raw = filter .expr if filter is not None else None
41894190
@@ -4210,11 +4211,11 @@ def regr_r2(
42104211
42114212 Examples:
42124213 >>> ctx = dfn.SessionContext()
4213- >>> df = ctx.from_pydict({"y": [2.0, 4.0, 6.0], "x": [1.0, 2.0, 3.0]})
4214+ >>> df = ctx.from_pydict({"y": [2.0, 4.0, 6.0], "x": [4.0, 16.0, 36.0]})
42144215 >>> result = df.aggregate(
42154216 ... [], [dfn.functions.regr_r2(dfn.col("y"), dfn.col("x")).alias("v")])
42164217 >>> result.collect_column("v")[0].as_py()
4217- 1.0
4218+ 0.9795...
42184219
42194220 >>> result = df.aggregate(
42204221 ... [], [dfn.functions.regr_r2(
@@ -4249,19 +4250,19 @@ def regr_slope(
42494250
42504251 Examples:
42514252 >>> ctx = dfn.SessionContext()
4252- >>> df = ctx.from_pydict({"y": [2.0, 4.0, 6.0], "x": [1.0, 2.0, 3.0]})
4253+ >>> df = ctx.from_pydict({"y": [2.0, 4.0, 6.0], "x": [4.0, 16.0, 36.0]})
42534254 >>> result = df.aggregate(
42544255 ... [], [dfn.functions.regr_slope(dfn.col("y"), dfn.col("x")).alias("v")])
42554256 >>> result.collect_column("v")[0].as_py()
4256- 2.0
4257+ 0.122...
42574258
42584259 >>> result = df.aggregate(
42594260 ... [], [dfn.functions.regr_slope(
42604261 ... dfn.col("y"), dfn.col("x"),
42614262 ... filter=dfn.col("y") > dfn.lit(2.0)
42624263 ... ).alias("v")])
42634264 >>> result.collect_column("v")[0].as_py()
4264- 2.0
4265+ 0.1
42654266 """
42664267 filter_raw = filter .expr if filter is not None else None
42674268
@@ -4517,12 +4518,12 @@ def nth_value(
45174518 >>> ctx = dfn.SessionContext()
45184519 >>> df = ctx.from_pydict({"a": [10, 20, 30]})
45194520 >>> result = df.aggregate(
4520- ... [], [dfn.functions.nth_value(dfn.col("a"), 2).alias("v")]
4521+ ... [], [dfn.functions.nth_value(dfn.col("a"), 1).alias("v")]
45214522 ... )
45224523 >>> result.collect_column("v")[0].as_py()
4523- 20
4524+ 10
45244525
4525- >>> df = ctx.from_pydict({"a": [None, 20, 10]})
4526+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
45264527 >>> result = df.aggregate(
45274528 ... [], [dfn.functions.nth_value(
45284529 ... dfn.col("a"), 1,
@@ -4637,10 +4638,10 @@ def bit_xor(
46374638 >>> result = df.aggregate(
46384639 ... [], [dfn.functions.bit_xor(
46394640 ... dfn.col("a"), distinct=True,
4640- ... filter=dfn.col("a") > dfn.lit(0),
4641+ ... filter=dfn.col("a") > dfn.lit(3),
46414642 ... ).alias("v")])
46424643 >>> result.collect_column("v")[0].as_py()
4643- 6
4644+ 5
46444645 """
46454646 filter_raw = filter .expr if filter is not None else None
46464647 return Expr (f .bit_xor (expression .expr , distinct = distinct , filter = filter_raw ))
0 commit comments