Skip to content

Commit fd428c9

Browse files
add docstring
1 parent fc83189 commit fd428c9

File tree

1 file changed

+78
-0
lines changed
  • third_party/bigframes_vendored/pandas/core/groupby

1 file changed

+78
-0
lines changed

third_party/bigframes_vendored/pandas/core/groupby/__init__.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ class providing the base-class of operations.
99
"""
1010
from __future__ import annotations
1111

12+
from typing import Literal
13+
1214
from bigframes import constants
1315

1416

@@ -17,6 +19,82 @@ class GroupBy:
1719
Class for grouping and aggregating relational data.
1820
"""
1921

22+
def describe(self, include: None | Literal["all"] = None):
23+
"""
24+
Generate descriptive statistics.
25+
26+
Descriptive statistics include those that summarize the central
27+
tendency, dispersion and shape of a
28+
dataset's distribution, excluding ``NaN`` values.
29+
30+
Args:
31+
include ("all" or None, optional):
32+
If "all": All columns of the input will be included in the output.
33+
If None: The result will include all numeric columns.
34+
35+
.. note::
36+
Percentile values are approximations only.
37+
38+
.. note::
39+
For numeric data, the result's index will include ``count``,
40+
``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
41+
upper percentiles. By default the lower percentile is ``25`` and the
42+
upper percentile is ``75``. The ``50`` percentile is the
43+
same as the median.
44+
45+
**Examples:**
46+
47+
>>> import bigframes.pandas as bpd
48+
>>> bpd.options.display.progress_bar = None
49+
50+
>>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [0, 2, 8], "C": ["cat", "cat", "dog"]})
51+
>>> df
52+
A B C
53+
0 3 0 cat
54+
1 1 2 cat
55+
2 2 8 dog
56+
<BLANKLINE>
57+
[3 rows x 3 columns]
58+
59+
>>> df.describe()
60+
A B
61+
count 3.0 3.0
62+
mean 2.0 3.333333
63+
std 1.0 4.163332
64+
min 1.0 0.0
65+
25% 1.0 0.0
66+
50% 2.0 2.0
67+
75% 3.0 8.0
68+
max 3.0 8.0
69+
<BLANKLINE>
70+
[8 rows x 2 columns]
71+
72+
73+
Using describe with ``include="all"``:
74+
>>> df.describe(include="all")
75+
A B C
76+
count 3.0 3.0 3
77+
nunique <NA> <NA> 2
78+
mean 2.0 3.333333 <NA>
79+
std 1.0 4.163332 <NA>
80+
min 1.0 0.0 <NA>
81+
25% 1.0 0.0 <NA>
82+
50% 2.0 2.0 <NA>
83+
75% 3.0 8.0 <NA>
84+
max 3.0 8.0 <NA>
85+
<BLANKLINE>
86+
[9 rows x 3 columns]
87+
88+
Returns:
89+
bigframes.pandas.DataFrame:
90+
Summary statistics of the Series or DataFrame provided.
91+
92+
Raises:
93+
ValueError:
94+
If an unsupported ``include`` type is provided.
95+
"""
96+
# NOTE(review): the doctest examples above call ``df.describe()`` directly on a
# DataFrame rather than on a GroupBy object, although this method belongs to
# ``GroupBy`` -- confirm and replace with a groupby-based example.
# Stub body: unconditionally raises NotImplementedError with the shared
# abstract-method message from ``bigframes.constants``.
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
97+
2098
def any(self):
2199
"""
22100
Return True if any value in the group is true, else False.

0 commit comments

Comments
 (0)