@@ -9,6 +9,8 @@ class providing the base-class of operations.
99"""
1010from __future__ import annotations
1111
12+ from typing import Literal
13+
1214from bigframes import constants
1315
1416
@@ -17,6 +19,82 @@ class GroupBy:
1719 Class for grouping and aggregating relational data.
1820 """
1921
def describe(self, include: None | Literal["all"] = None):
    """
    Compute summary statistics.

    The statistics summarize the central tendency, dispersion and shape
    of a dataset's distribution. ``NaN`` values are excluded.

    Args:
        include ("all" or None, optional):
            If "all": every column of the input is included in the output.
            If None: the result includes all numeric columns.

    .. note::
        Percentile values are approximate only.

    .. note::
        For numeric data, the result's index will include ``count``,
        ``mean``, ``std``, ``min``, ``max`` as well as the lower, ``50``
        and upper percentiles. By default the lower percentile is ``25``
        and the upper percentile is ``75``. The ``50`` percentile is the
        same as the median.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [0, 2, 8], "C": ["cat", "cat", "dog"]})
        >>> df
           A  B    C
        0  3  0  cat
        1  1  2  cat
        2  2  8  dog
        <BLANKLINE>
        [3 rows x 3 columns]

        >>> df.describe()
                 A         B
        count  3.0       3.0
        mean   2.0  3.333333
        std    1.0  4.163332
        min    1.0       0.0
        25%    1.0       0.0
        50%    2.0       2.0
        75%    3.0       8.0
        max    3.0       8.0
        <BLANKLINE>
        [8 rows x 2 columns]

    Using describe with ``include="all"``:

        >>> df.describe(include="all")
                    A         B     C
        count     3.0       3.0     3
        nunique  <NA>      <NA>     2
        mean      2.0  3.333333  <NA>
        std       1.0  4.163332  <NA>
        min       1.0       0.0  <NA>
        25%       1.0       0.0  <NA>
        50%       2.0       2.0  <NA>
        75%       3.0       8.0  <NA>
        max       3.0       8.0  <NA>
        <BLANKLINE>
        [9 rows x 3 columns]

    Returns:
        bigframes.pandas.DataFrame:
            Summary statistics of the Series or DataFrame provided.

    Raises:
        ValueError:
            If unsupported ``include`` type is provided.
    """
    # NOTE(review): the examples above call describe() on a DataFrame, not
    # on a grouped object -- confirm that is what GroupBy should document.
    # Base-class stub: this body unconditionally raises; the constant's name
    # suggests the method is meant to be overridden elsewhere.
    message = constants.ABSTRACT_METHOD_ERROR_MESSAGE
    raise NotImplementedError(message)
97+
2098 def any (self ):
2199 """
22100 Return True if any value in the group is true, else False.
0 commit comments