@@ -1256,6 +1256,32 @@ def nunique(self):
12561256 """
12571257 raise NotImplementedError (constants .ABSTRACT_METHOD_ERROR_MESSAGE )
12581258
def value_counts(
    self,
    normalize: bool = False,
    sort: bool = True,
    ascending: bool = False,
    dropna: bool = True,
):
    """
    Count how often each unique row occurs, as a Series or DataFrame.

    Args:
        normalize (bool, default False):
            If True, report proportions in place of raw frequencies.
        sort (bool, default True):
            Whether to order the result by frequency.
        ascending (bool, default False):
            Whether to sort in ascending order.
        dropna (bool, default True):
            If True, rows that contain NA values are left out of the
            counts.

    Returns:
        Series or DataFrame:
            A Series when the groupby's as_index is True; a DataFrame
            otherwise.
    """
    # Abstract stub: the body only raises, using the shared message constant.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
1284+
12591285
12601286class DataFrameGroupBy (GroupBy ):
12611287 def agg (self , func , ** kwargs ):
@@ -1406,3 +1432,102 @@ def nunique(self):
14061432 Number of unique values within a BigQuery DataFrame.
14071433 """
14081434 raise NotImplementedError (constants .ABSTRACT_METHOD_ERROR_MESSAGE )
1435+
def value_counts(
    self,
    subset=None,
    normalize: bool = False,
    sort: bool = True,
    ascending: bool = False,
    dropna: bool = True,
):
    """
    Count how often each unique row occurs, as a Series or DataFrame.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import numpy as np
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({
        ...     'gender': ['male', 'male', 'female', 'male', 'female', 'male'],
        ...     'education': ['low', 'medium', 'high', 'low', 'high', 'low'],
        ...     'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR']
        ... })

        >>> df
           gender education country
        0    male       low      US
        1    male    medium      FR
        2  female      high      US
        3    male       low      FR
        4  female      high      FR
        5    male       low      FR
        <BLANKLINE>
        [6 rows x 3 columns]

    Counting the rows of each ``gender`` group:

        >>> df.groupby('gender').value_counts()
        gender  education  country
        female  high       FR         1
                           US         1
        male    low        FR         2
                           US         1
                medium     FR         1
        Name: count, dtype: Int64

    Sorting the counts in ascending order:

        >>> df.groupby('gender').value_counts(ascending=True)
        gender  education  country
        female  high       FR         1
                           US         1
        male    low        US         1
                medium     FR         1
                low        FR         2
        Name: count, dtype: Int64

    Reporting proportions rather than counts:

        >>> df.groupby('gender').value_counts(normalize=True)
        gender  education  country
        female  high       FR          0.5
                           US          0.5
        male    low        FR          0.5
                           US         0.25
                medium     FR         0.25
        Name: proportion, dtype: Float64

    With ``as_index=False`` on the groupby, the result is a DataFrame:

        >>> df.groupby('gender', as_index=False).value_counts()
           gender education country  count
        0  female      high      FR      1
        1  female      high      US      1
        2    male       low      FR      2
        3    male       low      US      1
        4    male    medium      FR      1
        <BLANKLINE>
        [5 rows x 4 columns]

        >>> df.groupby('gender', as_index=False).value_counts(normalize=True)
           gender education country  proportion
        0  female      high      FR         0.5
        1  female      high      US         0.5
        2    male       low      FR         0.5
        3    male       low      US        0.25
        4    male    medium      FR        0.25
        <BLANKLINE>
        [5 rows x 4 columns]

    Args:
        subset (list-like, optional):
            Columns considered when counting unique combinations.
        normalize (bool, default False):
            If True, report proportions in place of raw frequencies.
        sort (bool, default True):
            Whether to order the result by frequency.
        ascending (bool, default False):
            Whether to sort in ascending order.
        dropna (bool, default True):
            If True, rows that contain NA values are left out of the
            counts.

    Returns:
        Series or DataFrame:
            A Series when the groupby's as_index is True; a DataFrame
            otherwise.
    """
    # Abstract stub: the body only raises, using the shared message constant.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
0 commit comments