diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index f7868b9e46c37..05a150587294e 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -642,6 +642,14 @@ def _get_indices(self, names):
         Safe get multiple indices, translate keys for
         datelike to underlying repr.
         """
+        # Only a single name is handled: the NA logic below inspects names[0] only.
+        assert len(names) == 1
+        if isna(names[0]):
+            # NA keys are looked up under np.nan.
+            return [self.indices.get(np.nan, [])]
+        if isinstance(names[0], tuple):
+            # Normalize NA components to np.nan without mutating the caller's list.
+            names = [tuple(np.nan if isna(comp) else comp for comp in names[0])]
 
         def get_converter(s):
             # possibly convert to the actual key types
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index d86264cb95dc5..9243dffd14f74 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -652,9 +652,23 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
         """dict {group name -> group indices}"""
         if len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex):
             # This shows unused categories in indices GH#38642
-            return self.groupings[0].indices
-        codes_list = [ping.codes for ping in self.groupings]
-        return get_indexer_dict(codes_list, self.levels)
+            result = self.groupings[0].indices
+        else:
+            codes_list = [ping.codes for ping in self.groupings]
+            result = get_indexer_dict(codes_list, self.levels)
+        if not self.dropna:
+            has_mi = isinstance(self.result_index, MultiIndex)
+            if not has_mi and self.result_index.hasnans:
+                result = {
+                    np.nan if isna(key) else key: value for key, value in result.items()
+                }
+            elif has_mi:
+                # MultiIndex has no efficient way to tell if there are NAs
+                result = {
+                    tuple(np.nan if isna(comp) else comp for comp in key): value
+                    for key, value in result.items()
+                }
+        return result
 
     @final
     @cache_readonly
diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py
index 4fe3aac629513..c20fc9e3d62e7 100644
--- a/pandas/tests/groupby/test_filters.py
+++ b/pandas/tests/groupby/test_filters.py
@@ -606,3 +606,33 @@ def test_filter_consistent_result_before_after_agg_func():
     grouper.sum()
     result = grouper.filter(lambda x: True)
     tm.assert_frame_equal(result, expected)
+
+
+def test_filter_with_non_values():
+    # GH 62501
+    df = DataFrame(
+        [
+            [1],
+            [None],
+        ],
+        columns=["a"],
+    )
+
+    result = df.groupby("a", dropna=False).filter(lambda x: True)
+    tm.assert_frame_equal(result, df)
+
+
+def test_filter_with_non_values_multi_index():
+    # GH 62501
+    df = DataFrame(
+        [
+            [1, 2],
+            [3, None],
+            [None, 4],
+            [None, None],
+        ],
+        columns=["a", "b"],
+    )
+
+    result = df.groupby(["a", "b"], dropna=False).filter(lambda x: True)
+    tm.assert_frame_equal(result, df)