Skip to content

Commit a709b22

Browse files
author
T. Koskamp
committed
BUG: Inconsistent behavior of Groupby with None values with filter (#62501)
- Add test cases
- Add tuple support
- Incorporate feedback
1 parent e405863 commit a709b22

File tree

2 files changed

+48
-7
lines changed

2 files changed

+48
-7
lines changed

pandas/core/groupby/groupby.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -679,21 +679,32 @@ def get_converter(s):
679679
)
680680
raise ValueError(msg) from err
681681

682-
converters = (get_converter(s) for s in index_sample)
682+
has_nan = any(isna(n) for n in name_sample)
683+
684+
sample = name_sample if has_nan else index_sample
685+
converters = (get_converter(s) for s in sample)
686+
683687
names = (
684688
tuple(f(n) for f, n in zip(converters, name, strict=True))
685689
for name in names
686690
)
687691

688-
elif any(isna(k) for k in self.indices.keys()):
689-
converters = [get_converter(name) for name in names]
690-
names = (converter(name) for converter, name in zip(converters, names))
691-
692+
indices = self.indices
693+
if not self.dropna and has_nan:
694+
indices = {}
695+
for k, v in self.indices.items():
696+
k = tuple(np.nan if isna(e) else e for e in k)
697+
indices[k] = v
692698
else:
693-
converter = get_converter(index_sample)
699+
has_nan = isna(name_sample)
700+
701+
convert_sample = name_sample if has_nan else index_sample
702+
converter = get_converter(convert_sample)
694703
names = (converter(name) for name in names)
695704

696-
indices = {np.nan if isna(k) else k: v for k, v in self.indices.items()}
705+
indices = self.indices
706+
if not self.dropna and has_nan:
707+
indices = {np.nan if isna(k) else k: v for k, v in indices.items()}
697708

698709
return [indices.get(name, []) for name in names]
699710

pandas/tests/groupby/test_filters.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,3 +606,33 @@ def test_filter_consistent_result_before_after_agg_func():
606606
grouper.sum()
607607
result = grouper.filter(lambda x: True)
608608
tm.assert_frame_equal(result, expected)
609+
610+
611+
def test_filter_with_non_values():
612+
# GH 62501
613+
df = DataFrame(
614+
[
615+
[1],
616+
[None],
617+
],
618+
columns=["a"],
619+
)
620+
621+
result = df.groupby("a", dropna=False).filter(lambda x: True)
622+
tm.assert_frame_equal(result, df)
623+
624+
625+
def test_filter_with_non_values_multi_index():
626+
# GH 62501
627+
df = DataFrame(
628+
[
629+
[1, 2],
630+
[3, None],
631+
[None, 4],
632+
[None, None],
633+
],
634+
columns=["a", "b"],
635+
)
636+
637+
result = df.groupby(["a", "b"], dropna=False).filter(lambda x: True)
638+
tm.assert_frame_equal(result, df)

0 commit comments

Comments
 (0)