From d2a062b79bcedc910534eefb86a100c9dbffa92e Mon Sep 17 00:00:00 2001 From: AbelJSanchez Date: Wed, 3 Dec 2025 21:55:39 -0800 Subject: [PATCH 1/3] Added test_sum_string_dtype_coercion that checks summing numeric strings results in concatenation and not coercion to dtype int64 or float64 --- pandas/tests/frame/test_reductions.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 4d8f163197416..0cb60bca8ee66 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1043,6 +1043,25 @@ def test_sum_bools(self): bools = isna(df) assert bools.sum(axis=1)[0] == 10 + def test_sum_string_dtype_coercion(self): + # GH#22642 + # Check that summing numeric strings results in concatenation + # and not conversion to dtype int64 or float64 + df = DataFrame({"a": ["483", "3"], "b": ["94", "759"]}) + result = df.sum(axis=1) + expected = Series(["48394", "3759"]) + tm.assert_series_equal(result, expected) + + df = DataFrame({"a": ["483.948", "3.0"], "b": ["94.2", "759.93"]}) + result = df.sum(axis=1) + expected = Series(["483.94894.2", "3.0759.93"]) + tm.assert_series_equal(result, expected) + + df = DataFrame({"a": ["483", "3.0"], "b": ["94.2", "79"]}) + result = df.sum(axis=1) + expected = Series(["48394.2", "3.079"]) + tm.assert_series_equal(result, expected) + # ---------------------------------------------------------------------- # Index of max / min From 8f5b427d5a3d69eda4aa743d101c8081f45e41aa Mon Sep 17 00:00:00 2001 From: AbelJSanchez Date: Thu, 4 Dec 2025 18:23:55 -0800 Subject: [PATCH 2/3] Modified test to use pytest.mark.parametrize instead of setting up 3 different assertions --- pandas/tests/frame/test_reductions.py | 31 ++++++++++++++++----------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 0cb60bca8ee66..ddce4be070e7b 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1043,23 +1043,28 @@ def test_sum_bools(self): bools = isna(df) assert bools.sum(axis=1)[0] == 10 - def test_sum_string_dtype_coercion(self): + @pytest.mark.parametrize( + "df, expected", + [ + ( + DataFrame({"a": ["483", "3"], "b": ["94", "759"]}), + Series(["48394", "3759"]), + ), + ( + DataFrame({"a": ["483.948", "3.0"], "b": ["94.2", "759.93"]}), + Series(["483.94894.2", "3.0759.93"]), + ), + ( + DataFrame({"a": ["483", "3.0"], "b": ["94.2", "79"]}), + Series(["48394.2", "3.079"]), + ), + ], + ) + def test_sum_string_dtype_coercion(self, df, expected): # GH#22642 # Check that summing numeric strings results in concatenation # and not conversion to dtype int64 or float64 - df = DataFrame({"a": ["483", "3"], "b": ["94", "759"]}) - result = df.sum(axis=1) - expected = Series(["48394", "3759"]) - tm.assert_series_equal(result, expected) - - df = DataFrame({"a": ["483.948", "3.0"], "b": ["94.2", "759.93"]}) - result = df.sum(axis=1) - expected = Series(["483.94894.2", "3.0759.93"]) - tm.assert_series_equal(result, expected) - - df = DataFrame({"a": ["483", "3.0"], "b": ["94.2", "79"]}) result = df.sum(axis=1) - expected = Series(["48394.2", "3.079"]) tm.assert_series_equal(result, expected) # ---------------------------------------------------------------------- From 719b9e41f2f7b36f7b0a0405a730799caaee7a81 Mon Sep 17 00:00:00 2001 From: AbelJSanchez Date: Fri, 5 Dec 2025 22:35:04 -0800 Subject: [PATCH 3/3] Moved Series and Dataframe calls to the body of the test --- pandas/tests/frame/test_reductions.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index ddce4be070e7b..5361a3755d672 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1044,26 +1044,22 @@ def test_sum_bools(self): assert bools.sum(axis=1)[0] == 10 @pytest.mark.parametrize( - "df, expected", + "input_data, expected_data", [ + ({"a": ["483", "3"], "b": ["94", "759"]}, ["48394", "3759"]), ( - DataFrame({"a": ["483", "3"], "b": ["94", "759"]}), - Series(["48394", "3759"]), - ), - ( - DataFrame({"a": ["483.948", "3.0"], "b": ["94.2", "759.93"]}), - Series(["483.94894.2", "3.0759.93"]), - ), - ( - DataFrame({"a": ["483", "3.0"], "b": ["94.2", "79"]}), - Series(["48394.2", "3.079"]), + {"a": ["483.948", "3.0"], "b": ["94.2", "759.93"]}, + ["483.94894.2", "3.0759.93"], ), + ({"a": ["483", "3.0"], "b": ["94.2", "79"]}, ["48394.2", "3.079"]), ], ) - def test_sum_string_dtype_coercion(self, df, expected): + def test_sum_string_dtype_coercion(self, input_data, expected_data): # GH#22642 # Check that summing numeric strings results in concatenation # and not conversion to dtype int64 or float64 + df = DataFrame(input_data) + expected = Series(expected_data) result = df.sum(axis=1) tm.assert_series_equal(result, expected)