From 22df4ce8004aaeb25f09c605ae97632e61ac013b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 13:47:26 +0000 Subject: [PATCH 1/4] Initial plan From cc11a0cb2788be817b0f952d44e01c9f5d494e23 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 13:55:21 +0000 Subject: [PATCH 2/4] Add validation for pl.DataType schema members - Add check for pl.DataType instances (e.g., pl.String()) - Add check for pl.DataType types (e.g., pl.String) - Add test cases for both scenarios - Tests pass and linting is clean Co-authored-by: borchero <22455425+borchero@users.noreply.github.com> --- dataframely/_base_schema.py | 14 ++++++++++++++ tests/schema/test_base.py | 16 ++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py index a0d4594..ca61b12 100644 --- a/dataframely/_base_schema.py +++ b/dataframely/_base_schema.py @@ -162,6 +162,20 @@ def __new__( f"Did you forget to add parentheses?" ) + # Check for pl.DataType instance (e.g., pl.String() instead of dy.String()) + if isinstance(value, pl.DataType): + raise TypeError( + f"Schema member '{attr}' is a polars DataType instance. " + f"Use dataframely column types (e.g., dy.String()) instead of polars types (e.g., pl.String())." + ) + + # Check for pl.DataType type (e.g., pl.String instead of dy.String()) + if isinstance(value, type) and issubclass(value, pl.DataType): + raise TypeError( + f"Schema member '{attr}' is a polars DataType type. " + f"Use dataframely column types (e.g., dy.String()) instead of polars types (e.g., pl.String or pl.String())." + ) + return cls if not TYPE_CHECKING: diff --git a/tests/schema/test_base.py b/tests/schema/test_base.py index 36cd085..15d6289 100644 --- a/tests/schema/test_base.py +++ b/tests/schema/test_base.py @@ -125,3 +125,19 @@ def test_user_error_column_type_not_instance() -> None: class MySchemaWithColumnTypeNotInstance(dy.Schema): a = dy.Int32(nullable=False, primary_key=True) b = dy.Float64 # User error: Forgot parentheses! + + +def test_user_error_polars_datatype_instance() -> None: + with pytest.raises(TypeError, match="polars DataType instance"): + + class MySchemaWithPolarsDataTypeInstance(dy.Schema): + a = dy.Int32(nullable=False) + b = pl.String() # User error: Used pl.String() instead of dy.String() + + +def test_user_error_polars_datatype_type() -> None: + with pytest.raises(TypeError, match="polars DataType type"): + + class MySchemaWithPolarsDataTypeType(dy.Schema): + a = dy.Int32(nullable=False) + b = pl.String # User error: Used pl.String instead of dy.String() From 8724e86e8132451d95c0a8ac621ec05ba05bee97 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 13:57:14 +0000 Subject: [PATCH 3/4] Fix error message for pl.DataType type validation Remove redundant example from error message to make it clearer Co-authored-by: borchero <22455425+borchero@users.noreply.github.com> --- dataframely/_base_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py index ca61b12..0288ad8 100644 --- a/dataframely/_base_schema.py +++ b/dataframely/_base_schema.py @@ -173,7 +173,7 @@ def __new__( if isinstance(value, type) and issubclass(value, pl.DataType): raise TypeError( f"Schema member '{attr}' is a polars DataType type. " - f"Use dataframely column types (e.g., dy.String()) instead of polars types (e.g., pl.String or pl.String())." + f"Use dataframely column types (e.g., dy.String()) instead of polars types (e.g., pl.String)." ) return cls From 747352888ae4db3c57a843d75704fec05d24273a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 14:10:42 +0000 Subject: [PATCH 4/4] Combine pl.DataType checks into single if statement Refactor the two separate checks for pl.DataType instance and type into a single conditional as requested by reviewer Co-authored-by: borchero <22455425+borchero@users.noreply.github.com> --- dataframely/_base_schema.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py index 0288ad8..81e6cb1 100644 --- a/dataframely/_base_schema.py +++ b/dataframely/_base_schema.py @@ -162,18 +162,17 @@ def __new__( f"Did you forget to add parentheses?" ) - # Check for pl.DataType instance (e.g., pl.String() instead of dy.String()) - if isinstance(value, pl.DataType): - raise TypeError( - f"Schema member '{attr}' is a polars DataType instance. " - f"Use dataframely column types (e.g., dy.String()) instead of polars types (e.g., pl.String())." + # Check for pl.DataType instance or type (e.g., pl.String() or pl.String instead of dy.String()) + if isinstance(value, pl.DataType) or ( + isinstance(value, type) and issubclass(value, pl.DataType) + ): + value_type = "instance" if isinstance(value, pl.DataType) else "type" + example = ( + "pl.String()" if isinstance(value, pl.DataType) else "pl.String" ) - - # Check for pl.DataType type (e.g., pl.String instead of dy.String()) - if isinstance(value, type) and issubclass(value, pl.DataType): raise TypeError( - f"Schema member '{attr}' is a polars DataType type. " - f"Use dataframely column types (e.g., dy.String()) instead of polars types (e.g., pl.String)." + f"Schema member '{attr}' is a polars DataType {value_type}. " + f"Use dataframely column types (e.g., dy.String()) instead of polars types (e.g., {example})." ) return cls