From b9643ac9c251f7a4b4dc05d496393dce63c26d4c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 09:29:51 +0000
Subject: [PATCH 1/3] Initial plan


From 63fbf3a4eb6790ea17981aba49f65d6fe4f8e205 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 09:37:42 +0000
Subject: [PATCH 2/3] Fix List sampling when min_length > 32

Co-authored-by: jjurm <6285777+jjurm@users.noreply.github.com>
---
 dataframely/columns/list.py     |  7 +++++--
 tests/column_types/test_list.py | 10 ++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/dataframely/columns/list.py b/dataframely/columns/list.py
index 5c22d24..6c1c66a 100644
--- a/dataframely/columns/list.py
+++ b/dataframely/columns/list.py
@@ -131,9 +131,12 @@ def pyarrow_dtype(self) -> pa.DataType:
     def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
         # First, sample the number of items per list element
         # NOTE: We default to 32 for the upper bound as we need some kind of reasonable
-        #  upper bound if none is set.
+        #  upper bound if none is set. If min_length is greater than 32, we use
+        #  min_length as the default upper bound instead.
+        min_len = self.min_length or 0
+        default_max = max(32, min_len)
         element_lengths = generator.sample_int(
-            n, min=self.min_length or 0, max=(self.max_length or 32) + 1
+            n, min=min_len, max=(self.max_length or default_max) + 1
         )
 
         # Then, we can sample the inner elements in a flat series
diff --git a/tests/column_types/test_list.py b/tests/column_types/test_list.py
index e860b85..44e17a0 100644
--- a/tests/column_types/test_list.py
+++ b/tests/column_types/test_list.py
@@ -179,3 +179,13 @@ def test_inner_primary_key_struct(
     _, failure = schema.filter(df)
     assert failure.counts() == {"a|primary_key": failure_count}
     assert validation_mask(df, failure).to_list() == mask
+
+
+@pytest.mark.parametrize("min_length", [0, 10, 33, 100])
+def test_list_sampling_with_min_length(min_length: int) -> None:
+    """Test that sampled lists respect min_length, including values above 32."""
+    schema = create_schema("test", {"a": dy.List(dy.Int64(), min_length=min_length)})
+    df = schema.sample(num_rows=10)
+    assert len(df) == 10
+    # Verify all lists have at least min_length elements
+    assert df["a"].list.len().min() >= min_length

From e1116b3246b5e3e8ee2402d08395049994d7afe2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 11:02:03 +0000
Subject: [PATCH 3/3] Fix mypy errors in test_list.py

Co-authored-by: jjurm <6285777+jjurm@users.noreply.github.com>
---
 tests/column_types/test_list.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/column_types/test_list.py b/tests/column_types/test_list.py
index 44e17a0..44bb858 100644
--- a/tests/column_types/test_list.py
+++ b/tests/column_types/test_list.py
@@ -1,6 +1,8 @@
 # Copyright (c) QuantCo 2025-2025
 # SPDX-License-Identifier: BSD-3-Clause
 
+from typing import cast
+
 import polars as pl
 import pytest
 
@@ -188,4 +190,5 @@ def test_list_sampling_with_min_length(min_length: int) -> None:
     df = schema.sample(num_rows=10)
     assert len(df) == 10
     # Verify all lists have at least min_length elements
-    assert df["a"].list.len().min() >= min_length
+    min_list_len = cast(int, df["a"].list.len().min())
+    assert min_list_len >= min_length