Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
142 changes: 142 additions & 0 deletions airbyte_cdk/test/utils/transforms/test_cleaning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
"""Unit tests for cleaning transforms."""
import pytest
from airbyte_cdk.utils.transforms.cleaning import (
to_lower,
strip_whitespace,
squash_whitespace,
normalize_unicode,
remove_punctuation,
map_values,
cast_numeric,
)

def test_to_lower() -> None:
    """Check that ``to_lower`` lowercases strings and passes ``None`` through."""
    # Upper-case, capitalized, and mixed-case input all map to lowercase.
    assert to_lower("Hello") == "hello"
    assert to_lower("HELLO") == "hello"
    assert to_lower("HeLLo") == "hello"

    # Non-letter characters (spaces, punctuation, digits) are left untouched.
    assert to_lower("Hello World!") == "hello world!"
    assert to_lower("Hello123") == "hello123"

    # Empty string stays empty; None is returned as None rather than raising.
    assert to_lower("") == ""
    assert to_lower(None) is None

def test_strip_whitespace() -> None:
    """Check that ``strip_whitespace`` trims both ends and passes ``None`` through."""
    # Leading/trailing spaces are removed; already-clean input is unchanged.
    assert strip_whitespace(" hello ") == "hello"
    assert strip_whitespace("hello") == "hello"

    # Tabs and newlines at the edges are trimmed too, but interior
    # whitespace (here "\n ") must be preserved.
    assert strip_whitespace("\thello\n") == "hello"
    assert strip_whitespace(" hello\n world ") == "hello\n world"

    # Whitespace-only and empty input both collapse to the empty string.
    assert strip_whitespace(" ") == ""
    assert strip_whitespace("") == ""
    assert strip_whitespace(None) is None

def test_squash_whitespace() -> None:
    """Check that ``squash_whitespace`` collapses whitespace runs and trims the ends."""
    # Runs of spaces collapse to a single space. The inputs deliberately
    # contain repeated spaces so the squashing is actually exercised.
    assert squash_whitespace("hello    world") == "hello world"
    assert squash_whitespace("  hello    world  ") == "hello world"

    # Runs of newlines/tabs, and mixed runs, also become a single space.
    assert squash_whitespace("hello\n\nworld") == "hello world"
    assert squash_whitespace("hello\t\tworld") == "hello world"
    assert squash_whitespace("\n hello \t world \n") == "hello world"

    # Whitespace-only and empty input both collapse to the empty string.
    assert squash_whitespace(" ") == ""
    assert squash_whitespace("") == ""
    assert squash_whitespace(None) is None

def test_normalize_unicode() -> None:
    """Check that ``normalize_unicode`` composes by default and honours ``form``."""
    # Spell the two representations with escapes so the test does not depend
    # on how this source file itself happens to be normalized on disk.
    composed = "caf\u00e9"  # single code point U+00E9
    decomposed = "cafe\u0301"  # 'e' followed by combining acute accent

    # Plain ASCII is unaffected.
    assert normalize_unicode("hello") == "hello"

    # Already-composed text is returned unchanged by the default form.
    assert normalize_unicode(composed) == composed

    # Decomposed text is normalized to the composed form by default.
    assert normalize_unicode(decomposed) == composed

    # Explicit NFD and NFC must yield different code point sequences.
    assert normalize_unicode(composed, form="NFD") != normalize_unicode(composed, form="NFC")

    # Empty string stays empty; None is passed through.
    assert normalize_unicode("") == ""
    assert normalize_unicode(None) is None

def test_remove_punctuation() -> None:
    """Check that ``remove_punctuation`` drops ASCII and Unicode punctuation."""
    # Punctuation is removed; the space between words is kept.
    assert remove_punctuation("hello, world!") == "hello world"
    assert remove_punctuation("hello.world") == "helloworld"

    # Repeated marks and symbol characters are removed as well.
    assert remove_punctuation("hello!!! world???") == "hello world"
    assert remove_punctuation("hello@#$%world") == "helloworld"

    # Non-ASCII punctuation: em dash and guillemets.
    assert remove_punctuation("hello—world") == "helloworld"
    assert remove_punctuation("«hello»") == "hello"

    # Empty string stays empty; None is passed through.
    assert remove_punctuation("") == ""
    assert remove_punctuation(None) is None

def test_map_values() -> None:
    """Check ``map_values`` lookups, the default fallback, and mixed key types."""
    mapping = {"a": 1, "b": 2, "c": 3}

    # Keys present in the mapping return their mapped value.
    assert map_values("a", mapping) == 1
    assert map_values("b", mapping) == 2

    # A missing key yields None unless an explicit default is supplied.
    assert map_values("x", mapping) is None
    assert map_values("x", mapping, default=0) == 0

    # Keys need not be strings; None itself can be a key.
    mixed_mapping = {1: "one", "two": 2, None: "null"}
    assert map_values(1, mixed_mapping) == "one"
    assert map_values(None, mixed_mapping) == "null"

def test_cast_numeric() -> None:
    """Check ``cast_numeric`` on valid input, empty input, and each ``on_error`` mode."""
    # Numeric strings and already-numeric values cast to equal numbers.
    assert cast_numeric("123") == 123
    assert cast_numeric("123.45") == 123.45
    assert cast_numeric(123) == 123
    assert cast_numeric(123.45) == 123.45

    # Integer-looking strings become int, decimal strings become float.
    assert isinstance(cast_numeric("123"), int)
    assert isinstance(cast_numeric("123.45"), float)

    # None passes through; empty/blank strings only become None when
    # on_error="none" is requested explicitly.
    assert cast_numeric(None) is None
    assert cast_numeric("", on_error="none") is None
    assert cast_numeric(" ", on_error="none") is None

    # With the default behavior (on_error="ignore") empty/blank strings
    # are returned unchanged.
    assert cast_numeric("") == ""
    assert cast_numeric(" ") == " "

    # Each error-handling mode applied to a value that cannot be cast.
    non_numeric = "abc"
    assert cast_numeric(non_numeric, on_error="ignore") == non_numeric
    assert cast_numeric(non_numeric, on_error="none") is None
    assert cast_numeric(non_numeric, on_error="default", default=0) == 0

    # on_error="raise" must propagate an error.
    # NOTE(review): Exception is very broad; narrow this to the concrete type
    # cast_numeric raises once that is confirmed against cleaning.py.
    with pytest.raises(Exception):
        cast_numeric(non_numeric, on_error="raise")
Comment on lines +13 to +142
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add return type annotations to test functions.

All test functions are missing -> None return type annotations. Would you consider adding these? Wdyt?

-def test_to_lower():
+def test_to_lower() -> None:
     """Test string lowercasing function."""

-def test_strip_whitespace():
+def test_strip_whitespace() -> None:
     """Test whitespace stripping function."""

-def test_squash_whitespace():
+def test_squash_whitespace() -> None:
     """Test whitespace squashing function."""

-def test_normalize_unicode():
+def test_normalize_unicode() -> None:
     """Test unicode normalization function."""

-def test_remove_punctuation():
+def test_remove_punctuation() -> None:
     """Test punctuation removal function."""

-def test_map_values():
+def test_map_values() -> None:
     """Test value mapping function."""

-def test_cast_numeric():
+def test_cast_numeric() -> None:
     """Test numeric casting function."""

Note: The comparison-overlap errors on lines 131-132, 136 will be resolved once the return type issue in cast_numeric (flagged in cleaning.py) is fixed.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def test_to_lower():
"""Test string lowercasing function."""
# Test normal cases
assert to_lower("Hello") == "hello"
assert to_lower("HELLO") == "hello"
assert to_lower("HeLLo") == "hello"
# Test with spaces and special characters
assert to_lower("Hello World!") == "hello world!"
assert to_lower("Hello123") == "hello123"
# Test empty and None
assert to_lower("") == ""
assert to_lower(None) is None
def test_strip_whitespace():
"""Test whitespace stripping function."""
# Test normal cases
assert strip_whitespace(" hello ") == "hello"
assert strip_whitespace("hello") == "hello"
# Test with tabs and newlines
assert strip_whitespace("\thello\n") == "hello"
assert strip_whitespace(" hello\n world ") == "hello\n world"
# Test empty and None
assert strip_whitespace(" ") == ""
assert strip_whitespace("") == ""
assert strip_whitespace(None) is None
def test_squash_whitespace():
"""Test whitespace squashing function."""
# Test normal cases
assert squash_whitespace("hello world") == "hello world"
assert squash_whitespace(" hello world ") == "hello world"
# Test with tabs and newlines
assert squash_whitespace("hello\n\nworld") == "hello world"
assert squash_whitespace("hello\t\tworld") == "hello world"
assert squash_whitespace("\n hello \t world \n") == "hello world"
# Test empty and None
assert squash_whitespace(" ") == ""
assert squash_whitespace("") == ""
assert squash_whitespace(None) is None
def test_normalize_unicode():
"""Test unicode normalization function."""
# Test normal cases
assert normalize_unicode("hello") == "hello"
# Test composed characters
assert normalize_unicode("café") == "café" # Composed 'é'
# Test decomposed characters
decomposed = "cafe\u0301" # 'e' with combining acute accent
assert normalize_unicode(decomposed) == "café" # Should normalize to composed form
# Test different normalization forms
assert normalize_unicode("café", form="NFD") != normalize_unicode("café", form="NFC")
# Test empty and None
assert normalize_unicode("") == ""
assert normalize_unicode(None) is None
def test_remove_punctuation():
"""Test punctuation removal function."""
# Test normal cases
assert remove_punctuation("hello, world!") == "hello world"
assert remove_punctuation("hello.world") == "helloworld"
# Test with multiple punctuation marks
assert remove_punctuation("hello!!! world???") == "hello world"
assert remove_punctuation("hello@#$%world") == "helloworld"
# Test with unicode punctuation
assert remove_punctuation("hello—world") == "helloworld"
assert remove_punctuation("«hello»") == "hello"
# Test empty and None
assert remove_punctuation("") == ""
assert remove_punctuation(None) is None
def test_map_values():
"""Test value mapping function."""
mapping = {"a": 1, "b": 2, "c": 3}
# Test normal cases
assert map_values("a", mapping) == 1
assert map_values("b", mapping) == 2
# Test with default value
assert map_values("x", mapping) is None
assert map_values("x", mapping, default=0) == 0
# Test with different value types
mixed_mapping = {1: "one", "two": 2, None: "null"}
assert map_values(1, mixed_mapping) == "one"
assert map_values(None, mixed_mapping) == "null"
def test_cast_numeric():
"""Test numeric casting function."""
# Test successful casts
assert cast_numeric("123") == 123
assert cast_numeric("123.45") == 123.45
assert cast_numeric(123) == 123
assert cast_numeric(123.45) == 123.45
# Test integers vs floats
assert isinstance(cast_numeric("123"), int)
assert isinstance(cast_numeric("123.45"), float)
# Test empty values
assert cast_numeric(None) is None
assert cast_numeric("", on_error="none") is None # Need to specify on_error="none" to get None for empty string
assert cast_numeric(" ", on_error="none") is None # Need to specify on_error="none" to get None for whitespace
# Test empty values with default behavior (on_error="ignore")
assert cast_numeric("") == ""
assert cast_numeric(" ") == " "
# Test error handling modes
non_numeric = "abc"
assert cast_numeric(non_numeric, on_error="ignore") == non_numeric
assert cast_numeric(non_numeric, on_error="none") is None
assert cast_numeric(non_numeric, on_error="default", default=0) == 0
# Test error raising
with pytest.raises(Exception):
cast_numeric(non_numeric, on_error="raise")
def test_to_lower() -> None:
"""Test string lowercasing function."""
# Test normal cases
assert to_lower("Hello") == "hello"
assert to_lower("HELLO") == "hello"
assert to_lower("HeLLo") == "hello"
# Test with spaces and special characters
assert to_lower("Hello World!") == "hello world!"
assert to_lower("Hello123") == "hello123"
# Test empty and None
assert to_lower("") == ""
assert to_lower(None) is None
def test_strip_whitespace() -> None:
"""Test whitespace stripping function."""
# Test normal cases
assert strip_whitespace(" hello ") == "hello"
assert strip_whitespace("hello") == "hello"
# Test with tabs and newlines
assert strip_whitespace("\thello\n") == "hello"
assert strip_whitespace(" hello\n world ") == "hello\n world"
# Test empty and None
assert strip_whitespace(" ") == ""
assert strip_whitespace("") == ""
assert strip_whitespace(None) is None
def test_squash_whitespace() -> None:
"""Test whitespace squashing function."""
# Test normal cases
assert squash_whitespace("hello world") == "hello world"
assert squash_whitespace(" hello world ") == "hello world"
# Test with tabs and newlines
assert squash_whitespace("hello\n\nworld") == "hello world"
assert squash_whitespace("hello\t\tworld") == "hello world"
assert squash_whitespace("\n hello \t world \n") == "hello world"
# Test empty and None
assert squash_whitespace(" ") == ""
assert squash_whitespace("") == ""
assert squash_whitespace(None) is None
def test_normalize_unicode() -> None:
"""Test unicode normalization function."""
# Test normal cases
assert normalize_unicode("hello") == "hello"
# Test composed characters
assert normalize_unicode("café") == "café" # Composed 'é'
# Test decomposed characters
decomposed = "cafe\u0301" # 'e' with combining acute accent
assert normalize_unicode(decomposed) == "café" # Should normalize to composed form
# Test different normalization forms
assert normalize_unicode("café", form="NFD") != normalize_unicode("café", form="NFC")
# Test empty and None
assert normalize_unicode("") == ""
assert normalize_unicode(None) is None
def test_remove_punctuation() -> None:
"""Test punctuation removal function."""
# Test normal cases
assert remove_punctuation("hello, world!") == "hello world"
assert remove_punctuation("hello.world") == "helloworld"
# Test with multiple punctuation marks
assert remove_punctuation("hello!!! world???") == "hello world"
assert remove_punctuation("hello@#$%world") == "helloworld"
# Test with unicode punctuation
assert remove_punctuation("hello—world") == "helloworld"
assert remove_punctuation("«hello»") == "hello"
# Test empty and None
assert remove_punctuation("") == ""
assert remove_punctuation(None) is None
def test_map_values() -> None:
"""Test value mapping function."""
mapping = {"a": 1, "b": 2, "c": 3}
# Test normal cases
assert map_values("a", mapping) == 1
assert map_values("b", mapping) == 2
# Test with default value
assert map_values("x", mapping) is None
assert map_values("x", mapping, default=0) == 0
# Test with different value types
mixed_mapping = {1: "one", "two": 2, None: "null"}
assert map_values(1, mixed_mapping) == "one"
assert map_values(None, mixed_mapping) == "null"
def test_cast_numeric() -> None:
"""Test numeric casting function."""
# Test successful casts
assert cast_numeric("123") == 123
assert cast_numeric("123.45") == 123.45
assert cast_numeric(123) == 123
assert cast_numeric(123.45) == 123.45
# Test integers vs floats
assert isinstance(cast_numeric("123"), int)
assert isinstance(cast_numeric("123.45"), float)
# Test empty values
assert cast_numeric(None) is None
assert cast_numeric("", on_error="none") is None # Need to specify on_error="none" to get None for empty string
assert cast_numeric(" ", on_error="none") is None # Need to specify on_error="none" to get None for whitespace
# Test empty values with default behavior (on_error="ignore")
assert cast_numeric("") == ""
assert cast_numeric(" ") == " "
# Test error handling modes
non_numeric = "abc"
assert cast_numeric(non_numeric, on_error="ignore") == non_numeric
assert cast_numeric(non_numeric, on_error="none") is None
assert cast_numeric(non_numeric, on_error="default", default=0) == 0
# Test error raising
with pytest.raises(Exception):
cast_numeric(non_numeric, on_error="raise")
🧰 Tools
🪛 GitHub Actions: Linters

[error] 13-13: Function is missing a return type annotation [no-untyped-def]


[error] 13-13: Use "-> None" if function does not return a value


[error] 28-28: Function is missing a return type annotation [no-untyped-def]


[error] 28-28: Use "-> None" if function does not return a value


[error] 43-43: Function is missing a return type annotation [no-untyped-def]


[error] 43-43: Use "-> None" if function does not return a value


[error] 59-59: Function is missing a return type annotation [no-untyped-def]


[error] 59-59: Use "-> None" if function does not return a value


[error] 78-78: Function is missing a return type annotation [no-untyped-def]


[error] 78-78: Use "-> None" if function does not return a value


[error] 96-96: Function is missing a return type annotation [no-untyped-def]


[error] 96-96: Use "-> None" if function does not return a value


[error] 113-113: Function is missing a return type annotation [no-untyped-def]


[error] 113-113: Use "-> None" if function does not return a value


[error] 131-131: Non-overlapping equality check (left operand type: "int | float | None", right operand type: "Literal['']") [comparison-overlap]


[error] 132-132: Non-overlapping equality check (left operand type: "int | float | None", right operand type: "Literal[' ']") [comparison-overlap]


[error] 136-136: Non-overlapping equality check (left operand type: "int | float | None", right operand type: "str") [comparison-overlap]

🤖 Prompt for AI Agents
In airbyte_cdk/test/utils/transforms/test_cleaning.py around lines 13-142 the
test functions (test_to_lower, test_strip_whitespace, test_squash_whitespace,
test_normalize_unicode, test_remove_punctuation, test_map_values,
test_cast_numeric) are missing explicit return type annotations; update each
function definition to include "-> None" (e.g., def test_to_lower() -> None:) so
all tests have explicit return types, then run the test suite to ensure no
further type-related failures.

72 changes: 72 additions & 0 deletions airbyte_cdk/test/utils/transforms/test_date.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Unit tests for date transforms."""
from datetime import datetime

from airbyte_cdk.utils.transforms.date import (
try_parse_date,
extract_date_parts,
floor_to_month,
ceil_to_month,
)

def test_try_parse_date() -> None:
    """Check that ``try_parse_date`` returns datetimes as-is and ``None`` otherwise."""
    # A datetime instance is returned unchanged.
    dt = datetime(2023, 1, 15)
    assert try_parse_date(dt) == dt

    # Anything that is not a datetime yields None -- including an ISO-looking
    # string, which these assertions expect NOT to be parsed.
    assert try_parse_date("2023-01-15") is None
    assert try_parse_date(123) is None
    assert try_parse_date(None) is None

def test_extract_date_parts() -> None:
    """Check ``extract_date_parts`` on a valid datetime and on invalid input."""
    # 2023-01-15 was a Sunday.
    dt = datetime(2023, 1, 15)
    parts = extract_date_parts(dt)
    assert parts["year"] == 2023
    assert parts["month"] == 1
    assert parts["day"] == 15
    # Day of week is expected as 6 for Sunday, i.e. Monday-based numbering
    # as produced by datetime.weekday().
    assert parts["dow"] == 6

    # Invalid input still returns a mapping, with every value set to None.
    parts = extract_date_parts(None)
    assert all(v is None for v in parts.values())

    parts = extract_date_parts("not a date")
    assert all(v is None for v in parts.values())

def test_floor_to_month() -> None:
    """Check that ``floor_to_month`` snaps a datetime to the first of its month."""
    # Mid-month and end-of-month dates floor to day 1 of the same month.
    dt = datetime(2023, 1, 15)
    assert floor_to_month(dt) == datetime(2023, 1, 1)

    dt = datetime(2023, 12, 31)
    assert floor_to_month(dt) == datetime(2023, 12, 1)

    # The first of the month is already floored and comes back unchanged.
    dt = datetime(2023, 1, 1)
    assert floor_to_month(dt) == dt

    # Non-datetime input yields None rather than raising.
    assert floor_to_month(None) is None
    assert floor_to_month("not a date") is None

def test_ceil_to_month() -> None:
    """Check that ``ceil_to_month`` advances a datetime to the first of the next month."""
    # A mid-month date rolls forward to day 1 of the following month.
    dt = datetime(2023, 1, 15)
    assert ceil_to_month(dt) == datetime(2023, 2, 1)

    # December rolls over into January of the next year.
    dt = datetime(2023, 12, 15)
    assert ceil_to_month(dt) == datetime(2024, 1, 1)

    # Even the first of a month advances to the next month: the ceiling is
    # always strictly later than the input.
    dt = datetime(2023, 1, 1)
    assert ceil_to_month(dt) == datetime(2023, 2, 1)

    # Non-datetime input yields None rather than raising.
    assert ceil_to_month(None) is None
    assert ceil_to_month("not a date") is None
Comment on lines +11 to +72
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add return type annotations to test functions.

All test functions are missing -> None return type annotations. Would you mind adding these to satisfy the type checker? Wdyt?

-def test_try_parse_date():
+def test_try_parse_date() -> None:
     """Test date parsing function."""

-def test_extract_date_parts():
+def test_extract_date_parts() -> None:
     """Test date parts extraction function."""

-def test_floor_to_month():
+def test_floor_to_month() -> None:
     """Test floor to month function."""

-def test_ceil_to_month():
+def test_ceil_to_month() -> None:
     """Test ceil to month function."""

Note: The "Call to untyped function" errors will be resolved once the functions in date.py have proper type annotations.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def test_try_parse_date():
"""Test date parsing function."""
# Test with datetime object
dt = datetime(2023, 1, 15)
assert try_parse_date(dt) == dt
# Test with non-date object
assert try_parse_date("2023-01-15") is None
assert try_parse_date(123) is None
assert try_parse_date(None) is None
def test_extract_date_parts():
"""Test date parts extraction function."""
# Test with valid datetime
dt = datetime(2023, 1, 15) # Sunday
parts = extract_date_parts(dt)
assert parts["year"] == 2023
assert parts["month"] == 1
assert parts["day"] == 15
assert parts["dow"] == 6 # Sunday is 6
# Test with invalid input
parts = extract_date_parts(None)
assert all(v is None for v in parts.values())
parts = extract_date_parts("not a date")
assert all(v is None for v in parts.values())
def test_floor_to_month():
"""Test floor to month function."""
# Test normal cases
dt = datetime(2023, 1, 15)
assert floor_to_month(dt) == datetime(2023, 1, 1)
dt = datetime(2023, 12, 31)
assert floor_to_month(dt) == datetime(2023, 12, 1)
# Test first day of month
dt = datetime(2023, 1, 1)
assert floor_to_month(dt) == dt
# Test with invalid input
assert floor_to_month(None) is None
assert floor_to_month("not a date") is None
def test_ceil_to_month():
"""Test ceil to month function."""
# Test normal cases
dt = datetime(2023, 1, 15)
assert ceil_to_month(dt) == datetime(2023, 2, 1)
# Test end of year
dt = datetime(2023, 12, 15)
assert ceil_to_month(dt) == datetime(2024, 1, 1)
# Test first day of month
dt = datetime(2023, 1, 1)
assert ceil_to_month(dt) == datetime(2023, 2, 1)
# Test with invalid input
assert ceil_to_month(None) is None
assert ceil_to_month("not a date") is None
def test_try_parse_date() -> None:
"""Test date parsing function."""
# Test with datetime object
dt = datetime(2023, 1, 15)
assert try_parse_date(dt) == dt
# Test with non-date object
assert try_parse_date("2023-01-15") is None
assert try_parse_date(123) is None
assert try_parse_date(None) is None
def test_extract_date_parts() -> None:
"""Test date parts extraction function."""
# Test with valid datetime
dt = datetime(2023, 1, 15) # Sunday
parts = extract_date_parts(dt)
assert parts["year"] == 2023
assert parts["month"] == 1
assert parts["day"] == 15
assert parts["dow"] == 6 # Sunday is 6
# Test with invalid input
parts = extract_date_parts(None)
assert all(v is None for v in parts.values())
parts = extract_date_parts("not a date")
assert all(v is None for v in parts.values())
def test_floor_to_month() -> None:
"""Test floor to month function."""
# Test normal cases
dt = datetime(2023, 1, 15)
assert floor_to_month(dt) == datetime(2023, 1, 1)
dt = datetime(2023, 12, 31)
assert floor_to_month(dt) == datetime(2023, 12, 1)
# Test first day of month
dt = datetime(2023, 1, 1)
assert floor_to_month(dt) == dt
# Test with invalid input
assert floor_to_month(None) is None
assert floor_to_month("not a date") is None
def test_ceil_to_month() -> None:
"""Test ceil to month function."""
# Test normal cases
dt = datetime(2023, 1, 15)
assert ceil_to_month(dt) == datetime(2023, 2, 1)
# Test end of year
dt = datetime(2023, 12, 15)
assert ceil_to_month(dt) == datetime(2024, 1, 1)
# Test first day of month
dt = datetime(2023, 1, 1)
assert ceil_to_month(dt) == datetime(2023, 2, 1)
# Test with invalid input
assert ceil_to_month(None) is None
assert ceil_to_month("not a date") is None
🧰 Tools
🪛 GitHub Actions: Linters

[error] 11-11: Function is missing a return type annotation [no-untyped-def]


[error] 11-11: Use "-> None" if function does not return a value


[error] 22-22: Function is missing a return type annotation [no-untyped-def]


[error] 22-22: Use "-> None" if function does not return a value


[error] 39-39: Function is missing a return type annotation [no-untyped-def]


[error] 39-39: Call to untyped function "floor_to_month" in typed context [no-untyped-call]


[error] 46-46: Call to untyped function "floor_to_month" in typed context [no-untyped-call]


[error] 50-50: Call to untyped function "floor_to_month" in typed context [no-untyped-call]


[error] 53-53: Call to untyped function "floor_to_month" in typed context [no-untyped-call]


[error] 54-54: Call to untyped function "floor_to_month" in typed context [no-untyped-call]

🤖 Prompt for AI Agents
In airbyte_cdk/test/utils/transforms/test_date.py around lines 11 to 72, the
test functions lack return type annotations; update each test function
definition (test_try_parse_date, test_extract_date_parts, test_floor_to_month,
test_ceil_to_month) to include an explicit "-> None" return type (e.g., def
test_try_parse_date() -> None:) so the type checker sees them as properly
annotated tests.

117 changes: 117 additions & 0 deletions airbyte_cdk/test/utils/transforms/test_impute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""Unit tests for imputation transforms."""
import pytest
from airbyte_cdk.utils.transforms.impute import (
_numeric_skewness,
choose_imputation_strategy,
compute_imputation_value,
fill_nulls_column,
fill_nulls_record,
ImputationReport,
)

def test_numeric_skewness() -> None:
    """Check the sign and the degenerate cases of ``_numeric_skewness``."""
    # Symmetric data has (approximately) zero skew; a longer right tail is
    # positive, a longer left tail is negative.
    assert _numeric_skewness([1, 2, 3]) == pytest.approx(0.0, abs=1e-10)
    assert _numeric_skewness([1, 1, 2]) > 0
    assert _numeric_skewness([1, 2, 2]) < 0

    # Degenerate inputs return exactly 0.0: fewer than three values, or
    # zero variance.
    assert _numeric_skewness([1, 1]) == 0.0
    assert _numeric_skewness([1, 1, 1]) == 0.0

    # Float input behaves the same as the equivalent int input.
    assert _numeric_skewness([1.0, 2.0, 3.0]) == pytest.approx(0.0, abs=1e-10)

def test_choose_imputation_strategy() -> None:
    """Check which strategy ``choose_imputation_strategy`` picks for each data shape."""
    # Numeric data: low skew selects the mean, high skew selects the median.
    assert choose_imputation_strategy([1, 2, 3]) == "mean"
    assert choose_imputation_strategy([1, 1, 10]) == "median"

    # Categorical data selects the mode, whether flagged explicitly via
    # numeric=False or auto-detected from the values.
    assert choose_imputation_strategy(["a", "b", "c"], numeric=False) == "mode"
    assert choose_imputation_strategy(["a", "a", "b"]) == "mode"

    # Heavily repeated numeric values: 2 unique out of 4 gives a ratio of
    # 0.5, which is below the custom 0.6 threshold, so the mode is chosen.
    assert choose_imputation_strategy([1, 1, 1, 2], unique_ratio_threshold=0.6) == "mode"

    # No usable values at all falls back to the mode.
    assert choose_imputation_strategy([]) == "mode"
    assert choose_imputation_strategy([None, None]) == "mode"

    # A single non-numeric element makes the whole series non-numeric.
    assert choose_imputation_strategy([1, "2", 3]) == "mode"

def test_compute_imputation_value() -> None:
    """Check ``compute_imputation_value`` for each strategy and for bad input."""
    # Mean strategy.
    assert compute_imputation_value([1, 2, 3], "mean") == 2.0
    assert compute_imputation_value([1.5, 2.5, 3.5], "mean") == 2.5

    # Median strategy: an even count averages the two middle values.
    assert compute_imputation_value([1, 2, 3, 4], "median") == 2.5
    assert compute_imputation_value([1, 2, 3], "median") == 2.0

    # Mode strategy works for numeric and string values alike.
    assert compute_imputation_value([1, 1, 2], "mode") == 1
    assert compute_imputation_value(["a", "a", "b"], "mode") == "a"

    # None entries are ignored; if nothing remains the result is None.
    assert compute_imputation_value([1, None, 3], "mean") == 2.0
    assert compute_imputation_value([None, None], "mean") is None

    # An unknown strategy name is rejected with ValueError.
    with pytest.raises(ValueError):
        compute_imputation_value([1, 2, 3], "invalid")

Comment on lines +12 to +67
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add return type annotations to test functions.

Test functions test_numeric_skewness, test_choose_imputation_strategy, and test_compute_imputation_value are missing -> None return type annotations. Would you mind adding these? Wdyt?

-def test_numeric_skewness():
+def test_numeric_skewness() -> None:
     """Test skewness calculation function."""

-def test_choose_imputation_strategy():
+def test_choose_imputation_strategy() -> None:
     """Test imputation strategy selection function."""

-def test_compute_imputation_value():
+def test_compute_imputation_value() -> None:
     """Test imputation value computation function."""
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def test_numeric_skewness():
"""Test skewness calculation function."""
# Test normal cases
assert _numeric_skewness([1, 2, 3]) == pytest.approx(0.0, abs=1e-10) # Symmetric data
assert _numeric_skewness([1, 1, 2]) > 0 # Positive skew
assert _numeric_skewness([1, 2, 2]) < 0 # Negative skew
# Test edge cases
assert _numeric_skewness([1, 1]) == 0.0 # Less than 3 values
assert _numeric_skewness([1, 1, 1]) == 0.0 # No variance
# Test with floating point values
assert _numeric_skewness([1.0, 2.0, 3.0]) == pytest.approx(0.0, abs=1e-10)
def test_choose_imputation_strategy():
"""Test imputation strategy selection function."""
# Test numeric data
assert choose_imputation_strategy([1, 2, 3]) == "mean" # Low skew
assert choose_imputation_strategy([1, 1, 10]) == "median" # High skew
# Test categorical data
assert choose_imputation_strategy(["a", "b", "c"], numeric=False) == "mode"
assert choose_imputation_strategy(["a", "a", "b"]) == "mode" # Autodetect non-numeric
# Test repeated values with custom threshold
assert choose_imputation_strategy([1, 1, 1, 2], unique_ratio_threshold=0.6) == "mode" # Low unique ratio (0.5 < 0.6)
# Test empty and None values
assert choose_imputation_strategy([]) == "mode"
assert choose_imputation_strategy([None, None]) == "mode"
# Test with mixed types
assert choose_imputation_strategy([1, "2", 3]) == "mode" # Non-numeric detected
def test_compute_imputation_value():
"""Test imputation value computation function."""
# Test mean strategy
assert compute_imputation_value([1, 2, 3], "mean") == 2.0
assert compute_imputation_value([1.5, 2.5, 3.5], "mean") == 2.5
# Test median strategy
assert compute_imputation_value([1, 2, 3, 4], "median") == 2.5
assert compute_imputation_value([1, 2, 3], "median") == 2.0
# Test mode strategy
assert compute_imputation_value([1, 1, 2], "mode") == 1
assert compute_imputation_value(["a", "a", "b"], "mode") == "a"
# Test with None values
assert compute_imputation_value([1, None, 3], "mean") == 2.0
assert compute_imputation_value([None, None], "mean") is None
# Test invalid strategy
with pytest.raises(ValueError):
compute_imputation_value([1, 2, 3], "invalid")
def test_numeric_skewness() -> None:
"""Test skewness calculation function."""
# Test normal cases
assert _numeric_skewness([1, 2, 3]) == pytest.approx(0.0, abs=1e-10) # Symmetric data
assert _numeric_skewness([1, 1, 2]) > 0 # Positive skew
assert _numeric_skewness([1, 2, 2]) < 0 # Negative skew
# Test edge cases
assert _numeric_skewness([1, 1]) == 0.0 # Less than 3 values
assert _numeric_skewness([1, 1, 1]) == 0.0 # No variance
# Test with floating point values
assert _numeric_skewness([1.0, 2.0, 3.0]) == pytest.approx(0.0, abs=1e-10)
def test_choose_imputation_strategy() -> None:
"""Test imputation strategy selection function."""
# Test numeric data
assert choose_imputation_strategy([1, 2, 3]) == "mean" # Low skew
assert choose_imputation_strategy([1, 1, 10]) == "median" # High skew
# Test categorical data
assert choose_imputation_strategy(["a", "b", "c"], numeric=False) == "mode"
assert choose_imputation_strategy(["a", "a", "b"]) == "mode" # Autodetect non-numeric
# Test repeated values with custom threshold
assert choose_imputation_strategy([1, 1, 1, 2], unique_ratio_threshold=0.6) == "mode" # Low unique ratio (0.5 < 0.6)
# Test empty and None values
assert choose_imputation_strategy([]) == "mode"
assert choose_imputation_strategy([None, None]) == "mode"
# Test with mixed types
assert choose_imputation_strategy([1, "2", 3]) == "mode" # Non-numeric detected
def test_compute_imputation_value() -> None:
    """Verify the fill value ``compute_imputation_value`` yields per strategy."""
    # "mean" on integer and on float samples.
    int_mean = compute_imputation_value([1, 2, 3], "mean")
    assert int_mean == 2.0
    float_mean = compute_imputation_value([1.5, 2.5, 3.5], "mean")
    assert float_mean == 2.5

    # "median" on an even-length and on an odd-length sample.
    even_median = compute_imputation_value([1, 2, 3, 4], "median")
    assert even_median == 2.5
    odd_median = compute_imputation_value([1, 2, 3], "median")
    assert odd_median == 2.0

    # "mode" on numeric and on string samples.
    numeric_mode = compute_imputation_value([1, 1, 2], "mode")
    assert numeric_mode == 1
    text_mode = compute_imputation_value(["a", "a", "b"], "mode")
    assert text_mode == "a"

    # None entries: a partially-null sample still yields 2.0, while an
    # all-null sample yields None.
    sparse_mean = compute_imputation_value([1, None, 3], "mean")
    assert sparse_mean == 2.0
    all_null_mean = compute_imputation_value([None, None], "mean")
    assert all_null_mean is None

    # An unrecognised strategy name must be rejected with ValueError.
    with pytest.raises(ValueError):
        compute_imputation_value([1, 2, 3], "invalid")
🧰 Tools
🪛 GitHub Actions: Linters

[error] 12-12: Function is missing a return type annotation [no-untyped-def]


[error] 12-12: Use "-> None" if function does not return a value


[error] 26-26: Function is missing a return type annotation [no-untyped-def]


[error] 26-26: Use "-> None" if function does not return a value


[error] 12-12: Function is missing a return type annotation [no-untyped-def]


[error] 12-12: Use "-> None" if function does not return a value


[error] 26-26: Function is missing a return type annotation [no-untyped-def]


[error] 26-26: Use "-> None" if function does not return a value


[error] 46-46: Function is missing a return type annotation [no-untyped-def]


[error] 46-46: Use "-> None" if function does not return a value

🤖 Prompt for AI Agents
In airbyte_cdk/test/utils/transforms/test_impute.py around lines 12 to 67, the
three test functions lack explicit return type annotations; update each function
definition to include "-> None" (i.e., change "def test_numeric_skewness():",
"def test_choose_imputation_strategy():", and "def
test_compute_imputation_value():" to "def test_numeric_skewness() -> None:",
"def test_choose_imputation_strategy() -> None:", and "def
test_compute_imputation_value() -> None:" respectively), no other changes
required.

def test_fill_nulls_column() -> None:
    """Verify ``fill_nulls_column`` returns filled values plus a report.

    Each case unpacks the ``(values, report)`` pair and checks the filled
    list together with the strategy and/or fill value recorded on the report.
    """
    # Numeric data: the gap is expected to be filled with the mean, 2.0.
    values, report = fill_nulls_column([1, None, 3])
    assert values == [1, 2.0, 3]
    assert report.strategy == "mean"
    assert report.value_used == 2.0

    # Categorical data: the gap is expected to be filled with the mode, "a".
    values, report = fill_nulls_column(["a", None, "a"])
    assert values == ["a", "a", "a"]
    assert report.strategy == "mode"
    assert report.value_used == "a"

    # An explicit strategy should be the one recorded on the report.
    values, report = fill_nulls_column([1, None, 3], explicit_strategy="median")
    assert values == [1, 2, 3]
    assert report.strategy == "median"

    # All-None input: nothing to impute from, so values stay None.
    values, report = fill_nulls_column([None, None])
    assert values == [None, None]
    assert report.value_used is None

def test_fill_nulls_record() -> None:
    """Verify ``fill_nulls_record`` fills a record's nulls from column samples.

    Each case unpacks the ``(filled, reports)`` pair and checks the filled
    record and, where relevant, the number and type of the reports.
    """
    # Imported locally so the annotation below resolves for the type checker
    # without relying on the module's import block.
    from typing import Any, Sequence

    # Basic record filling. The explicit annotation stops the type checker
    # from inferring dict[str, object] for the mixed int/str sample lists.
    record = {"a": 1, "b": None, "c": "x"}
    samples: dict[str, Sequence[Any]] = {
        "a": [1, 2, 3],
        "b": [4, 5, 6],
        "c": ["x", "y", "x"],
    }
    filled, reports = fill_nulls_record(record, ["a", "b", "c"], samples)

    assert filled["a"] == 1
    assert filled["b"] == 5.0  # Mean of samples
    assert filled["c"] == "x"
    assert len(reports) == 3
    assert all(isinstance(r, ImputationReport) for r in reports)

    # Explicit per-column strategies.
    strategies = {"b": "median"}
    filled, reports = fill_nulls_record(record, ["a", "b", "c"], samples, strategies=strategies)
    assert filled["b"] == 5.0  # Median of samples

    # Empty samples mapping.
    filled, reports = fill_nulls_record(record, ["a", "b", "c"], {})
    assert filled["b"] is None  # No samples to impute from

    # A requested column that is absent from both the record and the samples
    # should still appear in the output, with one report per requested column.
    filled, reports = fill_nulls_record(record, ["a", "d"], samples)
    assert "d" in filled
    assert len(reports) == 2
Comment on lines +68 to +117
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add return type annotations and fix type compatibility for samples dict.

Two issues here:

  1. Test functions missing -> None return type annotations
  2. The samples dict has type inference issues - the type checker sees dict[str, object] but expects Mapping[str, Sequence[Any]]

Would you consider adding explicit type hints to the samples dict to help the type checker? Wdyt?

-def test_fill_nulls_column():
+def test_fill_nulls_column() -> None:
     """Test column null filling function."""

-def test_fill_nulls_record():
+def test_fill_nulls_record() -> None:
     """Test record null filling function."""
     # Test basic record filling
     record = {"a": 1, "b": None, "c": "x"}
-    samples = {"a": [1, 2, 3], "b": [4, 5, 6], "c": ["x", "y", "x"]}
+    samples: dict[str, Sequence[Any]] = {"a": [1, 2, 3], "b": [4, 5, 6], "c": ["x", "y", "x"]}
     filled, reports = fill_nulls_record(record, ["a", "b", "c"], samples)

Apply similar type hints on lines 106-107 and 111.

🧰 Tools
🪛 GitHub Actions: Linters

[error] 68-68: Argument 3 to "fill_nulls_record" has incompatible type "dict[str, object]"; expected "Mapping[str, Sequence[Any]]" [arg-type]


[error] 107-107: Argument 3 to "fill_nulls_record" has incompatible type "dict[str, object]"; expected "Mapping[str, Sequence[Any]]" [arg-type]


[error] 115-115: Argument 3 to "fill_nulls_record" has incompatible type "dict[str, object]"; expected "Mapping[str, Sequence[Any]]" [arg-type]

🤖 Prompt for AI Agents
In airbyte_cdk/test/utils/transforms/test_impute.py around lines 68-117, the
test functions lack explicit return type annotations and the local samples
variables are inferred as dict[str, object] which conflicts with functions
expecting Mapping[str, Sequence[Any]]; add -> None to both
test_fill_nulls_column and test_fill_nulls_record declarations, and annotate
each samples variable with the appropriate type (e.g., samples: Mapping[str,
Sequence[Any]]) on the occurrences around lines 106-107 and 111 so the type
checker accepts the passed argument.

Loading
Loading