diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 0000000..243cda8 --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,54 @@ +name: Integration Tests + +on: + push: + branches: [ main ] + paths: + - 'src/**/*.py' + - 'tests/**/*.py' + - 'pyproject.toml' + - '.github/workflows/integration-tests.yml' + pull_request: + branches: [ main ] + paths: + - 'src/**/*.py' + - 'tests/**/*.py' + - 'pyproject.toml' + - '.github/workflows/integration-tests.yml' + +jobs: + integration-tests: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + cache-dependency-glob: "**/pyproject.toml" + + - name: Set up Python 3.12 + run: uv python install 3.12 + + - name: Install all dependencies + run: uv sync --group all_loaders --group test --group dev + + - name: Run integration tests with coverage + env: + USE_TESTCONTAINERS: "true" + TESTCONTAINERS_RYUK_DISABLED: "true" + run: | + uv run pytest tests/integration/ -v --tb=short -m "integration" \ + -k "not snowflake" \ + --cov=src/amp/loaders --cov-report=xml --cov-report=term-missing + + - name: Upload coverage reports + uses: codecov/codecov-action@v4 + if: always() + with: + file: ./coverage.xml + flags: integration + name: codecov-integration + fail_ci_if_error: false \ No newline at end of file diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 0000000..27a2691 --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,115 @@ +name: Ruff + +on: + push: + branches: [ main ] + paths: + - '**.py' + - 'pyproject.toml' + - '.github/workflows/ruff.yml' + pull_request: + branches: [ main ] + paths: + - '**.py' + - 'pyproject.toml' + - '.github/workflows/ruff.yml' + +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Ruff + run: pip install ruff + + - name: Run Ruff format check + run: ruff format --check . --diff + + - name: Run Ruff linter + run: ruff check . --output-format=github + + # Optional: Create a comment on PR with formatting suggestions + ruff-suggestions: + if: github.event_name == 'pull_request' && failure() + needs: ruff + runs-on: ubuntu-latest + permissions: + pull-requests: write + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Ruff + run: pip install ruff + + - name: Generate fix suggestions + run: | + echo "## 🎨 Ruff Formatting & Linting Report" >> suggestions.md + echo "" >> suggestions.md + echo "Run the following commands locally to fix issues:" >> suggestions.md + echo "" >> suggestions.md + echo '```bash' >> suggestions.md + echo "ruff format ." >> suggestions.md + echo "ruff check . --fix" >> suggestions.md + echo '```' >> suggestions.md + echo "" >> suggestions.md + + # Show what would be changed + echo "### Formatting changes needed:" >> suggestions.md + echo '```diff' >> suggestions.md + ruff format --check . --diff >> suggestions.md 2>&1 || true + echo '```' >> suggestions.md + echo "" >> suggestions.md + + # Show linting issues + echo "### Linting issues:" >> suggestions.md + echo '```' >> suggestions.md + ruff check . 
>> suggestions.md 2>&1 || true + echo '```' >> suggestions.md + + - name: Comment PR + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const suggestions = fs.readFileSync('suggestions.md', 'utf8'); + + // Find existing comment + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + + const botComment = comments.find(comment => + comment.user.type === 'Bot' && + comment.body.includes('🎨 Ruff Formatting & Linting Report') + ); + + if (botComment) { + // Update existing comment + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: botComment.id, + body: suggestions + }); + } else { + // Create new comment + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: suggestions + }); + } \ No newline at end of file diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml new file mode 100644 index 0000000..0a56370 --- /dev/null +++ b/.github/workflows/unit-tests.yml @@ -0,0 +1,54 @@ +name: Unit Tests + +on: + push: + branches: [ main ] + paths: + - 'src/**/*.py' + - 'tests/**/*.py' + - 'pyproject.toml' + - '.github/workflows/unit-tests.yml' + pull_request: + branches: [ main ] + paths: + - 'src/**/*.py' + - 'tests/**/*.py' + - 'pyproject.toml' + - '.github/workflows/unit-tests.yml' + +jobs: + unit-tests: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.12'] + + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Set up Python ${{ matrix.python-version }} + run: uv python install ${{ matrix.python-version }} + + - name: Install dependencies + run: | + uv sync --all-groups + + - name: Run unit tests + run: | + uv run pytest tests/unit/ -m "unit" -v --tb=short + + - name: Run unit tests with coverage + run: | + uv run pytest tests/unit/ -m "unit" --cov=src/amp --cov-report=xml --cov-report=term-missing + + - name: Upload coverage reports + uses: codecov/codecov-action@v4 + if: always() + with: + file: ./coverage.xml + flags: unittests + name: codecov-umbrella + fail_ci_if_error: false \ No newline at end of file diff --git a/README.md b/README.md index ca44052..97c22da 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,12 @@ # Python Amp Client +[![Unit tests status](https://github.com/edgeandnode/amp-python/actions/workflows/unit-tests.yml/badge.svg?event=push)](https://github.com/edgeandnode/amp-python/actions/workflows/unit-tests.yml) +[![Integration tests status](https://github.com/edgeandnode/amp-python/actions/workflows/integration-tests.yml/badge.svg?event=push)](https://github.com/edgeandnode/amp-python/actions/workflows/integration-tests.yml) +[![Formatting status](https://github.com/edgeandnode/amp-python/actions/workflows/ruff.yml/badge.svg?event=push)](https://github.com/edgeandnode/amp-python/actions/workflows/ruff.yml) + + +## Overview + Client for issuing queries to an Amp server and working with the returned data. ## Installation @@ -45,11 +52,87 @@ You can then use it in your python scripts, apps or notebooks. The project is set up to use the [`pytest`](https://docs.pytest.org/en/stable/) testing framework. It follows [standard python test discovery rules](https://docs.pytest.org/en/stable/explanation/goodpractices.html#test-discovery). 
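+The GitHub Actions workflows select tests by pytest marker (`-m "unit"` for `unit-tests.yml`, `-m "integration"` for `integration-tests.yml`), so a test must carry the matching marker to be picked up by CI. A minimal sketch of a marked unit test, assuming the `unit` and `integration` markers are registered under `[tool.pytest.ini_options]` in `pyproject.toml`:
+
+```python
+import pytest
+
+
+@pytest.mark.unit
+def test_example():
+    # Runs in the fast unit-test workflow: no Docker, no external services.
+    assert 1 + 1 == 2
+```
+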
+## Quick Test Commands + Run all tests ```bash uv run pytest ``` +Run only unit tests (fast, no external dependencies) +```bash +make test-unit +``` + +Run integration tests with automatic container setup +```bash +make test-integration +``` + +Run all tests with coverage +```bash +make test-all +``` + +## Integration Testing + +Integration tests can run in two modes: + +### 1. Automatic Container Mode (Default) +The integration tests will automatically spin up PostgreSQL and Redis containers using testcontainers. This is the default mode and requires Docker to be installed and running. + +```bash +# Run integration tests with automatic containers +uv run pytest tests/integration/ -m integration +``` + +**Note**: The configuration automatically disables Ryuk (testcontainers cleanup container) to avoid Docker connectivity issues. If you need Ryuk enabled, set `TESTCONTAINERS_RYUK_DISABLED=false`. + +### 2. Manual Setup Mode +If you prefer to use your own database instances, you can disable testcontainers: + +```bash +# Disable testcontainers and use manual configuration +export USE_TESTCONTAINERS=false + +# Configure your database connections +export POSTGRES_HOST=localhost +export POSTGRES_PORT=5432 +export POSTGRES_DB=test_amp +export POSTGRES_USER=postgres +export POSTGRES_PASSWORD=yourpassword + +export REDIS_HOST=localhost +export REDIS_PORT=6379 +export REDIS_PASSWORD=yourpassword # Optional + +# Run tests +uv run pytest tests/integration/ -m integration +``` + +For manual setup, you can use the provided Makefile commands: +```bash +# Start test databases manually +make test-setup + +# Run tests +make test-integration + +# Clean up databases +make test-cleanup +``` + +## Loader-Specific Tests + +Run tests for specific loaders: +```bash +make test-postgresql # PostgreSQL tests +make test-redis # Redis tests +make test-deltalake # Delta Lake tests +make test-iceberg # Iceberg tests +make test-lmdb # LMDB tests +``` + # Linting and formatting Ruff is configured to be used for linting and formatting of this project. diff --git a/tests/conftest.py b/tests/conftest.py index d25249a..5d41e2a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,26 @@ logging.basicConfig(level=logging.INFO) +# Control whether to use testcontainers +USE_TESTCONTAINERS = os.getenv('USE_TESTCONTAINERS', 'true').lower() == 'true' + +# Disable Ryuk if not explicitly enabled (solves Docker connectivity issues) +if 'TESTCONTAINERS_RYUK_DISABLED' not in os.environ: + os.environ['TESTCONTAINERS_RYUK_DISABLED'] = 'true' + +# Import testcontainers conditionally +if USE_TESTCONTAINERS: + try: + from testcontainers.postgres import PostgresContainer + from testcontainers.redis import RedisContainer + + TESTCONTAINERS_AVAILABLE = True + except ImportError: + TESTCONTAINERS_AVAILABLE = False + logging.warning('Testcontainers not available. 
Falling back to manual configuration.') +else: + TESTCONTAINERS_AVAILABLE = False + # Shared configuration fixtures @pytest.fixture(scope='session') @@ -91,6 +111,75 @@ def test_config(): } +# Testcontainers fixtures +@pytest.fixture(scope='session') +def postgres_container(): + """PostgreSQL container for integration tests""" + if not TESTCONTAINERS_AVAILABLE: + pytest.skip('Testcontainers not available') + + container = PostgresContainer(image='postgres:13', username='test_user', password='test_pass', dbname='test_db') + container.start() + + yield container + + container.stop() + + +@pytest.fixture(scope='session') +def redis_container(): + """Redis container for integration tests""" + if not TESTCONTAINERS_AVAILABLE: + pytest.skip('Testcontainers not available') + + container = RedisContainer(image='redis:7-alpine') + container.start() + + yield container + + container.stop() + + +@pytest.fixture(scope='session') +def postgresql_test_config(request): + """PostgreSQL configuration from testcontainer or environment""" + if TESTCONTAINERS_AVAILABLE and USE_TESTCONTAINERS: + # Get the postgres_container fixture + postgres_container = request.getfixturevalue('postgres_container') + return { + 'host': postgres_container.get_container_host_ip(), + 'port': postgres_container.get_exposed_port(5432), + 'database': 'test_db', + 'user': 'test_user', + 'password': 'test_pass', + 'max_connections': 10, + 'batch_size': 10000, + } + else: + # Fall back to manual config from environment + return request.getfixturevalue('postgresql_config') + + +@pytest.fixture(scope='session') +def redis_test_config(request): + """Redis configuration from testcontainer or environment""" + if TESTCONTAINERS_AVAILABLE and USE_TESTCONTAINERS: + # Get the redis_container fixture + redis_container = request.getfixturevalue('redis_container') + return { + 'host': redis_container.get_container_host_ip(), + 'port': redis_container.get_exposed_port(6379), + 'db': 0, + 'password': None, # Default Redis container has no password + 'max_connections': 10, + 'batch_size': 100, + 'pipeline_size': 500, + } + else: + # Fall back to manual config from environment + return request.getfixturevalue('redis_config') + + @pytest.fixture(scope='session') def delta_test_env(): """Create Delta Lake test environment for the session""" diff --git a/tests/integration/test_postgresql_loader.py b/tests/integration/test_postgresql_loader.py index cf12c89..b2f11eb 100644 --- a/tests/integration/test_postgresql_loader.py +++ b/tests/integration/test_postgresql_loader.py @@ -37,14 +37,14 @@ def postgresql_type_test_data(): @pytest.fixture -def cleanup_tables(postgresql_config): +def cleanup_tables(postgresql_test_config): """Cleanup test tables after tests""" tables_to_clean = [] yield tables_to_clean # Cleanup - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) try: loader.connect() conn = loader.pool.getconn() @@ -68,9 +68,9 @@ def cleanup_tables(postgresql_config): class TestPostgreSQLLoaderIntegration: """Integration tests for PostgreSQL loader""" - def test_loader_connection(self, postgresql_config): + def test_loader_connection(self, postgresql_test_config): """Test basic connection to PostgreSQL""" - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) # Test connection loader.connect() @@ -82,11 +82,11 @@ def test_loader_connection(self, postgresql_config): assert loader._is_connected == False assert loader.pool is None - def test_context_manager(self, 
postgresql_config, small_test_data, test_table_name, cleanup_tables): + def test_context_manager(self, postgresql_test_config, small_test_data, test_table_name, cleanup_tables): """Test context manager functionality""" cleanup_tables.append(test_table_name) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: assert loader._is_connected == True @@ -97,11 +97,11 @@ def test_context_manager(self, postgresql_config, small_test_data, test_table_na # Should be disconnected after context assert loader._is_connected == False - def test_basic_table_operations(self, postgresql_config, small_test_data, test_table_name, cleanup_tables): + def test_basic_table_operations(self, postgresql_test_config, small_test_data, test_table_name, cleanup_tables): """Test basic table creation and data loading""" cleanup_tables.append(test_table_name) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: # Test initial table creation @@ -114,11 +114,11 @@ def test_basic_table_operations(self, postgresql_config, small_test_data, test_t assert 'columns' in result.metadata assert result.metadata['columns'] == 7 - def test_append_mode(self, postgresql_config, small_test_data, test_table_name, cleanup_tables): + def test_append_mode(self, postgresql_test_config, small_test_data, test_table_name, cleanup_tables): """Test append mode functionality""" cleanup_tables.append(test_table_name) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: # Initial load @@ -141,11 +141,11 @@ def test_append_mode(self, postgresql_config, small_test_data, test_table_name, finally: loader.pool.putconn(conn) - def test_overwrite_mode(self, postgresql_config, small_test_data, test_table_name, cleanup_tables): + def test_overwrite_mode(self, postgresql_test_config, small_test_data, test_table_name, cleanup_tables): """Test overwrite mode functionality""" cleanup_tables.append(test_table_name) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: # Initial load @@ -169,11 +169,11 @@ def test_overwrite_mode(self, postgresql_config, small_test_data, test_table_nam finally: loader.pool.putconn(conn) - def test_batch_loading(self, postgresql_config, medium_test_table, test_table_name, cleanup_tables): + def test_batch_loading(self, postgresql_test_config, medium_test_table, test_table_name, cleanup_tables): """Test batch loading functionality""" cleanup_tables.append(test_table_name) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: # Test loading individual batches @@ -197,11 +197,11 @@ def test_batch_loading(self, postgresql_config, medium_test_table, test_table_na finally: loader.pool.putconn(conn) - def test_data_types(self, postgresql_config, postgresql_type_test_data, test_table_name, cleanup_tables): + def test_data_types(self, postgresql_test_config, postgresql_type_test_data, test_table_name, cleanup_tables): """Test various data types are handled correctly""" cleanup_tables.append(test_table_name) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: result = loader.load_table(postgresql_type_test_data, test_table_name) @@ -222,11 +222,11 @@ def test_data_types(self, postgresql_config, postgresql_type_test_data, test_tab finally: loader.pool.putconn(conn) - def test_null_value_handling(self, 
postgresql_config, null_test_data, test_table_name, cleanup_tables): + def test_null_value_handling(self, postgresql_test_config, null_test_data, test_table_name, cleanup_tables): """Test comprehensive null value handling across all data types""" cleanup_tables.append(test_table_name) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: result = loader.load_table(null_test_data, test_table_name) @@ -286,7 +286,7 @@ def test_null_value_handling(self, postgresql_config, null_test_data, test_table finally: loader.pool.putconn(conn) - def test_binary_data_handling(self, postgresql_config, test_table_name, cleanup_tables): + def test_binary_data_handling(self, postgresql_test_config, test_table_name, cleanup_tables): """Test binary data handling with INSERT fallback""" cleanup_tables.append(test_table_name) @@ -294,7 +294,7 @@ def test_binary_data_handling(self, postgresql_config, test_table_name, cleanup_ data = {'id': [1, 2, 3], 'binary_data': [b'hello', b'world', b'test'], 'text_data': ['a', 'b', 'c']} table = pa.Table.from_pydict(data) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: result = loader.load_table(table, test_table_name) @@ -313,11 +313,11 @@ def test_binary_data_handling(self, postgresql_config, test_table_name, cleanup_ finally: loader.pool.putconn(conn) - def test_schema_retrieval(self, postgresql_config, small_test_data, test_table_name, cleanup_tables): + def test_schema_retrieval(self, postgresql_test_config, small_test_data, test_table_name, cleanup_tables): """Test schema retrieval functionality""" cleanup_tables.append(test_table_name) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: # Create table @@ -334,9 +334,9 @@ def test_schema_retrieval(self, postgresql_config, small_test_data, test_table_n retrieved_names = set(schema.names) assert original_names == retrieved_names - def test_error_handling(self, postgresql_config, small_test_data): + def test_error_handling(self, postgresql_test_config, small_test_data): """Test error handling scenarios""" - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: # Test loading to non-existent table without create_table @@ -347,11 +347,11 @@ def test_error_handling(self, postgresql_config, small_test_data): assert result.rows_loaded == 0 assert 'does not exist' in result.error - def test_connection_pooling(self, postgresql_config, small_test_data, test_table_name, cleanup_tables): + def test_connection_pooling(self, postgresql_test_config, small_test_data, test_table_name, cleanup_tables): """Test connection pooling behavior""" cleanup_tables.append(test_table_name) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: # Perform multiple operations to test pool reuse @@ -366,11 +366,11 @@ def test_connection_pooling(self, postgresql_config, small_test_data, test_table # Note: _used is a dict in ThreadedConnectionPool, not an int assert len(loader.pool._used) <= loader.pool.maxconn - def test_performance_metrics(self, postgresql_config, medium_test_table, test_table_name, cleanup_tables): + def test_performance_metrics(self, postgresql_test_config, medium_test_table, test_table_name, cleanup_tables): """Test performance metrics in results""" cleanup_tables.append(test_table_name) - loader = PostgreSQLLoader(postgresql_config) + loader = 
PostgreSQLLoader(postgresql_test_config) with loader: start_time = time.time() @@ -393,7 +393,7 @@ def test_performance_metrics(self, postgresql_config, medium_test_table, test_ta class TestPostgreSQLLoaderPerformance: """Performance tests for PostgreSQL loader""" - def test_large_data_loading(self, postgresql_config, test_table_name, cleanup_tables): + def test_large_data_loading(self, postgresql_test_config, test_table_name, cleanup_tables): """Test loading large datasets""" cleanup_tables.append(test_table_name) @@ -407,7 +407,7 @@ def test_large_data_loading(self, postgresql_config, test_table_name, cleanup_ta } large_table = pa.Table.from_pydict(large_data) - loader = PostgreSQLLoader(postgresql_config) + loader = PostgreSQLLoader(postgresql_test_config) with loader: result = loader.load_table(large_table, test_table_name) diff --git a/tests/integration/test_redis_loader.py b/tests/integration/test_redis_loader.py index 15f1189..115ec07 100644 --- a/tests/integration/test_redis_loader.py +++ b/tests/integration/test_redis_loader.py @@ -53,7 +53,7 @@ def comprehensive_test_data(): @pytest.fixture -def cleanup_redis(redis_config): +def cleanup_redis(redis_test_config): """Cleanup Redis data after tests""" keys_to_clean = [] patterns_to_clean = [] @@ -65,10 +65,10 @@ def cleanup_redis(redis_config): import redis r = redis.Redis( - host=redis_config['host'], - port=redis_config['port'], - db=redis_config['db'], - password=redis_config['password'], + host=redis_test_config['host'], + port=redis_test_config['port'], + db=redis_test_config['db'], + password=redis_test_config['password'], ) # Delete specific keys @@ -90,9 +90,9 @@ def cleanup_redis(redis_config): class TestRedisLoaderIntegration: """Integration tests for Redis loader""" - def test_loader_connection(self, redis_config): + def test_loader_connection(self, redis_test_config): """Test basic connection to Redis""" - loader = RedisLoader(redis_config) + loader = RedisLoader(redis_test_config) # Test connection loader.connect() @@ -110,12 +110,12 @@ def test_loader_connection(self, redis_config): assert loader._is_connected == False assert loader.redis_client is None - def test_context_manager(self, redis_config, small_test_data, cleanup_redis): + def test_context_manager(self, redis_test_config, small_test_data, cleanup_redis): """Test context manager functionality""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_context:*') - loader = RedisLoader({**redis_config, 'data_structure': 'hash'}) + loader = RedisLoader({**redis_test_config, 'data_structure': 'hash'}) with loader: assert loader._is_connected == True @@ -126,12 +126,12 @@ def test_context_manager(self, redis_config, small_test_data, cleanup_redis): # Should be disconnected after context assert loader._is_connected == False - def test_hash_storage(self, redis_config, small_test_data, cleanup_redis): + def test_hash_storage(self, redis_test_config, small_test_data, cleanup_redis): """Test hash data structure storage""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_hash:*') - config = {**redis_config, 'data_structure': 'hash', 'key_pattern': 'test_hash:{id}'} + config = {**redis_test_config, 'data_structure': 'hash', 'key_pattern': 'test_hash:{id}'} loader = RedisLoader(config) with loader: @@ -154,12 +154,12 @@ def test_hash_storage(self, redis_config, small_test_data, cleanup_redis): score = loader.redis_client.hget(key, 'score') assert int(score.decode()) == [100, 200, 150, 300, 250][i] - def 
test_string_storage(self, redis_config, small_test_data, cleanup_redis): + def test_string_storage(self, redis_test_config, small_test_data, cleanup_redis): """Test string (JSON) data structure storage""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_string:*') - config = {**redis_config, 'data_structure': 'string', 'key_pattern': 'test_string:{id}'} + config = {**redis_test_config, 'data_structure': 'string', 'key_pattern': 'test_string:{id}'} loader = RedisLoader(config) with loader: @@ -178,12 +178,12 @@ def test_string_storage(self, redis_config, small_test_data, cleanup_redis): assert json_data['name'] == ['Alice', 'Bob', 'Charlie', 'David', 'Eve'][i] assert json_data['score'] == [100, 200, 150, 300, 250][i] - def test_stream_storage(self, redis_config, small_test_data, cleanup_redis): + def test_stream_storage(self, redis_test_config, small_test_data, cleanup_redis): """Test stream data structure storage""" keys_to_clean, patterns_to_clean = cleanup_redis keys_to_clean.append('test_stream:stream') - config = {**redis_config, 'data_structure': 'stream'} + config = {**redis_test_config, 'data_structure': 'stream'} loader = RedisLoader(config) with loader: @@ -200,12 +200,12 @@ def test_stream_storage(self, redis_config, small_test_data, cleanup_redis): info = loader.redis_client.xinfo_stream(stream_key) assert info['length'] == 5 - def test_set_storage(self, redis_config, small_test_data, cleanup_redis): + def test_set_storage(self, redis_test_config, small_test_data, cleanup_redis): """Test set data structure storage""" keys_to_clean, patterns_to_clean = cleanup_redis keys_to_clean.append('test_set:set') - config = {**redis_config, 'data_structure': 'set', 'unique_field': 'name'} + config = {**redis_test_config, 'data_structure': 'set', 'unique_field': 'name'} loader = RedisLoader(config) with loader: @@ -224,12 +224,12 @@ def test_set_storage(self, redis_config, small_test_data, cleanup_redis): names = {m.decode() for m in members} assert names == {'Alice', 'Bob', 'Charlie', 'David', 'Eve'} - def test_sorted_set_storage(self, redis_config, small_test_data, cleanup_redis): + def test_sorted_set_storage(self, redis_test_config, small_test_data, cleanup_redis): """Test sorted set data structure storage""" keys_to_clean, patterns_to_clean = cleanup_redis keys_to_clean.append('test_zset:zset') - config = {**redis_config, 'data_structure': 'sorted_set', 'score_field': 'score'} + config = {**redis_test_config, 'data_structure': 'sorted_set', 'score_field': 'score'} loader = RedisLoader(config) with loader: @@ -248,12 +248,12 @@ def test_sorted_set_storage(self, redis_config, small_test_data, cleanup_redis): scores = [score for _, score in members_with_scores] assert scores == [100.0, 150.0, 200.0, 250.0, 300.0] # Should be sorted - def test_list_storage(self, redis_config, small_test_data, cleanup_redis): + def test_list_storage(self, redis_test_config, small_test_data, cleanup_redis): """Test list data structure storage""" keys_to_clean, patterns_to_clean = cleanup_redis keys_to_clean.append('test_list:list') - config = {**redis_config, 'data_structure': 'list'} + config = {**redis_test_config, 'data_structure': 'list'} loader = RedisLoader(config) with loader: @@ -267,12 +267,12 @@ def test_list_storage(self, redis_config, small_test_data, cleanup_redis): assert loader.redis_client.exists(list_key) assert loader.redis_client.llen(list_key) == 5 - def test_append_mode(self, redis_config, small_test_data, cleanup_redis): + def test_append_mode(self, 
redis_test_config, small_test_data, cleanup_redis): """Test append mode functionality""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_append:*') - config = {**redis_config, 'data_structure': 'hash', 'key_pattern': 'test_append:{id}'} + config = {**redis_test_config, 'data_structure': 'hash', 'key_pattern': 'test_append:{id}'} loader = RedisLoader(config) with loader: @@ -296,12 +296,12 @@ def test_append_mode(self, redis_config, small_test_data, cleanup_redis): key = f'test_append:{i}' assert loader.redis_client.exists(key) - def test_overwrite_mode(self, redis_config, small_test_data, cleanup_redis): + def test_overwrite_mode(self, redis_test_config, small_test_data, cleanup_redis): """Test overwrite mode functionality""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_overwrite:*') - config = {**redis_config, 'data_structure': 'hash', 'key_pattern': 'test_overwrite:{id}'} + config = {**redis_test_config, 'data_structure': 'hash', 'key_pattern': 'test_overwrite:{id}'} loader = RedisLoader(config) with loader: @@ -320,12 +320,12 @@ def test_overwrite_mode(self, redis_config, small_test_data, cleanup_redis): assert not loader.redis_client.exists('test_overwrite:4') assert not loader.redis_client.exists('test_overwrite:5') - def test_batch_loading(self, redis_config, comprehensive_test_data, cleanup_redis): + def test_batch_loading(self, redis_test_config, comprehensive_test_data, cleanup_redis): """Test batch loading functionality""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_batch:*') - config = {**redis_config, 'data_structure': 'hash', 'key_pattern': 'test_batch:{id}', 'batch_size': 250} + config = {**redis_test_config, 'data_structure': 'hash', 'key_pattern': 'test_batch:{id}', 'batch_size': 250} loader = RedisLoader(config) with loader: @@ -343,13 +343,13 @@ def test_batch_loading(self, redis_config, comprehensive_test_data, cleanup_redi assert total_rows == 1000 - def test_ttl_functionality(self, redis_config, small_test_data, cleanup_redis): + def test_ttl_functionality(self, redis_test_config, small_test_data, cleanup_redis): """Test TTL (time-to-live) functionality""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_ttl:*') config = { - **redis_config, + **redis_test_config, 'data_structure': 'hash', 'key_pattern': 'test_ttl:{id}', 'ttl': 2, # 2 seconds TTL @@ -372,12 +372,12 @@ def test_ttl_functionality(self, redis_config, small_test_data, cleanup_redis): time.sleep(3) assert not loader.redis_client.exists(key) - def test_null_value_handling(self, redis_config, null_test_data, cleanup_redis): + def test_null_value_handling(self, redis_test_config, null_test_data, cleanup_redis): """Test comprehensive null value handling across all data types""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_nulls:*') - config = {**redis_config, 'data_structure': 'hash', 'key_pattern': 'test_nulls:{id}'} + config = {**redis_test_config, 'data_structure': 'hash', 'key_pattern': 'test_nulls:{id}'} loader = RedisLoader(config) with loader: @@ -429,12 +429,12 @@ def test_null_value_handling(self, redis_config, null_test_data, cleanup_redis): expected_int = expected_ints[i - 1] # Convert id to index assert int(int_val.decode()) == expected_int - def test_null_value_handling_string_structure(self, redis_config, null_test_data, cleanup_redis): + def test_null_value_handling_string_structure(self, redis_test_config, null_test_data, 
cleanup_redis): """Test null value handling with string (JSON) data structure""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_json_nulls:*') - config = {**redis_config, 'data_structure': 'string', 'key_pattern': 'test_json_nulls:{id}'} + config = {**redis_test_config, 'data_structure': 'string', 'key_pattern': 'test_json_nulls:{id}'} loader = RedisLoader(config) with loader: @@ -463,7 +463,7 @@ def test_null_value_handling_string_structure(self, redis_config, null_test_data expected_int = expected_ints[i - 1] assert json_data['int_field'] == expected_int - def test_binary_data_handling(self, redis_config, cleanup_redis): + def test_binary_data_handling(self, redis_test_config, cleanup_redis): """Test binary data handling""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_binary:*') @@ -472,7 +472,7 @@ def test_binary_data_handling(self, redis_config, cleanup_redis): data = {'id': [1, 2, 3], 'binary_data': [b'hello', b'world', b'\x00\x01\x02\x03'], 'text_data': ['a', 'b', 'c']} table = pa.Table.from_pydict(data) - config = {**redis_config, 'data_structure': 'hash', 'key_pattern': 'test_binary:{id}'} + config = {**redis_test_config, 'data_structure': 'hash', 'key_pattern': 'test_binary:{id}'} loader = RedisLoader(config) with loader: @@ -485,12 +485,12 @@ def test_binary_data_handling(self, redis_config, cleanup_redis): assert loader.redis_client.hget('test_binary:2', 'binary_data') == b'world' assert loader.redis_client.hget('test_binary:3', 'binary_data') == b'\x00\x01\x02\x03' - def test_comprehensive_stats(self, redis_config, small_test_data, cleanup_redis): + def test_comprehensive_stats(self, redis_test_config, small_test_data, cleanup_redis): """Test comprehensive statistics functionality""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_stats:*') - config = {**redis_config, 'data_structure': 'hash', 'key_pattern': 'test_stats:{id}'} + config = {**redis_test_config, 'data_structure': 'hash', 'key_pattern': 'test_stats:{id}'} loader = RedisLoader(config) with loader: @@ -505,10 +505,10 @@ def test_comprehensive_stats(self, redis_config, small_test_data, cleanup_redis) assert 'estimated_memory_bytes' in stats assert 'estimated_memory_mb' in stats - def test_error_handling(self, redis_config, small_test_data): + def test_error_handling(self, redis_test_config, small_test_data): """Test error handling scenarios""" # Test with invalid configuration - invalid_config = {**redis_config, 'host': 'invalid-host-that-does-not-exist', 'socket_connect_timeout': 1} + invalid_config = {**redis_test_config, 'host': 'invalid-host-that-does-not-exist', 'socket_connect_timeout': 1} loader = RedisLoader(invalid_config) import redis @@ -516,7 +516,7 @@ def test_error_handling(self, redis_config, small_test_data): with pytest.raises(redis.exceptions.ConnectionError): loader.connect() - def test_key_pattern_generation(self, redis_config, cleanup_redis): + def test_key_pattern_generation(self, redis_test_config, cleanup_redis): """Test various key pattern generations""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('complex:*') @@ -525,7 +525,11 @@ def test_key_pattern_generation(self, redis_config, cleanup_redis): data = {'user_id': ['u1', 'u2', 'u3'], 'session_id': ['s1', 's2', 's3'], 'timestamp': [100, 200, 300]} table = pa.Table.from_pydict(data) - config = {**redis_config, 'data_structure': 'hash', 'key_pattern': 'complex:{user_id}:{session_id}:{timestamp}'} + config = { + 
**redis_test_config, + 'data_structure': 'hash', + 'key_pattern': 'complex:{user_id}:{session_id}:{timestamp}', + } loader = RedisLoader(config) with loader: @@ -537,13 +541,13 @@ def test_key_pattern_generation(self, redis_config, cleanup_redis): assert loader.redis_client.exists('complex:u2:s2:200') assert loader.redis_client.exists('complex:u3:s3:300') - def test_performance_metrics(self, redis_config, comprehensive_test_data, cleanup_redis): + def test_performance_metrics(self, redis_test_config, comprehensive_test_data, cleanup_redis): """Test performance metrics in results""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_perf:*') config = { - **redis_config, + **redis_test_config, 'data_structure': 'hash', 'key_pattern': 'test_perf:{id}', 'batch_size': 100, @@ -573,7 +577,7 @@ def test_performance_metrics(self, redis_config, comprehensive_test_data, cleanu class TestRedisLoaderPerformance: """Performance tests for Redis loader""" - def test_large_data_loading(self, redis_config, cleanup_redis): + def test_large_data_loading(self, redis_test_config, cleanup_redis): """Test loading large datasets""" keys_to_clean, patterns_to_clean = cleanup_redis patterns_to_clean.append('test_large:*') @@ -589,7 +593,7 @@ def test_large_data_loading(self, redis_config, cleanup_redis): large_table = pa.Table.from_pydict(large_data) config = { - **redis_config, + **redis_test_config, 'data_structure': 'hash', 'key_pattern': 'test_large:{id}', 'batch_size': 1000, @@ -607,7 +611,7 @@ def test_large_data_loading(self, redis_config, cleanup_redis): # Verify performance metrics assert result.ops_per_second > 100 # Should handle >100 ops/sec - def test_data_structure_performance_comparison(self, redis_config, cleanup_redis): + def test_data_structure_performance_comparison(self, redis_test_config, cleanup_redis): """Compare performance across different data structures""" keys_to_clean, patterns_to_clean = cleanup_redis @@ -623,7 +627,7 @@ def test_data_structure_performance_comparison(self, redis_config, cleanup_redis keys_to_clean.append(f'perf_{structure}:{structure}') config = { - **redis_config, + **redis_test_config, 'data_structure': structure, 'key_pattern': f'perf_{structure}:{{id}}', 'score_field': 'score' if structure == 'sorted_set' else None,
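Throughout these tests, the `*_test_config` fixtures from `tests/conftest.py` decide at run time whether to hand back container-derived connection details or environment-based ones. The key mechanism is `request.getfixturevalue()`, which resolves the session-scoped container fixture lazily, so the container only starts when a test actually requests the config. A condensed, self-contained sketch of that pattern (the fallback environment variables mirror the README's manual setup section; the defaults are illustrative assumptions):

```python
import os

import pytest

# Mirrors tests/conftest.py: containers are opt-out via USE_TESTCONTAINERS.
USE_TESTCONTAINERS = os.getenv('USE_TESTCONTAINERS', 'true').lower() == 'true'


@pytest.fixture(scope='session')
def redis_test_config(request):
    """Connection config from a testcontainer, else from the environment."""
    if USE_TESTCONTAINERS:
        # getfixturevalue() defers fixture setup, so the Redis container
        # is only started when a test requests this config.
        container = request.getfixturevalue('redis_container')
        return {
            'host': container.get_container_host_ip(),
            'port': container.get_exposed_port(6379),
            'db': 0,
            'password': None,  # the default Redis container has no password
        }
    # Manual mode (USE_TESTCONTAINERS=false): read connection details
    # from the environment variables documented in the README.
    return {
        'host': os.getenv('REDIS_HOST', 'localhost'),
        'port': int(os.getenv('REDIS_PORT', '6379')),
        'db': 0,
        'password': os.getenv('REDIS_PASSWORD'),
    }
```

Tests then layer structure-specific options on top of the shared connection details, e.g. `RedisLoader({**redis_test_config, 'data_structure': 'hash'})`, which is why only host, port, and credentials live in the fixture itself.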