Commits (62)
5bf5d4c  Separate Session related functionality from Connection class (#571)  (varun-edachali-dbx, May 28, 2025)
400a8bd  Introduce Backend Interface (DatabricksClient) (#573)  (varun-edachali-dbx, May 30, 2025)
3c78ed7  Implement ResultSet Abstraction (backend interfaces for fetch phase) …  (varun-edachali-dbx, Jun 3, 2025)
9625229  Introduce Sea HTTP Client and test script (#583)  (varun-edachali-dbx, Jun 4, 2025)
0887bc1  Introduce `SeaDatabricksClient` (Session Implementation) (#582)  (varun-edachali-dbx, Jun 9, 2025)
6d63df0  Normalise Execution Response (clean backend interfaces) (#587)  (varun-edachali-dbx, Jun 11, 2025)
ba8d9fd  Introduce models for `SeaDatabricksClient` (#595)  (varun-edachali-dbx, Jun 12, 2025)
bb3f15a  Introduce preliminary SEA Result Set (#588)  (varun-edachali-dbx, Jun 12, 2025)
19f1fae  Merge branch 'main' into sea-migration  (varun-edachali-dbx, Jun 17, 2025)
6c5ba6d  remove invalid ExecuteResponse import  (varun-edachali-dbx, Jun 17, 2025)
5e5147b  Separate Session related functionality from Connection class (#571)  (varun-edachali-dbx, May 28, 2025)
57370b3  Introduce Backend Interface (DatabricksClient) (#573)  (varun-edachali-dbx, May 30, 2025)
75752bf  Implement ResultSet Abstraction (backend interfaces for fetch phase) …  (varun-edachali-dbx, Jun 3, 2025)
450b80d  remove un-necessary initialisation assertions  (varun-edachali-dbx, Jun 18, 2025)
a926f02  remove un-necessary line break s  (varun-edachali-dbx, Jun 18, 2025)
55ad001  more un-necessary line breaks  (varun-edachali-dbx, Jun 18, 2025)
fa15730  constrain diff of test_closing_connection_closes_commands  (varun-edachali-dbx, Jun 18, 2025)
019c7fb  reduce diff of test_closing_connection_closes_commands  (varun-edachali-dbx, Jun 18, 2025)
726abe7  use pytest-like assertions for test_closing_connection_closes_commands  (varun-edachali-dbx, Jun 18, 2025)
bf6d41c  ensure command_id is not None  (varun-edachali-dbx, Jun 18, 2025)
5afa733  line breaks after multi-line pyfocs  (varun-edachali-dbx, Jun 18, 2025)
e3dfd36  ensure non null operationHandle for commandId creation  (varun-edachali-dbx, Jun 18, 2025)
63360b3  use command_id methods instead of explicit guid_to_hex_id conversion  (varun-edachali-dbx, Jun 18, 2025)
13ffb8d  remove un-necessary artifacts in test_session, add back assertion  (varun-edachali-dbx, Jun 18, 2025)
a74d279  Implement SeaDatabricksClient (Complete Execution Spec) (#590)  (varun-edachali-dbx, Jun 18, 2025)
d759050  add from __future__ import annotations to remove string literals arou…  (varun-edachali-dbx, Jun 19, 2025)
1e21434  move docstring of DatabricksClient within class  (varun-edachali-dbx, Jun 24, 2025)
cd4015b  move ThriftResultSet import to top of file  (varun-edachali-dbx, Jun 24, 2025)
ed8b610  make backend/utils __init__ file empty  (varun-edachali-dbx, Jun 24, 2025)
94d951e  use from __future__ import annotations to remove string literals arou…  (varun-edachali-dbx, Jun 24, 2025)
c20058e  use lazy logging  (varun-edachali-dbx, Jun 24, 2025)
fe3acb1  replace getters with property tag  (varun-edachali-dbx, Jun 24, 2025)
9fb6a76  Merge branch 'main' into backend-refactors  (varun-edachali-dbx, Jun 24, 2025)
61dfc4d  set active_command_id to None, not active_op_handle  (varun-edachali-dbx, Jun 24, 2025)
64fb9b2  align test_session with pytest instead of unittest  (varun-edachali-dbx, Jun 24, 2025)
cbf63f9  Merge branch 'main' into sea-migration  (varun-edachali-dbx, Jun 26, 2025)
59b4825  remove duplicate test, correct active_command_id attribute  (varun-edachali-dbx, Jun 26, 2025)
e380654  SeaDatabricksClient: Add Metadata Commands (#593)  (varun-edachali-dbx, Jun 26, 2025)
677a7b0  SEA volume operations fix: assign `manifest.is_volume_operation` to `…  (varun-edachali-dbx, Jun 26, 2025)
45585d4  Introduce manual SEA test scripts for Exec Phase (#589)  (varun-edachali-dbx, Jun 27, 2025)
70c7dc8  Complete Fetch Phase (for `INLINE` disposition and `JSON_ARRAY` forma…  (varun-edachali-dbx, Jul 2, 2025)
abf9aab  Merge branch 'main' into sea-migration  (varun-edachali-dbx, Jul 3, 2025)
9b4b606  Merge branch 'main' into backend-refactors  (varun-edachali-dbx, Jul 3, 2025)
4f11ff0  Introduce `row_limit` param (#607)  (varun-edachali-dbx, Jul 7, 2025)
45f5c26  Merge branch 'main' into backend-refactors  (varun-edachali-dbx, Jul 10, 2025)
2c9368a  formatting (black)  (varun-edachali-dbx, Jul 10, 2025)
9b1b1f5  remove repetition from Session.__init__  (varun-edachali-dbx, Jul 10, 2025)
77e23d3  Merge branch 'backend-refactors' into sea-migration  (varun-edachali-dbx, Jul 11, 2025)
3bd3aef  fix merge artifacts  (varun-edachali-dbx, Jul 11, 2025)
6d4701f  correct patch paths  (varun-edachali-dbx, Jul 11, 2025)
dc1cb6d  fix type issues  (varun-edachali-dbx, Jul 14, 2025)
5d04cd0  Merge branch 'main' into sea-migration  (varun-edachali-dbx, Jul 15, 2025)
922c448  explicitly close result queue  (varun-edachali-dbx, Jul 15, 2025)
1a0575a  Complete Fetch Phase (`EXTERNAL_LINKS` disposition and `ARROW` format…  (varun-edachali-dbx, Jul 16, 2025)
c07beb1  SEA Session Configuration Fix: Explicitly convert values to `str` (#…  (varun-edachali-dbx, Jul 16, 2025)
640cc82  SEA: add support for `Hybrid` disposition (#631)  (varun-edachali-dbx, Jul 17, 2025)
8fbca9d  SEA: Reduce network calls for synchronous commands (#633)  (varun-edachali-dbx, Jul 19, 2025)
806e5f5  SEA: Decouple Link Fetching (#632)  (varun-edachali-dbx, Jul 21, 2025)
b57c3f3  Chunk download latency (#634)  (saishreeeee, Jul 21, 2025)
ef5836b  acquire lock before notif + formatting (black)  (varun-edachali-dbx, Jul 21, 2025)
24332c8  Revert "acquire lock before notif + formatting (black)"  (varun-edachali-dbx, Jul 22, 2025)
1266863  Revert "SEA: Decouple Link Fetching (#632)"  (varun-edachali-dbx, Jul 22, 2025)
121 changes: 121 additions & 0 deletions examples/experimental/sea_connector_test.py
@@ -0,0 +1,121 @@
"""
Main script to run all SEA connector tests.

This script runs all the individual test modules and displays
a summary of test results with visual indicators.

In order to run the script, the following environment variables need to be set:
- DATABRICKS_SERVER_HOSTNAME: The hostname of the Databricks server
- DATABRICKS_HTTP_PATH: The HTTP path of the Databricks server
- DATABRICKS_TOKEN: The token to use for authentication
"""

import os
import sys
import logging
import subprocess
from typing import List, Tuple

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

TEST_MODULES = [
"test_sea_session",
"test_sea_sync_query",
"test_sea_async_query",
"test_sea_metadata",
]


def run_test_module(module_name: str) -> bool:
"""Run a test module and return success status."""
module_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "tests", f"{module_name}.py"
)

# Simply run the module as a script - each module handles its own test execution
result = subprocess.run(
[sys.executable, module_path], capture_output=True, text=True
)

# Log the output from the test module
if result.stdout:
for line in result.stdout.strip().split("\n"):
logger.info(line)

if result.stderr:
for line in result.stderr.strip().split("\n"):
logger.error(line)

return result.returncode == 0


def run_tests() -> List[Tuple[str, bool]]:
"""Run all tests and return results."""
results = []

for module_name in TEST_MODULES:
try:
logger.info(f"\n{'=' * 50}")
logger.info(f"Running test: {module_name}")
logger.info(f"{'-' * 50}")

success = run_test_module(module_name)
results.append((module_name, success))

status = "✅ PASSED" if success else "❌ FAILED"
logger.info(f"Test {module_name}: {status}")

except Exception as e:
logger.error(f"Error loading or running test {module_name}: {str(e)}")
import traceback

logger.error(traceback.format_exc())
results.append((module_name, False))

return results


def print_summary(results: List[Tuple[str, bool]]) -> None:
"""Print a summary of test results."""
logger.info(f"\n{'=' * 50}")
logger.info("TEST SUMMARY")
logger.info(f"{'-' * 50}")

passed = sum(1 for _, success in results if success)
total = len(results)

for module_name, success in results:
status = "✅ PASSED" if success else "❌ FAILED"
logger.info(f"{status} - {module_name}")

logger.info(f"{'-' * 50}")
logger.info(f"Total: {total} | Passed: {passed} | Failed: {total - passed}")
logger.info(f"{'=' * 50}")


if __name__ == "__main__":
# Check if required environment variables are set
required_vars = [
"DATABRICKS_SERVER_HOSTNAME",
"DATABRICKS_HTTP_PATH",
"DATABRICKS_TOKEN",
]
missing_vars = [var for var in required_vars if not os.environ.get(var)]

if missing_vars:
logger.error(
f"Missing required environment variables: {', '.join(missing_vars)}"
)
logger.error("Please set these variables before running the tests.")
sys.exit(1)

# Run all tests
results = run_tests()

# Print summary
print_summary(results)

# Exit with appropriate status code
all_passed = all(success for _, success in results)
sys.exit(0 if all_passed else 1)
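
Usage note (not part of the diff): a minimal sketch of driving this runner locally. The environment variable values are placeholders, and the repo-root-relative script path is an assumption about where you invoke it from.

# Hypothetical local driver for the runner above; the env var values below are
# placeholders, not real credentials.
import os
import subprocess
import sys

env = dict(os.environ)
env.update(
    {
        "DATABRICKS_SERVER_HOSTNAME": "my-workspace.cloud.databricks.com",  # placeholder
        "DATABRICKS_HTTP_PATH": "/sql/1.0/warehouses/abc123",  # placeholder
        "DATABRICKS_TOKEN": "dapi-placeholder-token",  # placeholder
    }
)

# Run the suite the same way CI might: one subprocess, exit code 0 on success.
result = subprocess.run(
    [sys.executable, "examples/experimental/sea_connector_test.py"], env=env
)
sys.exit(result.returncode)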
Empty file.
241 changes: 241 additions & 0 deletions examples/experimental/tests/test_sea_async_query.py
@@ -0,0 +1,241 @@
"""
Test for SEA asynchronous query execution functionality.
"""
import os
import sys
import logging
import time
from databricks.sql.client import Connection
from databricks.sql.backend.types import CommandState

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def test_sea_async_query_with_cloud_fetch():
"""
Test executing a query asynchronously using the SEA backend with cloud fetch enabled.

This function connects to a Databricks SQL endpoint using the SEA backend,
executes a simple query asynchronously with cloud fetch enabled, and verifies that execution completes successfully.
"""
server_hostname = os.environ.get("DATABRICKS_SERVER_HOSTNAME")
http_path = os.environ.get("DATABRICKS_HTTP_PATH")
access_token = os.environ.get("DATABRICKS_TOKEN")
catalog = os.environ.get("DATABRICKS_CATALOG")

if not all([server_hostname, http_path, access_token]):
logger.error("Missing required environment variables.")
logger.error(
"Please set DATABRICKS_SERVER_HOSTNAME, DATABRICKS_HTTP_PATH, and DATABRICKS_TOKEN."
)
return False

try:
# Create connection with cloud fetch enabled
logger.info(
"Creating connection for asynchronous query execution with cloud fetch enabled"
)
connection = Connection(
server_hostname=server_hostname,
http_path=http_path,
access_token=access_token,
catalog=catalog,
schema="default",
use_sea=True,
user_agent_entry="SEA-Test-Client",
use_cloud_fetch=True,
enable_query_result_lz4_compression=False,
)

logger.info(
f"Successfully opened SEA session with ID: {connection.get_session_id_hex()}"
)

# Execute a query that generates large rows to force multiple chunks
requested_row_count = 5000
cursor = connection.cursor()
query = f"""
SELECT
id,
concat('value_', repeat('a', 10000)) as test_value
FROM range(1, {requested_row_count} + 1) AS t(id)
"""

logger.info(
f"Executing asynchronous query with cloud fetch to generate {requested_row_count} rows"
)
cursor.execute_async(query)
logger.info(
"Asynchronous query submitted successfully with cloud fetch enabled"
)

# Check query state
logger.info("Checking query state...")
while cursor.is_query_pending():
logger.info("Query is still pending, waiting...")
time.sleep(1)

logger.info("Query is no longer pending, getting results...")
cursor.get_async_execution_result()

results = [cursor.fetchone()]
results.extend(cursor.fetchmany(10))
results.extend(cursor.fetchall())
actual_row_count = len(results)

logger.info(
f"Requested {requested_row_count} rows, received {actual_row_count} rows"
)

# Verify total row count
if actual_row_count != requested_row_count:
logger.error(
f"FAIL: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}"
)
return False

logger.info(
"PASS: Received correct number of rows with cloud fetch and all fetch methods work correctly"
)

# Close resources
cursor.close()
connection.close()
logger.info("Successfully closed SEA session")

return True

except Exception as e:
logger.error(
f"Error during SEA asynchronous query execution test with cloud fetch: {str(e)}"
)
import traceback

logger.error(traceback.format_exc())
return False


def test_sea_async_query_without_cloud_fetch():
"""
Test executing a query asynchronously using the SEA backend with cloud fetch disabled.

This function connects to a Databricks SQL endpoint using the SEA backend,
executes a simple query asynchronously with cloud fetch disabled, and verifies that execution completes successfully.
"""
server_hostname = os.environ.get("DATABRICKS_SERVER_HOSTNAME")
http_path = os.environ.get("DATABRICKS_HTTP_PATH")
access_token = os.environ.get("DATABRICKS_TOKEN")
catalog = os.environ.get("DATABRICKS_CATALOG")

if not all([server_hostname, http_path, access_token]):
logger.error("Missing required environment variables.")
logger.error(
"Please set DATABRICKS_SERVER_HOSTNAME, DATABRICKS_HTTP_PATH, and DATABRICKS_TOKEN."
)
return False

try:
# Create connection with cloud fetch disabled
logger.info(
"Creating connection for asynchronous query execution with cloud fetch disabled"
)
connection = Connection(
server_hostname=server_hostname,
http_path=http_path,
access_token=access_token,
catalog=catalog,
schema="default",
use_sea=True,
user_agent_entry="SEA-Test-Client",
use_cloud_fetch=False,
enable_query_result_lz4_compression=False,
)

logger.info(
f"Successfully opened SEA session with ID: {connection.get_session_id_hex()}"
)

# For non-cloud fetch, use a smaller row count to avoid exceeding inline limits
requested_row_count = 100
cursor = connection.cursor()
query = f"""
SELECT
id,
concat('value_', repeat('a', 100)) as test_value
FROM range(1, {requested_row_count} + 1) AS t(id)
"""

logger.info(
f"Executing asynchronous query without cloud fetch to generate {requested_row_count} rows"
)
cursor.execute_async(query)
logger.info(
"Asynchronous query submitted successfully with cloud fetch disabled"
)

# Check query state
logger.info("Checking query state...")
while cursor.is_query_pending():
logger.info("Query is still pending, waiting...")
time.sleep(1)

logger.info("Query is no longer pending, getting results...")
cursor.get_async_execution_result()
results = [cursor.fetchone()]
results.extend(cursor.fetchmany(10))
results.extend(cursor.fetchall())
actual_row_count = len(results)

logger.info(
f"Requested {requested_row_count} rows, received {actual_row_count} rows"
)

# Verify total row count
if actual_row_count != requested_row_count:
logger.error(
f"FAIL: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}"
)
return False

logger.info(
"PASS: Received correct number of rows without cloud fetch and all fetch methods work correctly"
)

# Close resources
cursor.close()
connection.close()
logger.info("Successfully closed SEA session")

return True

except Exception as e:
logger.error(
f"Error during SEA asynchronous query execution test without cloud fetch: {str(e)}"
)
import traceback

logger.error(traceback.format_exc())
return False


def test_sea_async_query_exec():
"""
Run both asynchronous query tests and return overall success.
"""
with_cloud_fetch_success = test_sea_async_query_with_cloud_fetch()
logger.info(
f"Asynchronous query with cloud fetch: {'✅ PASSED' if with_cloud_fetch_success else '❌ FAILED'}"
)

without_cloud_fetch_success = test_sea_async_query_without_cloud_fetch()
logger.info(
f"Asynchronous query without cloud fetch: {'✅ PASSED' if without_cloud_fetch_success else '❌ FAILED'}"
)

return with_cloud_fetch_success and without_cloud_fetch_success


if __name__ == "__main__":
success = test_sea_async_query_exec()
sys.exit(0 if success else 1)
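
Usage note (not part of the diff): both tests above follow the same submit-poll-fetch pattern. The sketch below distills it into a helper, using only the cursor API exercised in this file (execute_async, is_query_pending, get_async_execution_result) and the same environment variables; the helper name and parameters are illustrative, not part of the PR.

# Sketch: the execute -> poll -> fetch pattern shared by both tests above.
# Assumes the same environment variables as the tests; illustrative only.
import os
import time

from databricks.sql.client import Connection


def run_async_query(query: str, poll_interval: float = 1.0):
    """Submit `query` via the SEA backend, poll until it is no longer
    pending, then attach to the result and return all rows."""
    connection = Connection(
        server_hostname=os.environ["DATABRICKS_SERVER_HOSTNAME"],
        http_path=os.environ["DATABRICKS_HTTP_PATH"],
        access_token=os.environ["DATABRICKS_TOKEN"],
        use_sea=True,
    )
    cursor = connection.cursor()
    try:
        cursor.execute_async(query)
        while cursor.is_query_pending():
            time.sleep(poll_interval)  # wait between server-side status checks
        cursor.get_async_execution_result()
        return cursor.fetchall()
    finally:
        cursor.close()
        connection.close()


if __name__ == "__main__":
    rows = run_async_query("SELECT id FROM range(1, 11) AS t(id)")
    print(f"fetched {len(rows)} rows")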