Skip to content

Commit b9dc1fb

Browse files
authored
Merge pull request #3 from somratdutta/add-nessie-tests-1
Add nessie tests 1
2 parents 150a4ef + a780d34 commit b9dc1fb

3 files changed

Lines changed: 177 additions & 14 deletions

File tree

tests/integration/compose/docker_compose_iceberg_nessie_catalog.yml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ services:
1111
- nessie.catalog.default-warehouse=warehouse
1212
- nessie.catalog.warehouses.warehouse.location=s3://warehouse-rest/
1313
- nessie.catalog.service.s3.default-options.endpoint=http://minio:9000/
14+
- nessie.catalog.service.s3.default-options.external-endpoint=http://127.0.0.1:9002/
1415
- nessie.catalog.service.s3.default-options.access-key=urn:nessie-secret:quarkus:nessie.catalog.secrets.access-key
1516
- nessie.catalog.service.s3.default-options.path-style-access=true
1617
- nessie.catalog.service.s3.default-options.auth-type=STATIC
@@ -35,10 +36,10 @@ services:
3536
networks:
3637
default:
3738
aliases:
38-
- warehouse.minio
39+
- warehouse-rest.minio
3940
ports:
40-
- 9001
41-
- 9000
41+
- "9001:9001"
42+
- "9002:9000"
4243
command: ["server", "/data", "--console-address", ":9001"]
4344

4445
# TODO: move this code to cluster.py
@@ -58,4 +59,4 @@ services:
5859
/usr/bin/mc mb minio/warehouse-rest --ignore-existing;
5960
/usr/bin/mc policy set public minio/warehouse-rest;
6061
tail -f /dev/null
61-
"
62+
"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<clickhouse>
2+
<backups>
3+
<allowed_path>/backups</allowed_path>
4+
</backups>
5+
</clickhouse>

tests/integration/test_database_iceberg_nessie_catalog/test.py

Lines changed: 167 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,7 @@ def create_clickhouse_iceberg_database(
9696

9797
def load_catalog_impl(started_cluster):
9898
minio_ip = started_cluster.get_instance_ip('minio')
99-
s3_endpoint = f"http://{minio_ip}:9000"
100-
101-
# Add minio hostname mapping so PyIceberg can resolve 'minio' hostnames in table metadata
102-
import subprocess
103-
try:
104-
subprocess.run(['bash', '-c', f'echo "{minio_ip} minio" >> /etc/hosts'], check=True)
105-
print(f"Added minio hostname mapping: {minio_ip} minio")
106-
except Exception as e:
107-
print(f"Failed to add hostname mapping: {e}")
99+
s3_endpoint = f"http://{minio_ip}:9002"
108100

109101
return RestCatalog(
110102
name="my_catalog",
@@ -127,7 +119,7 @@ def started_cluster():
127119
cluster = ClickHouseCluster(__file__)
128120
cluster.add_instance(
129121
"node1",
130-
main_configs=[],
122+
main_configs=["configs/backups.xml"],
131123
user_configs=[],
132124
stay_alive=True,
133125
with_iceberg_catalog=True,
@@ -345,3 +337,168 @@ def record(key):
345337

346338
assert 'aaa\naaa\naaa' == node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{namespace}.{table_name}`").strip()
347339
assert 'bbb\nbbb\nbbb' == node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{namespace}.{table_name_2}`").strip()
340+
341+
def test_backup_database(started_cluster):
    """BACKUP a DataLakeCatalog database to a File() destination, drop it,
    then RESTORE and verify the recreated DDL is byte-identical."""
    node = started_cluster.instances["node1"]
    create_clickhouse_iceberg_database(started_cluster, node, "backup_database")

    # Unique destination per run so repeated runs on the same node don't collide.
    run_token = uuid.uuid4().hex
    destination = f"File('/backups/test_backup_{run_token}/')"

    node.query(f"BACKUP DATABASE backup_database TO {destination}")
    node.query("DROP DATABASE backup_database SYNC")
    assert "backup_database" not in node.query("SHOW DATABASES")

    node.query(
        f"RESTORE DATABASE backup_database FROM {destination}",
        settings={"allow_experimental_database_iceberg": 1},
    )
    restored_ddl = node.query("SHOW CREATE DATABASE backup_database")
    expected_ddl = "CREATE DATABASE backup_database\\nENGINE = DataLakeCatalog(\\'http://nessie:19120/iceberg/\\', \\'minio\\', \\'[HIDDEN]\\')\\nSETTINGS catalog_type = \\'rest\\', warehouse = \\'warehouse\\', storage_endpoint = \\'http://minio:9000/warehouse-rest\\'\n"
    assert restored_ddl == expected_ddl
357+
358+
def test_timestamps(started_cluster):
    """Create an Iceberg table with two timestamp columns via PyIceberg,
    append one row, and check both the schema ClickHouse reports
    (DateTime64(6)) and the value it reads back.

    NOTE(review): both fields use TimestampType(); the column named
    "timestamptz" presumably was meant to be TimestamptzType() — confirm,
    since changing it would also change the expected DDL asserted below.
    """
    node = started_cluster.instances["node1"]

    test_ref = f"test_list_tables_{uuid.uuid4()}"
    table_name = f"{test_ref}_table"
    test_namespace = (f"{test_ref}_namespace",)
    test_table_identifier = (test_namespace[0], table_name)

    catalog = load_catalog_impl(started_cluster)
    catalog.create_namespace(test_namespace)

    simple_schema = Schema(
        NestedField(
            field_id=1, name="timestamp", field_type=TimestampType(), required=False
        ),
        NestedField(
            field_id=2, name="timestamptz", field_type=TimestampType(), required=False
        ),
    )

    table = catalog.create_table(
        test_table_identifier,
        schema=simple_schema,
        properties={"write.metadata.compression-codec": "none"},
    )

    create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME)

    data = [
        {
            "timestamp": datetime(2024, 1, 1, hour=12, minute=0, second=0, microsecond=0),
            "timestamptz": datetime(2024, 1, 1, hour=12, minute=0, second=0, microsecond=0),
        }
    ]
    df = pa.Table.from_pylist(data)
    table.append(df)

    # The catalog reports the table location as a full S3 URI
    # (s3://warehouse-rest/<namespace>/<table_uuid>); ClickHouse's Iceberg
    # ENGINE needs only the path below the bucket.
    table_location = table.metadata.location
    # Fail with a clear message instead of the NameError the previous code
    # raised (extracted_table_path was only bound inside the `if`).
    assert "warehouse-rest/" in table_location, (
        f"unexpected table location, no 'warehouse-rest/' marker: {table_location}"
    )
    # maxsplit=1: keep everything after the first bucket marker intact.
    extracted_table_path = table_location.split("warehouse-rest/", 1)[1]

    result = node.query(f"SHOW CREATE TABLE {CATALOG_NAME}.`{test_namespace[0]}.{table_name}`")
    assert result == f"CREATE TABLE {CATALOG_NAME}.`{test_namespace[0]}.{table_name}`\\n(\\n `timestamp` Nullable(DateTime64(6)),\\n `timestamptz` Nullable(DateTime64(6))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse-rest/{extracted_table_path}/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
    assert node.query(f"SELECT * FROM {CATALOG_NAME}.`{test_namespace[0]}.{table_name}`") == "2024-01-01 12:00:00.000000\t2024-01-01 12:00:00.000000\n"
409+
410+
def test_insert(started_cluster):
    """Append rows to an Iceberg table through PyIceberg and read them back
    through the ClickHouse DataLakeCatalog database."""
    node = started_cluster.instances["node1"]
    catalog = load_catalog_impl(started_cluster)

    test_ref = f"test_insert_{uuid.uuid4().hex[:8]}"
    test_namespace = (f"{test_ref}_namespace",)
    catalog.create_namespace(test_namespace)

    test_table_name = f"{test_ref}_table"
    identifier = (test_namespace[0], test_table_name)

    columns = Schema(
        NestedField(field_id=1, name="id", field_type=DoubleType(), required=False),
        NestedField(field_id=2, name="data", field_type=StringType(), required=False),
    )

    table = catalog.create_table(
        identifier,
        schema=columns,
        properties={"write.metadata.compression-codec": "none"},
    )

    rows = [{"id": float(i), "data": f"data_{i}"} for i in range(10)]
    table.append(pa.Table.from_pylist(rows))

    create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME)

    result = node.query(
        f"SELECT * FROM {CATALOG_NAME}.`{test_namespace[0]}.{test_table_name}` ORDER BY id"
    )
    expected = "\n".join(f"{i}\tdata_{i}" for i in range(10))
    assert result.strip() == expected
445+
446+
def test_create(started_cluster):
    """A table created through the PyIceberg catalog must show up in
    SHOW TABLES of the ClickHouse DataLakeCatalog database."""
    node = started_cluster.instances["node1"]
    catalog = load_catalog_impl(started_cluster)

    test_ref = f"test_create_{uuid.uuid4().hex[:8]}"
    test_namespace = (f"{test_ref}_namespace",)
    catalog.create_namespace(test_namespace)

    test_table_name = f"{test_ref}_table"
    identifier = (test_namespace[0], test_table_name)

    catalog.create_table(
        identifier,
        schema=Schema(
            NestedField(field_id=1, name="id", field_type=DoubleType(), required=False),
            NestedField(field_id=2, name="data", field_type=StringType(), required=False),
        ),
        properties={"write.metadata.compression-codec": "none"},
    )

    create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME)

    listed = node.query(f"SHOW TABLES FROM {CATALOG_NAME}")
    assert test_table_name in listed
474+
475+
def test_drop_table(started_cluster):
    """A table dropped through the PyIceberg catalog must be absent from
    SHOW TABLES of the ClickHouse DataLakeCatalog database."""
    node = started_cluster.instances["node1"]
    catalog = load_catalog_impl(started_cluster)

    test_ref = f"test_drop_table_{uuid.uuid4().hex[:8]}"
    test_namespace = (f"{test_ref}_namespace",)
    catalog.create_namespace(test_namespace)

    test_table_name = f"{test_ref}_table"
    identifier = (test_namespace[0], test_table_name)

    catalog.create_table(
        identifier,
        schema=Schema(
            NestedField(field_id=1, name="id", field_type=DoubleType(), required=False),
            NestedField(field_id=2, name="data", field_type=StringType(), required=False),
        ),
        properties={"write.metadata.compression-codec": "none"},
    )

    # Drop via the catalog before ClickHouse ever attaches the database.
    catalog.drop_table(identifier)

    create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME)

    listed = node.query(f"SHOW TABLES FROM {CATALOG_NAME}")
    assert test_table_name not in listed

0 commit comments

Comments
 (0)