Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
82 commits
Select commit Hold shift + click to select a range
0ad9ed7
Added support for multiple gpu workers on the same node
misiugodfrey Nov 19, 2025
a496b61
Changed scripts to use services instead of all
misiugodfrey Nov 24, 2025
4a21359
fixed worker id
misiugodfrey Nov 24, 2025
bf7ac7d
Merge branch 'main' into misiug/MultiNodeDockerContainers
misiugodfrey Nov 24, 2025
36f5019
Replace docker-compose.native-gpu.yml with a jinja template
misiugodfrey Dec 19, 2025
d2d85d3
Merge branch 'main' of https://github.com/rapidsai/velox-testing into…
misiugodfrey Dec 19, 2025
5f4395d
undo worker port change
misiugodfrey Dec 19, 2025
0b7662e
fixed mis-merge
misiugodfrey Dec 19, 2025
97eb901
removed dead code
misiugodfrey Dec 19, 2025
7df3fa9
remove --remove-orphans
misiugodfrey Dec 19, 2025
97e1a90
add missing hive.properties for new config gen
misiugodfrey Dec 19, 2025
5b918c2
adjustments to support exchange
misiugodfrey Jan 9, 2026
75e389b
copied @misiugodfrey ucx installation changes
karthikeyann Jan 11, 2026
9a03e72
added cudf.exchange.server.port
karthikeyann Jan 11, 2026
6dae676
skip cluster-tag for multiple workers
karthikeyann Jan 11, 2026
5fe9e00
bind nsys to older version because of QdstrmImporter crashes
karthikeyann Jan 12, 2026
dad4324
move duplicate_worker_configs to generate_presto_config.sh
karthikeyann Jan 12, 2026
a2ee8a1
fix minor profile path issue on failure
karthikeyann Jan 12, 2026
39e7796
Add multi-gpu profiler support
karthikeyann Jan 12, 2026
acdfc7a
add profile environment variables
karthikeyann Jan 12, 2026
9c21575
enable cudf.exchange
karthikeyann Jan 12, 2026
413fde4
Merge branch 'main' into misiug/jinjaDockerCompose
karthikeyann Jan 12, 2026
8c09809
ucx already installed in presto/prestissimo-dependency:centos9 image
karthikeyann Jan 12, 2026
8677a74
add ucx related config and environment variables
karthikeyann Jan 13, 2026
81c371d
add presto-function-server-executable.jar fix
karthikeyann Jan 13, 2026
b1a3e95
add 2 more config, join partitioned, disable async data cache
karthikeyann Jan 13, 2026
82c6434
added ability to set GPU Ids via an argument into start_presto_helper…
wmalpica Jan 13, 2026
fe84284
rewrote to run multiple presto workers in single container
karthikeyann Jan 15, 2026
96bb73a
commented out GDS settings in devices
Jan 15, 2026
1574fda
skip fail if generated yml is not found in stop_presto.sh
karthikeyann Jan 16, 2026
d0b4303
disable cudf.exchange for single worker
Jan 16, 2026
a6de445
PR feedback
misiugodfrey Jan 17, 2026
a0db566
fix stop_presto.sh
misiugodfrey Jan 17, 2026
e124362
remove ucx in worker because it is installed from source in prestissimo…
karthikeyann Jan 17, 2026
d79d8cd
remove ucx deps install
misiugodfrey Jan 19, 2026
17c5409
Merge branch 'misiug/jinjaDockerCompose' of https://github.com/rapids…
misiugodfrey Jan 19, 2026
65f5d47
missing pytest import
karthikeyann Jan 20, 2026
61a6bc3
disable buffered input, use 8 kvikio threads instead of default 4
karthikeyann Jan 20, 2026
329073b
add backtrace, gpu metrics in profile
karthikeyann Jan 20, 2026
214c8e8
use gcc-14 and g++14 for compilation via CC, CXX
karthikeyann Jan 20, 2026
c245060
--single-container option to allow both variants
misiugodfrey Jan 21, 2026
13ba62b
dash to underscore
misiugodfrey Jan 21, 2026
9a0cf4d
correct single-container false
misiugodfrey Jan 21, 2026
bfdb8f0
print single cont
misiugodfrey Jan 21, 2026
ccc6e39
single-container bool
misiugodfrey Jan 21, 2026
766ec76
add missing colon
misiugodfrey Jan 21, 2026
6c5e6b6
default to one container
misiugodfrey Jan 21, 2026
8673e46
fix typo
misiugodfrey Jan 21, 2026
7fb0ba6
duplicate worker configs every time
misiugodfrey Jan 21, 2026
e1ec703
Reserve only assigned gpus
misiugodfrey Jan 21, 2026
bb0e702
Join gpu_ids
misiugodfrey Jan 21, 2026
75e8eff
backslash
misiugodfrey Jan 21, 2026
c3f1133
join gpu_ids
misiugodfrey Jan 21, 2026
82ca005
fix docker template
misiugodfrey Jan 21, 2026
d12a649
PR feedback
misiugodfrey Jan 21, 2026
b34d985
copy env
misiugodfrey Jan 21, 2026
eef91bb
Add lukewarm times and sum to benchmark report
paul-aiyedun Jan 21, 2026
38cf62e
Merge branch 'paul/update_benchmark_script' of github.com:rapidsai/ve…
Jan 21, 2026
a55fb7c
Write text report to file
paul-aiyedun Jan 22, 2026
7f22774
Add schema name and tag to benchmark report
paul-aiyedun Jan 22, 2026
36a0195
Merge branch 'paul/update_benchmark_script' of github.com:rapidsai/ve…
Jan 22, 2026
4897104
automatically drop cache and add multiple-benchmark runner script
Jan 24, 2026
7f3beac
Merge branch 'main' of github.com:rapidsai/velox-testing into misiug/…
Jan 24, 2026
62f2a6d
fix formatting
Jan 24, 2026
e5c6a8f
Merge branch 'main' of https://github.com/rapidsai/velox-testing into…
misiugodfrey Feb 10, 2026
d27bd7e
PR feedback
misiugodfrey Feb 11, 2026
857861e
PR feedback and analyze checking
misiugodfrey Feb 11, 2026
4e440fa
PR feedback
misiugodfrey Feb 11, 2026
fcbdfa4
update license
misiugodfrey Feb 11, 2026
86ea51a
update license
misiugodfrey Feb 11, 2026
206df2c
pre-commit hooks
misiugodfrey Feb 11, 2026
1607b69
Merge branch 'main' of https://github.com/rapidsai/velox-testing into…
misiugodfrey Feb 11, 2026
b63f818
moved join-distribution
misiugodfrey Feb 11, 2026
1963eae
formatting updates
misiugodfrey Feb 11, 2026
9882384
more formatting fixes
misiugodfrey Feb 11, 2026
5b172dd
spell check in comments
misiugodfrey Feb 11, 2026
07af3ef
PR feedback
misiugodfrey Feb 12, 2026
6de0719
Merge branch 'main' of https://github.com/rapidsai/velox-testing into…
misiugodfrey Feb 12, 2026
3b1e1de
Fixed empty benchmark_results issue
misiugodfrey Feb 12, 2026
04e29f9
Apply suggestions from code review
karthikeyann Feb 13, 2026
70b1300
Fixed drop cache for lukewarm
misiugodfrey Feb 13, 2026
1012c0f
pre-commit
misiugodfrey Feb 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ optimizer.generate-domain-filters=true
# Upper limit for broadcasted table size to avoid memory blowups.
# See: https://github.com/prestodb/presto/issues/22161#issuecomment-1994128619
join-max-broadcast-table-size={{ .JoinMaxBroadcastTableSizeMb }}MB
# Default is AUTOMATIC, ucx exchange does not support BROADCAST partition type.
join-distribution-type=AUTOMATIC

# Client request timeout to avoid hung queries.
query.client.timeout=30m
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,8 @@ single-node-execution-enabled=true
# Enable cuDF (CPU mode will ignore this setting)
cudf.enabled=true
cudf.exchange=false
# Port number currently must be exactly 3 more than server port (ignored if cudf.exchange is false)
cudf.exchange.server.port=8083
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved these from the generate script so that the defaults are not being inserted outside of the template.

cudf.memory_resource=async

async-data-cache-enabled=false
52 changes: 20 additions & 32 deletions presto/scripts/generate_presto_config.sh
Original file line number Diff line number Diff line change
@@ -1,18 +1,7 @@
#!/usr/bin/env bash

# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

Expand Down Expand Up @@ -42,36 +31,35 @@ if [ ! -x "${SCRIPT_DIR}/../pbench/pbench" ]; then
fi

# NOTE(review): this span is a rendered diff that interleaves BOTH the
# pre-change lines (using "$1" and full config paths) and their
# replacements (using "$worker_id", "$worker_native_config",
# "$http_port"/"$exch_port") without +/- markers. Do not read the
# duplicated statements below as one coherent version; the comments
# describe the intended (post-change) behavior.
#
# Creates a per-worker config directory etc_worker_<id> from the
# etc_worker template and patches it with worker-unique ports and a
# unique node id.
#   $1 - worker id (the GPU id assigned to this worker)
# Reads globals: CONFIG_DIR, NUM_WORKERS.
function duplicate_worker_configs() {
echo "Duplicating worker configs for GPU ID $1"
local worker_config="${CONFIG_DIR}/etc_worker_${1}"
local worker_id=$1
echo "Duplicating worker configs for GPU ID $worker_id"
local worker_config="${CONFIG_DIR}/etc_worker_${worker_id}"
local worker_native_config="${worker_config}/config_native.properties"
local coord_config="${CONFIG_DIR}/etc_coordinator"
local coord_native_config="${coord_config}/config_native.properties"
# Worker N gets HTTP port 10NN0 and exchange port 10NN3 (zero-padded id).
local http_port="10$(printf "%02d\n" "$worker_id")0"
local exch_port="10$(printf "%02d\n" "$worker_id")3"
# Start from a fresh copy of the worker config template.
rm -rf ${worker_config}
cp -r ${CONFIG_DIR}/etc_worker ${worker_config}

# Single node execution needs to be disabled if we are running multiple workers.
if [[ ${NUM_WORKERS} -gt 1 ]]; then
sed -i "s+single-node-execution-enabled.*+single-node-execution-enabled=false+g" \
${coord_config}/config_native.properties
sed -i "s+single-node-execution-enabled.*+single-node-execution-enabled=false+g" \
${worker_config}/config_native.properties
# make cudf.exchange=true if we are running multiple workers
sed -i "s+cudf.exchange=false+cudf.exchange=true+g" ${worker_config}/config_native.properties
sed -i "s+single-node-execution-enabled.*+single-node-execution-enabled=false+g" ${coord_native_config}
sed -i "s+single-node-execution-enabled.*+single-node-execution-enabled=false+g" ${worker_native_config}
# make cudf.exchange=true if we are running multiple workers
sed -i "s+cudf.exchange=false+cudf.exchange=true+g" ${worker_native_config}
# make join-distribution-type=PARTITIONED if we are running multiple workers
# (ucx exchange does not currently support BROADCAST partition type)
sed -i "s+join-distribution-type=.*+join-distribution-type=PARTITIONED+g" ${coord_native_config}
fi
# NOTE(review): old (removed) line — it unconditionally appended PARTITIONED
# even for a single worker; the conditional sed above replaces it.
echo "join-distribution-type=PARTITIONED" >> ${coord_config}/config_native.properties

# Each worker node needs to have its own http-server port. This isn't used, but
# the cudf.exchange server port is currently hard-coded to be the server port +3
# and that needs to be unique for each worker.
sed -i "s+http-server\.http\.port.*+http-server\.http\.port=80${1}0+g" \
${worker_config}/config_native.properties
sed -i "s+cudf.exchange.server.port=.*+cudf.exchange.server.port=80${1}3+g" \
${worker_config}/config_native.properties
if ! grep -q "^cudf.exchange.server.port=80${1}3" ${worker_config}/config_native.properties; then
echo "cudf.exchange.server.port=80${1}3" >> ${worker_config}/config_native.properties
fi
echo "async-data-cache-enabled=false" >> ${worker_config}/config_native.properties
sed -i "s+http-server\.http\.port.*+http-server\.http\.port=${http_port}+g" ${worker_native_config}
sed -i "s+cudf.exchange.server.port=.*+cudf.exchange.server.port=${exch_port}+g" ${worker_native_config}
# Give each worker a unique id.
sed -i "s+node\.id.*+node\.id=worker_${1}+g" ${worker_config}/node.properties
sed -i "s+node\.id.*+node\.id=worker_${worker_id}+g" ${worker_config}/node.properties
}

# get host values
Expand All @@ -86,7 +74,7 @@ if [[ -z ${VARIANT_TYPE} || ! ${VARIANT_TYPE} =~ ^(cpu|gpu|java)$ ]]; then
fi
if [[ -z ${VCPU_PER_WORKER} ]]; then
if [[ "${VARIANT_TYPE}" == "gpu" ]]; then
VCPU_PER_WORKER=2
VCPU_PER_WORKER=2
else
VCPU_PER_WORKER=${NPROC}
fi
Expand Down
12 changes: 11 additions & 1 deletion presto/scripts/run_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ OPTIONS:
stored inside a directory under the --output-dir path with a name matching the tag name.
Tags must contain only alphanumeric and underscore characters.
-p, --profile Enable profiling of benchmark queries.
--skip-drop-cache Skip dropping system caches before each benchmark query (dropped by default).
-m, --metrics Collect detailed metrics from Presto REST API after each query.
Metrics are stored in query-specific directories.

Expand Down Expand Up @@ -146,6 +147,10 @@ parse_args() {
PROFILE=true
shift
;;
--skip-drop-cache)
SKIP_DROP_CACHE=true
shift
;;
-m|--metrics)
METRICS=true
shift
Expand Down Expand Up @@ -220,6 +225,10 @@ if [[ "${METRICS}" == "true" ]]; then
PYTEST_ARGS+=("--metrics")
fi

if [[ "${SKIP_DROP_CACHE}" == "true" ]]; then
PYTEST_ARGS+=("--skip-drop-cache")
fi

# Compute the directory where this script resides
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

Expand All @@ -236,8 +245,9 @@ source "${SCRIPT_DIR}/common_functions.sh"

wait_for_worker_node_registration "$HOST_NAME" "$PORT"

echo "Running bench"
export PRESTO_IMAGE_TAG="${USER:-latest}"
echo "Using PRESTO_IMAGE_TAG: $PRESTO_IMAGE_TAG"

BENCHMARK_TEST_DIR=${TEST_DIR}/performance_benchmarks
pytest -q ${BENCHMARK_TEST_DIR}/${BENCHMARK_TYPE}_test.py ${PYTEST_ARGS[*]}
pytest -q -s ${BENCHMARK_TEST_DIR}/${BENCHMARK_TYPE}_test.py ${PYTEST_ARGS[*]}
90 changes: 90 additions & 0 deletions presto/scripts/run_multiple_benchmarks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Run the TPC-H benchmark for every combination of worker count, kvikio
# thread count, driver count, and schema name. Presto is restarted for
# each combination; results are tagged "<schema>_<W>wk_<D>dr_<K>kv".

# Defaults; each can be overridden with a comma-separated list.
KVIKIO_ARRAY=(8)
DRIVERS_ARRAY=(2)
WORKERS_ARRAY=(1)
SCHEMA_ARRAY=()

# Print usage information.
# Fix: this function was referenced on the unknown-argument path but was
# never defined, which produced "print_help: command not found".
print_help() {
    cat <<EOF
Usage: $(basename "$0") -s SCHEMAS [OPTIONS]

OPTIONS:
    -k, --kvikio-threads LIST  Comma-separated kvikio thread counts (default: 8)
    -d, --num-drivers LIST     Comma-separated driver counts (default: 2)
    -w, --num-workers LIST     Comma-separated worker counts (default: 1)
    -s, --schemas LIST         Comma-separated schema names (required)
        --data-dir DIR         Presto data directory (or set PRESTO_DATA_DIR)
EOF
}

# Parse command-line options into the *_ARRAY variables and PRESTO_DATA_DIR.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case $1 in
            -k|--kvikio-threads)
                if [[ -n $2 ]]; then
                    IFS=',' read -ra KVIKIO_ARRAY <<< "$2"
                    shift 2
                else
                    echo "Error: --kvikio-threads requires a value"
                    exit 1
                fi
                ;;
            -d|--num-drivers)
                if [[ -n $2 ]]; then
                    IFS=',' read -ra DRIVERS_ARRAY <<< "$2"
                    shift 2
                else
                    # Fix: this branch previously reported "--kvikio-threads".
                    echo "Error: --num-drivers requires a value"
                    exit 1
                fi
                ;;
            -w|--num-workers)
                if [[ -n $2 ]]; then
                    IFS=',' read -ra WORKERS_ARRAY <<< "$2"
                    shift 2
                else
                    echo "Error: --num-workers requires a value"
                    exit 1
                fi
                ;;
            -s|--schemas)
                if [[ -n $2 ]]; then
                    IFS=',' read -ra SCHEMA_ARRAY <<< "$2"
                    shift 2
                else
                    echo "Error: --schemas requires a value"
                    exit 1
                fi
                ;;
            --data-dir)
                if [[ -n $2 ]]; then
                    PRESTO_DATA_DIR="$2"
                    shift 2
                else
                    echo "Error: --data-dir requires a value"
                    exit 1
                fi
                ;;
            *)
                echo "Error: Unknown argument $1"
                print_help
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

if [[ ${#SCHEMA_ARRAY[@]} -eq 0 ]]; then
    echo "Error: --schemas is required. Provide a comma-separated list of schema names."
    exit 1
fi

if [[ -z ${PRESTO_DATA_DIR:-} ]]; then
    echo "Error: --data-dir is required or PRESTO_DATA_DIR must be set in the environment."
    exit 1
fi
# --data-dir only set a shell variable; export it so the child scripts
# (which presumably read PRESTO_DATA_DIR from the environment, per the
# error message above) can actually see it.
export PRESTO_DATA_DIR

# Sweep every combination; restart Presto per combination so each run
# starts from a known cluster configuration.
for schema in "${SCHEMA_ARRAY[@]}"; do
    for kvikio in "${KVIKIO_ARRAY[@]}"; do
        for drivers in "${DRIVERS_ARRAY[@]}"; do
            for workers in "${WORKERS_ARRAY[@]}"; do
                echo "Running combo: num_workers = $workers, kvikio_threads = $kvikio, num_drivers = $drivers, schema = $schema"
                ./start_native_gpu_presto.sh -w "$workers" --kvikio-threads "$kvikio" --num-drivers "$drivers"
                ./run_benchmark.sh -b tpch -s "${schema}" --tag "${schema}_${workers}wk_${drivers}dr_${kvikio}kv"
                ./stop_presto.sh
            done
        done
    done
done
56 changes: 55 additions & 1 deletion presto/testing/integration_tests/analyze_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,50 @@
import prestodb


def check_tables_analyzed(presto_cursor, schema_name):
    """Verify that ANALYZE TABLE statistics exist for every table in a schema.

    Runs ``SHOW STATS FOR`` on each table in ``hive.<schema_name>`` and
    requires that at least one per-column row carries a non-null
    ``distinct_values_count``.

    Args:
        presto_cursor: Presto database cursor
        schema_name: Name of the schema containing tables to check

    Raises:
        RuntimeError: if the schema has no tables, or if any table is
            missing statistics.
    """
    rows = presto_cursor.execute(f"SHOW TABLES FROM hive.{schema_name}").fetchall()
    table_names = [name for (name,) in rows]

    if not table_names:
        raise RuntimeError(f"No tables found in schema '{schema_name}'")

    missing_stats = []
    for table_name in table_names:
        presto_cursor.execute(f"SHOW STATS FOR hive.{schema_name}.{table_name}")
        # Resolve the column positions from the cursor description.
        columns = [desc[0] for desc in presto_cursor.description]
        distinct_idx = columns.index("distinct_values_count")
        name_idx = columns.index("column_name")

        # Per-column rows have a non-null column_name; ANALYZE populates
        # distinct_values_count on them. The table is considered analyzed
        # when at least one such row has the statistic set.
        column_rows = [row for row in presto_cursor.fetchall() if row[name_idx] is not None]
        if not any(row[distinct_idx] is not None for row in column_rows):
            missing_stats.append(table_name)

    if missing_stats:
        missing = ", ".join(missing_stats)
        raise RuntimeError(
            f"ANALYZE TABLE has not been run on the following tables in schema "
            f"'{schema_name}': {missing}. "
            f"Run analyze_tables.sh on a CPU Presto instance before benchmarking."
        )
    print(f"[Analyze] All {len(table_names)} table(s) in schema '{schema_name}' have statistics.")


def analyze_tables(presto_cursor, schema_name, verbose=False):
"""Analyze all tables in the given schema to collect statistics.

Expand Down Expand Up @@ -66,13 +110,23 @@ def analyze_tables(presto_cursor, schema_name, verbose=False):
parser.add_argument("--port", type=int, default=8080, help="Presto coordinator port (default: 8080)")
parser.add_argument("--user", type=str, default="test_user", help="Presto user (default: test_user)")
parser.add_argument("-v", "--verbose", action="store_true", default=False, help="Enable verbose output")
parser.add_argument(
"--check-only",
action="store_true",
default=False,
help="Only check if tables have been analyzed (do not run ANALYZE)",
)

args = parser.parse_args()

conn = prestodb.dbapi.connect(host=args.host, port=args.port, user=args.user, catalog="hive")
cursor = conn.cursor()

try:
analyze_tables(cursor, args.schema_name, verbose=args.verbose)
if args.check_only:
check_tables_analyzed(cursor, args.schema_name)
else:
analyze_tables(cursor, args.schema_name, verbose=args.verbose)
finally:
cursor.close()
conn.close()
31 changes: 31 additions & 0 deletions presto/testing/performance_benchmarks/cache_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

import subprocess


def drop_cache():
    """Drop system caches using a privileged Docker container.

    Launches a throwaway privileged container and writes 3 to
    /proc/sys/vm/drop_caches, clearing the system page cache, dentries,
    and inodes. Prints `free` output before and after inside the
    container for visibility.

    Raises:
        RuntimeError: if the docker command exits with a non-zero status.
    """
    drop_script = "free; echo drop_caches; echo 3 > /proc/sys/vm/drop_caches; free"
    command = [
        "docker", "run",
        "--rm",           # throwaway container
        "--privileged",   # required to write /proc/sys/vm/drop_caches
        "--gpus", "all",
        "alpine:latest",
        "sh", "-c", drop_script,
    ]

    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(
            f"drop_cache returned error code: {result.returncode}, "
            f"stdout: {result.stdout}, stderr: {result.stderr}"
        )
29 changes: 29 additions & 0 deletions presto/testing/performance_benchmarks/common_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,29 @@

from ..common.fixtures import tpcds_queries as tpcds_queries
from ..common.fixtures import tpch_queries as tpch_queries
from ..integration_tests.analyze_tables import check_tables_analyzed
from .benchmark_keys import BenchmarkKeys
from .cache_utils import drop_cache
from .metrics_collector import collect_metrics
from .profiler_utils import start_profiler, stop_profiler


@pytest.fixture(scope="session", autouse=True)
def verify_tables_analyzed(request):
    """Session-scoped setup that verifies ANALYZE TABLE has been run on all tables."""
    cfg = request.config
    schema = cfg.getoption("--schema-name")
    # Fail the whole session early if statistics are missing, before any
    # benchmark query runs.
    conn = prestodb.dbapi.connect(
        host=cfg.getoption("--hostname"),
        port=cfg.getoption("--port"),
        user=cfg.getoption("--user"),
        catalog="hive",
        schema=schema,
    )
    cursor = conn.cursor()
    try:
        check_tables_analyzed(cursor, schema)
    finally:
        cursor.close()
        conn.close()


@pytest.fixture(scope="module")
def presto_cursor(request):
hostname = request.config.getoption("--hostname")
Expand All @@ -31,6 +49,17 @@ def benchmark_result_collector(request):
request.session.benchmark_results = benchmark_results


@pytest.fixture(scope="session", autouse=True)
def drop_cache_once(request):
    """Session-scoped fixture that drops the cache once at the start of the benchmark run."""
    # --skip-drop-cache opts out; by default the cache is dropped.
    if request.config.getoption("--skip-drop-cache"):
        print("[Cache] Skipping cache drop (--skip-drop-cache flag set).")
    else:
        drop_cache()
        print("[Cache] System cache dropped successfully.")


@pytest.fixture(scope="module")
def benchmark_queries(request, tpch_queries, tpcds_queries): # noqa: F811
if request.node.obj.BENCHMARK_TYPE == "tpch":
Expand Down
Loading