Skip to content

Commit b0f785e

Browse files
authored
Merge pull request #21 from ClickHouse/single-store
Add SingleStore measurements
2 parents 126f0d8 + c78acdc commit b0f785e

25 files changed

+1028
-0
lines changed

singlestore/benchmark.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#
# Runs the benchmark queries against one database and keeps a copy of
# everything the query runner prints.
#
# Usage: benchmark.sh <ROOT_PASSWORD> <DB_NAME> <RESULT_FILE_RUNTIMES> <RESULT_FILE_MEMORY_USAGE>
#
#   ROOT_PASSWORD             forwarded to ./run_queries.sh
#   DB_NAME                   forwarded to ./run_queries.sh; also names the log file
#   RESULT_FILE_RUNTIMES      required and stored, but never written by this script
#   RESULT_FILE_MEMORY_USAGE  required and stored, but never written by this script
#
# Output: stdout and stderr of ./run_queries.sh are shown on the terminal and
# saved to "_query_log_<DB_NAME>.txt" in the current working directory.
# Exit status: 1 on missing arguments, otherwise the status of the pipeline.

# Without pipefail the pipeline at the bottom would report tee's status, so a
# failing run_queries.sh would look like a success to the caller.
set -o pipefail

# Check if the required arguments are provided
if [[ $# -lt 4 ]]; then
    echo "Usage: $0 <ROOT_PASSWORD> <DB_NAME> <RESULT_FILE_RUNTIMES> <RESULT_FILE_MEMORY_USAGE>"
    exit 1
fi

# Arguments
ROOT_PASSWORD="$1"
DB_NAME="$2"
# The two result-file arguments are kept for interface compatibility only.
# shellcheck disable=SC2034
RESULT_FILE_RUNTIMES="$3"
# shellcheck disable=SC2034
RESULT_FILE_MEMORY_USAGE="$4"

# Construct the query log file name using $DB_NAME
QUERY_LOG_FILE="_query_log_${DB_NAME}.txt"

# Print the database name
echo "Running queries on database: $DB_NAME"

# Run queries and log the output (stderr is merged in so errors are logged too)
./run_queries.sh "$ROOT_PASSWORD" "$DB_NAME" 2>&1 | tee "$QUERY_LOG_FILE"

singlestore/count.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Prints the number of rows in <DB_NAME>.<TABLE_NAME>.
#
# Usage: count.sh <ROOT_PASSWORD> <DB_NAME> <TABLE_NAME>

# All three positional arguments are mandatory.
(( $# >= 3 )) || {
    echo "Usage: $0 <ROOT_PASSWORD> <DB_NAME> <TABLE_NAME>"
    exit 1
}

# Arguments
ROOT_PASSWORD="$1"
DB_NAME="$2"
TABLE_NAME="$3"

# Hand the password to the mysql client through its environment variable.
MYSQL_PWD=${ROOT_PASSWORD}
export MYSQL_PWD

count_query="SELECT count(*) FROM $DB_NAME.$TABLE_NAME"
mysql -h 127.0.0.1 -P 3306 -u root -e "$count_query"

singlestore/create_and_load.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Creates a database (if it does not exist yet), applies a DDL file to it and
# then delegates the data import to ./load_data.sh.
#
# Usage: create_and_load.sh <ROOT_PASSWORD> <DB_NAME> <TABLE_NAME> <DDL_FILE> \
#                           <DATA_DIRECTORY> <NUM_FILES> <SUCCESS_LOG> <ERROR_LOG>
#
#   ROOT_PASSWORD   password of the database user "root"
#   DB_NAME         database to create and load into
#   TABLE_NAME      forwarded to ./load_data.sh
#   DDL_FILE        SQL file fed to the mysql client; must exist
#   DATA_DIRECTORY  directory with the data files; must exist
#   NUM_FILES       number of files to load; digits only
#   SUCCESS_LOG     forwarded to ./load_data.sh
#   ERROR_LOG       forwarded to ./load_data.sh
#
# Note: the exit status of the two mysql calls is not checked; the script
# always continues to the load step.

# Check if the required arguments are provided
if [[ $# -lt 8 ]]; then
    echo "Usage: $0 <ROOT_PASSWORD> <DB_NAME> <TABLE_NAME> <DDL_FILE> <DATA_DIRECTORY> <NUM_FILES> <SUCCESS_LOG> <ERROR_LOG>"
    exit 1
fi

# Arguments
ROOT_PASSWORD="$1"
DB_NAME="$2"
TABLE_NAME="$3"
DDL_FILE="$4"
DATA_DIRECTORY="$5"
NUM_FILES="$6"
SUCCESS_LOG="$7"
ERROR_LOG="$8"

# Validate arguments
if [[ ! -f "$DDL_FILE" ]]; then
    echo "Error: DDL file '$DDL_FILE' does not exist."
    exit 1
fi
if [[ ! -d "$DATA_DIRECTORY" ]]; then
    echo "Error: Data directory '$DATA_DIRECTORY' does not exist."
    exit 1
fi
if [[ ! "$NUM_FILES" =~ ^[0-9]+$ ]]; then
    echo "Error: NUM_FILES must be a positive integer."
    exit 1
fi

# The mysql client reads the password from this environment variable.
export MYSQL_PWD="${ROOT_PASSWORD}"

echo "Creating database $DB_NAME"
mysql -h 127.0.0.1 -P 3306 -u root -e "CREATE DATABASE IF NOT EXISTS $DB_NAME"

echo "Executing DDL for database $DB_NAME"
# DB_NAME is quoted so it always reaches mysql as exactly one argument.
mysql -h 127.0.0.1 -P 3306 -u root "$DB_NAME" < "$DDL_FILE"

echo "Loading data for database $DB_NAME"
./load_data.sh "$ROOT_PASSWORD" "$DATA_DIRECTORY" "$DB_NAME" "$TABLE_NAME" "$NUM_FILES" "$SUCCESS_LOG" "$ERROR_LOG"

singlestore/data_size.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Prints sum(compressed_size) over the rows of
# information_schema.columnar_segments that belong to the given table.
#
# Usage: data_size.sh <ROOT_PASSWORD> <DB_NAME> <TABLE_NAME>

# Bail out early when fewer than three arguments were supplied.
if (( $# < 3 )); then
    echo "Usage: $0 <ROOT_PASSWORD> <DB_NAME> <TABLE_NAME>"
    exit 1
fi

# Arguments
ROOT_PASSWORD="$1"
DB_NAME="$2"
TABLE_NAME="$3"

# Password is passed to the mysql client via the environment.
MYSQL_PWD=${ROOT_PASSWORD}
export MYSQL_PWD

size_query="SELECT sum(compressed_size) FROM information_schema.columnar_segments"
size_query+=" WHERE database_name = '$DB_NAME' AND table_name = '$TABLE_NAME'"

mysql -h 127.0.0.1 -P 3306 -u root -e "$size_query"

singlestore/ddl.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
-- Benchmark table: a single JSON column named `data`.
CREATE TABLE bluesky (data JSON);

-- Notes:
-- - No auxiliary data structures are used to speed up scans: SingleStore does
--   not allow sort keys or indexes on JSON sub-columns.
-- - The only physical optimization in play is 'use_seekable_json', and that is
--   enabled implicitly:
--   https://docs.singlestore.com/db/v8.9/create-a-database/columnstore/columnstore-seekability-using-json/
-- - OPTIMIZE _could_ be run to force a merge, but it is omitted because the
--   other benchmarked databases are not given that treatment either.

singlestore/drop_table.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Removes a benchmark database.
#
# Usage: drop_table.sh <ROOT_PASSWORD> <DB_NAME> <TABLE_NAME>
#
# Despite the script name, the statement issued is DROP DATABASE, so
# everything inside DB_NAME is removed. TABLE_NAME is still required so that
# existing callers keep working, but it is not part of the statement.

# Check if the required arguments are provided
if [[ $# -lt 3 ]]; then
    echo "Usage: $0 <ROOT_PASSWORD> <DB_NAME> <TABLE_NAME>"
    exit 1
fi

# Arguments
ROOT_PASSWORD="$1"
DB_NAME="$2"
# shellcheck disable=SC2034  # accepted for interface compatibility, unused
TABLE_NAME="$3"

# Report what is actually dropped: the whole database, not a single table.
echo "Dropping database: $DB_NAME"

# The mysql client reads the password from this environment variable.
export MYSQL_PWD="${ROOT_PASSWORD}"
mysql -h 127.0.0.1 -P 3306 -u root -e "DROP DATABASE IF EXISTS $DB_NAME"

singlestore/index_size.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Prints sum(memory_use) over the rows of information_schema.index_statistics
# that belong to the given table.
#
# Usage: index_size.sh <ROOT_PASSWORD> <DB_NAME> <TABLE_NAME>

# Zero, one or two arguments are not enough; show the usage and stop.
case $# in
    0 | 1 | 2)
        echo "Usage: $0 <ROOT_PASSWORD> <DB_NAME> <TABLE_NAME>"
        exit 1
        ;;
esac

# Arguments
ROOT_PASSWORD="$1"
DB_NAME="$2"
TABLE_NAME="$3"

# Password is passed to the mysql client via the environment.
MYSQL_PWD=${ROOT_PASSWORD}
export MYSQL_PWD

index_query="SELECT sum(memory_use) FROM information_schema.index_statistics WHERE database_name = '$DB_NAME' AND table_name = '$TABLE_NAME'"
mysql -h 127.0.0.1 -P 3306 -u root -e "$index_query"

singlestore/install.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
#!/bin/bash
#
# Creates and starts the "singlestore-ciab" Docker container from the
# singlestore/cluster-in-a-box image, then waits until a trivial query
# succeeds on 127.0.0.1:3306.
#
# Usage: install.sh <LICENSE_KEY> <ROOT_PASSWORD>

# Both arguments are mandatory.
(( $# >= 2 )) || {
    echo "Usage: $0 <LICENSE_KEY> <ROOT_PASSWORD>"
    exit 1
}

# Arguments
LICENSE_KEY="$1"
ROOT_PASSWORD="$2"

# Options for the initial container run, collected in an array so each value
# stays a single word.
docker_run_opts=(
    -i --init
    --name singlestore-ciab
    -e LICENSE_KEY="${LICENSE_KEY}"
    -e ROOT_PASSWORD="${ROOT_PASSWORD}"
    -p 3306:3306 -p 8080:8080
)
docker run "${docker_run_opts[@]}" singlestore/cluster-in-a-box

docker start singlestore-ciab

# Poll once per second until the server answers a query; there is no timeout.
until mysql -h 127.0.0.1 -P 3306 -u root --password="${ROOT_PASSWORD}" -e 'SELECT 1'; do
    sleep 1
done

singlestore/load_data.sh

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
#!/bin/bash
#
# Loads up to MAX_FILES "*.json.gz" files from DATA_DIRECTORY into
# <DB_NAME>.<TABLE_NAME>, one LOAD DATA statement per file.
#
# Usage: load_data.sh <ROOT_PASSWORD> <DATA_DIRECTORY> <DB_NAME> <TABLE_NAME> \
#                     <MAX_FILES> <SUCCESS_LOG> <ERROR_LOG>
#
#   ROOT_PASSWORD   password of the database user "root"
#   DATA_DIRECTORY  directory searched (non-recursively) for *.json.gz files
#   DB_NAME         target database
#   TABLE_NAME      target table; each document is loaded into its "data" column
#   MAX_FILES       upper bound on the number of files to load; digits only
#   SUCCESS_LOG     required and stored, but not written by this script
#   ERROR_LOG       required and stored, but not written by this script
#
# Files are processed in glob (sorted) order. A failing mysql call does not
# stop the loop.

# Check if the required arguments are provided
if [[ $# -lt 7 ]]; then
    echo "Usage: $0 <ROOT_PASSWORD> <DATA_DIRECTORY> <DB_NAME> <TABLE_NAME> <MAX_FILES> <SUCCESS_LOG> <ERROR_LOG>"
    exit 1
fi

# Arguments
ROOT_PASSWORD="$1"
DATA_DIRECTORY="$2"
DB_NAME="$3"
TABLE_NAME="$4"
MAX_FILES="$5"
# shellcheck disable=SC2034  # accepted for interface compatibility, unused
SUCCESS_LOG="$6"
# shellcheck disable=SC2034  # accepted for interface compatibility, unused
ERROR_LOG="$7"

# Validate arguments
if [[ ! -d "$DATA_DIRECTORY" ]]; then
    echo "Error: Data directory '$DATA_DIRECTORY' does not exist."
    exit 1
fi
if [[ ! "$MAX_FILES" =~ ^[0-9]+$ ]]; then
    echo "Error: MAX_FILES must be a positive integer."
    exit 1
fi

# The mysql client reads the password from this environment variable.
export MYSQL_PWD="${ROOT_PASSWORD}"

# Collect the input files with a glob instead of parsing `ls` output, so paths
# containing whitespace stay intact. With nullglob an unmatched pattern yields
# an empty list rather than the literal pattern.
shopt -s nullglob
data_files=("$DATA_DIRECTORY"/*.json.gz)
shopt -u nullglob

# Force base 10 so a value such as "010" is not read as octal.
max_files=$((10#$MAX_FILES))

# Load data: the array slice caps the loop at max_files entries.
for file in "${data_files[@]:0:max_files}"; do
    echo "Processing file: $file"

    # Note: If one or more JSON documents in the currently processed file cannot be parsed (because of extremely deep nesting, line
    # breaks in unexpected places, etc.), then SingleStore will skip the _entire_ file. This unfortunately reduces the "data quality"
    # metric (= the number of successfully inserted JSON documents) quite a bit. SingleStore's LOAD statement comes with a
    # SKIP PARSER ERRORS clause that would theoretically allow skipping individual documents, but it is not supported for JSON
    # (https://www.singlestore.com/forum/t/pipeline-skip-parser-errors-with-json/2794).
    mysql --local-infile=1 -h 127.0.0.1 -P 3306 -u root -D "$DB_NAME" \
        -e "LOAD DATA LOCAL INFILE \"$file\" INTO TABLE $TABLE_NAME(data <- %) FORMAT JSON"
done

singlestore/main.sh

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
#!/bin/bash

# --------------------------------------------------------------------------------------------------------------------------------------------------------------
# NOTE: Provide license key and PASSWORD here
LICENSE_KEY=""
ROOT_PASSWORD=""
# --------------------------------------------------------------------------------------------------------------------------------------------------------------

# Optional positional arguments and their fallbacks:
#   $1  data directory            (default: ~/data/bluesky)
#   $2  success log file          (default: success.log)
#   $3  error log file            (default: error.log)
#   $4  prefix for output files   (default: _m6i.8xlarge)
DEFAULT_DATA_DIRECTORY=~/data/bluesky
DATA_DIRECTORY="${1:-$DEFAULT_DATA_DIRECTORY}"
SUCCESS_LOG="${2:-success.log}"
ERROR_LOG="${3:-error.log}"
OUTPUT_PREFIX="${4:-_m6i.8xlarge}"

# Nothing can be benchmarked without the data directory.
[[ -d "$DATA_DIRECTORY" ]] || {
    echo "Error: Data directory '$DATA_DIRECTORY' does not exist."
    exit 1
}

# Show the menu and read the selection into $choice.
cat <<'EOF'
Select the dataset size to benchmark:
1) 1m (default)
2) 10m
3) 100m
4) 1000m
5) all
EOF
read -p "Enter the number corresponding to your choice: " choice

# ./install.sh "${LICENSE_KEY}" "${ROOT_PASSWORD}"

#######################################
# Runs one complete benchmark cycle for a dataset size: create and load the
# database, record sizes, row count and query results, run the query
# benchmark, then drop the database again.
# Globals:
#   DATA_DIRECTORY, ROOT_PASSWORD, SUCCESS_LOG, ERROR_LOG, OUTPUT_PREFIX (read)
# Arguments:
#   $1 - dataset size; used both as the number of data files to load and in
#        the database name "bluesky_<size>m"
# Outputs:
#   Each measurement is printed and also written to
#   "<OUTPUT_PREFIX>_bluesky_<size>m.<metric>" in the current directory.
# Exits:
#   Terminates the whole script with status 1 when DATA_DIRECTORY holds fewer
#   than <size> loadable files.
#######################################
benchmark() {
    local size=$1
    local db_name="bluesky_${size}m"
    local out_base="${OUTPUT_PREFIX}_${db_name}"

    # Count only what the loader will actually import: "*.json.gz" entries
    # directly inside DATA_DIRECTORY. Counting every file recursively would
    # let unrelated or nested files satisfy the check.
    local data_files=("$DATA_DIRECTORY"/*.json.gz)
    local file_count=${#data_files[@]}
    # An unmatched glob is left as the literal pattern; treat that as zero files.
    [[ -e "${data_files[0]}" ]] || file_count=0

    if (( file_count < size )); then
        echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count."
        exit 1
    fi

    ./create_and_load.sh "${ROOT_PASSWORD}" "$db_name" bluesky "ddl.sql" "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG"
    ./total_size.sh "${ROOT_PASSWORD}" "$db_name" bluesky | tee "${out_base}.total_size"
    ./data_size.sh "${ROOT_PASSWORD}" "$db_name" bluesky | tee "${out_base}.data_size"
    ./index_size.sh "${ROOT_PASSWORD}" "$db_name" bluesky | tee "${out_base}.index_size"
    ./count.sh "${ROOT_PASSWORD}" "$db_name" bluesky | tee "${out_base}.count"
    ./query_results.sh "${ROOT_PASSWORD}" "$db_name" | tee "${out_base}.query_results"
    # ./physical_query_plans.sh "${ROOT_PASSWORD}" "$db_name" | tee "${out_base}.physical_query_plans"
    ./benchmark.sh "${ROOT_PASSWORD}" "$db_name" "${out_base}.results_runtime" "${out_base}.results_memory_usage"
    ./drop_table.sh "${ROOT_PASSWORD}" "$db_name" bluesky
}

# Translate the menu selection into the list of dataset sizes to run; anything
# other than 2-5 (including an empty answer) falls back to the 1m dataset.
case "$choice" in
    2) selected_sizes=(10) ;;
    3) selected_sizes=(100) ;;
    4) selected_sizes=(1000) ;;
    5) selected_sizes=(1 10 100 1000) ;;
    *) selected_sizes=(1) ;;
esac

# Run the selected benchmarks one after another, smallest first.
for selected_size in "${selected_sizes[@]}"; do
    benchmark "$selected_size"
done

0 commit comments

Comments
 (0)