Skip to content

Commit ec0ad5c

Browse files
authored
Merge pull request #29 from ClickHouse/dont-trash-my-system
Install ClickHouse in local directory instead of system
2 parents d976515 + 6bf3945 commit ec0ad5c

File tree

14 files changed

+36
-37
lines changed

14 files changed

+36
-37
lines changed

clickhouse/count.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ fi
1010
DB_NAME="$1"
1111
TABLE_NAME="$2"
1212

13-
clickhouse-client --database="$DB_NAME" --query "SELECT count() FROM '$TABLE_NAME';"
13+
./clickhouse client --database="$DB_NAME" --query "SELECT count() FROM '$TABLE_NAME';"

clickhouse/create_and_load.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ ERROR_LOG="$7"
2222

2323

2424
echo "Creating database $DB_NAME"
25-
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS $DB_NAME"
25+
./clickhouse client --query "CREATE DATABASE IF NOT EXISTS $DB_NAME"
2626

2727
echo "Executing DDL for database $DB_NAME"
28-
clickhouse-client --database="$DB_NAME" --enable_json_type=1 --multiquery < "$DDL_FILE"
28+
./clickhouse client --database="$DB_NAME" --enable_json_type=1 --multiquery < "$DDL_FILE"
2929

3030
echo "Loading data for database $DB_NAME"
3131
./load_data.sh "$DATA_DIRECTORY" "$DB_NAME" "$TABLE_NAME" "$NUM_FILES" "$SUCCESS_LOG" "$ERROR_LOG"

clickhouse/data_size.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ fi
1010
DB_NAME="$1"
1111
TABLE_NAME="$2"
1212

13-
clickhouse-client --query "SELECT sum(data_compressed_bytes) FROM system.parts WHERE database = '$DB_NAME' AND table = '$TABLE_NAME' AND active"
13+
./clickhouse client --query "SELECT sum(data_compressed_bytes) FROM system.parts WHERE database = '$DB_NAME' AND table = '$TABLE_NAME' AND active"

clickhouse/drop_table.sh

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,9 @@
11
#!/bin/bash
22

3-
# Check if the required arguments are provided
4-
if [[ $# -lt 2 ]]; then
5-
echo "Usage: $0 <DB_NAME> <TABLE_NAME>"
6-
exit 1
7-
fi
3+
echo "Stopping ClickHouse"
4+
pidof clickhouse && kill -9 `pidof clickhouse`
85

9-
DB_NAME="$1"
10-
TABLE_NAME="$2"
11-
12-
echo "Dropping table: $DB_NAME.$TABLE_NAME"
13-
14-
clickhouse-client --database="$DB_NAME" --query "DROP TABLE IF EXISTS $TABLE_NAME"
6+
# 'DROP TABLE' has a built-in safety mechanism that prevents users from dropping large tables. We hit that with large
7+
# amounts of ingested data. Instead, make our lives easy and remove the persistence manually.
8+
echo "Dropping all data"
9+
rm -rf data/ metadata/ store/

clickhouse/index_size.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ fi
1010
DB_NAME="$1"
1111
TABLE_NAME="$2"
1212

13-
clickhouse-client --query "SELECT sum(primary_key_size) + sum(marks_bytes) FROM system.parts WHERE database = '$DB_NAME' AND table = '$TABLE_NAME' AND active"
13+
./clickhouse client --query "SELECT sum(primary_key_size) + sum(marks_bytes) FROM system.parts WHERE database = '$DB_NAME' AND table = '$TABLE_NAME' AND active"

clickhouse/index_usage.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ cat queries.sql | while read -r query; do
1818
echo "Index usage for query Q$QUERY_NUM:"
1919
echo
2020

21-
clickhouse-client --database="$DB_NAME" --query="EXPLAIN indexes=1 $query"
21+
./clickhouse client --database="$DB_NAME" --query="EXPLAIN indexes=1 $query"
2222

2323
# Increment the query number
2424
QUERY_NUM=$((QUERY_NUM + 1))
25-
done;
25+
done;

clickhouse/install.sh

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,3 @@
11
#!/bin/bash
22

33
curl https://clickhouse.com/ | sh
4-
sudo ./clickhouse install --noninteractive
5-
sudo clickhouse start
6-
7-
while true
8-
do
9-
clickhouse-client --query "SELECT 1" && break
10-
sleep 1
11-
done
12-

clickhouse/load_data.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ for file in $(ls "$DATA_DIRECTORY"/*.json.gz | head -n "$MAX_FILES"); do
3939
fi
4040

4141
# Attempt the first import
42-
clickhouse-client --query="INSERT INTO $DB_NAME.$TABLE_NAME SETTINGS min_insert_block_size_rows = 1_000_000, min_insert_block_size_bytes = 0 FORMAT JSONAsObject" < "$uncompressed_file"
42+
./clickhouse client --query="INSERT INTO $DB_NAME.$TABLE_NAME SETTINGS min_insert_block_size_rows = 1_000_000, min_insert_block_size_bytes = 0 FORMAT JSONAsObject" < "$uncompressed_file"
4343
first_attempt=$?
4444

4545
# Check if the first import was successful
@@ -51,7 +51,7 @@ for file in $(ls "$DATA_DIRECTORY"/*.json.gz | head -n "$MAX_FILES"); do
5151

5252
echo "Processing $file... again..."
5353
# Attempt the second import with a different command
54-
clickhouse-client --query="INSERT INTO $DB_NAME.$TABLE_NAME SETTINGS min_insert_block_size_rows = 1_000_000, min_insert_block_size_bytes = 0, input_format_allow_errors_num = 1_000_000_000, input_format_allow_errors_ratio=1 FORMAT JSONAsObject" < "$uncompressed_file"
54+
./clickhouse client --query="INSERT INTO $DB_NAME.$TABLE_NAME SETTINGS min_insert_block_size_rows = 1_000_000, min_insert_block_size_bytes = 0, input_format_allow_errors_num = 1_000_000_000, input_format_allow_errors_ratio=1 FORMAT JSONAsObject" < "$uncompressed_file"
5555
second_attempt=$?
5656

5757
# Check if the second import was successful

clickhouse/main.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ benchmark() {
3838
echo "Error: Not enough files in '$DATA_DIRECTORY'. Required: $size, Found: $file_count."
3939
exit 1
4040
fi
41+
./start.sh
4142
./create_and_load.sh "bluesky_${size}m_${suffix}" bluesky "ddl_${suffix}.sql" "$DATA_DIRECTORY" "$size" "$SUCCESS_LOG" "$ERROR_LOG"
4243
./total_size.sh "bluesky_${size}m_${suffix}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.total_size"
4344
./data_size.sh "bluesky_${size}m_${suffix}" bluesky | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.data_size"
@@ -47,7 +48,7 @@ benchmark() {
4748
./index_usage.sh "bluesky_${size}m_${suffix}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.index_usage"
4849
./physical_query_plans.sh "bluesky_${size}m_${suffix}" | tee "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.physical_query_plans"
4950
./benchmark.sh "bluesky_${size}m_${suffix}" "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.results_runtime" "${OUTPUT_PREFIX}_bluesky_${size}m_${suffix}.results_memory_usage"
50-
./drop_table.sh "bluesky_${size}m_${suffix}" bluesky
51+
./drop_table.sh # also stops ClickHouse
5152
}
5253

5354
case $choice in

clickhouse/physical_query_plans.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ cat queries.sql | while read -r query; do
1818
echo "Physical query plan for query Q$QUERY_NUM:"
1919
echo
2020

21-
clickhouse-client --database="$DB_NAME" --query="EXPLAIN PIPELINE $query"
21+
./clickhouse client --database="$DB_NAME" --query="EXPLAIN PIPELINE $query"
2222

2323
# Increment the query number
2424
QUERY_NUM=$((QUERY_NUM + 1))
25-
done;
25+
done;

0 commit comments

Comments
 (0)