-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtocsv.sh
More file actions
69 lines (55 loc) · 2.52 KB
/
tocsv.sh
File metadata and controls
69 lines (55 loc) · 2.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/bash
# Collects the query execution times found in the log files under LOG_DIR
# into a single CSV file (OUTPUT_CSV), one row per timed query.

# Exit immediately if a command exits with a non-zero status.
set -e

# --- Configuration ---
LOG_DIR="logs"                         # directory searched for *.log files
OUTPUT_CSV="raw_performance_data.csv"  # result file, rewritten on every run

# --- Main Script ---
# Abort early when the log directory is missing. The diagnostic goes to
# stderr so it is not mixed into the normal progress output on stdout.
if [[ ! -d "$LOG_DIR" ]]; then
  echo "Error: Directory '$LOG_DIR' not found." >&2
  exit 1
fi

echo "Step 1: Creating CSV file and writing the header..."
# '>' truncates any previous output so rows appended later start from a
# file that contains only the header line.
echo "testcase,block_size,query_type,execution_time_ms" > "$OUTPUT_CSV"
echo "Step 2: Processing each log file to extract every query time..."

#######################################
# Print one CSV row for every "Execution time for <query>:<value>" line
# found in a log file.
# The testcase number and block size are taken from the file name, which is
# expected to look like testcase<N>-<block_size>.log
# (e.g. testcase0-0.0001.log).
# Arguments: $1 - path to the log file
# Outputs:   rows "testcase,block_size,query_type,time" on stdout
# Returns:   exit status of awk (non-zero when the file cannot be read)
#######################################
extract_query_rows() {
  local log_path=$1
  local stem testcase_num block_size

  # Strip directory, "testcase" prefix and ".log" suffix, then split the
  # remainder at the first "-".
  stem=${log_path##*/}
  stem=${stem#testcase}
  stem=${stem%.log}
  testcase_num=${stem%%-*}
  block_size=${stem#*-}

  # A single awk both selects the timing lines and formats them, so a read
  # failure is reported through the exit status instead of being hidden
  # behind a pipeline.
  awk -v tc="$testcase_num" -v bs="$block_size" '
    BEGIN { FS = ":" }  # field 1 = description, field 2 = measured value
    /Execution time for/ {
      # "+ 0" keeps the leading number of field 2 and drops any trailing
      # text such as a unit name.
      # NOTE(review): the CSV header names this column execution_time_ms
      # while the original comment here described the value as the number
      # before "microseconds"; no unit conversion is done - confirm the unit.
      elapsed = $2 + 0

      # Turn the free-text description into a compact identifier. The
      # patterns (including the "Neighours" spelling) are kept verbatim from
      # the original script; presumably they mirror the log text - confirm
      # before changing them.
      query = $1
      gsub(/Execution time for /, "", query)
      gsub(/K Nearest Neighours by Euclidean coordinates/, "KNN_Euclidean", query)
      gsub(/K Nearest Neighours by distance/, "KNN_Distance", query)
      gsub(/shortest Paths by Time/, "ShortestPath_Time", query)
      gsub(/shortest Paths by distance/, "ShortestPath_Distance", query)
      gsub(/Global Graph destructor/, "GlobalGraph_Destructor", query)
      gsub(/PreProcessing of Global Graph/, "PreProcessing", query)
      gsub(/ /, "_", query)  # any description not mapped above

      printf "%s,%s,%s,%f\n", tc, bs, query, elapsed
    }' "$log_path"
}

processed=0
for file in "$LOG_DIR"/*.log; do
  # When the glob matches nothing bash leaves the literal pattern in place;
  # skip it (and anything that is not a regular file) instead of trying to
  # read a path that does not exist.
  [[ -f "$file" ]] || continue

  printf ' -> Processing %s...' "${file##*/}"
  extract_query_rows "$file" >> "$OUTPUT_CSV"
  printf ' Done.\n'
  # Plain assignment rather than (( processed++ )): the latter returns a
  # non-zero status when the old value is 0, which would trip "set -e".
  processed=$((processed + 1))
done

if (( processed == 0 )); then
  printf "Warning: no .log files found in '%s'.\n" "$LOG_DIR" >&2
fi
# Closing banner: a separator line, the location of the generated CSV, and a
# reminder of what each row represents.
printf '%s\n' \
  "----------------------------------------------------" \
  "✅ Success! Raw performance data saved to: $OUTPUT_CSV" \
  "This file contains one row for every query executed."