# tocsv.sh — from the Aadeshveer/void-pointer-CS293-project repository (main branch)
#!/bin/bash
#
# tocsv.sh - flatten per-run performance logs into a single CSV file.
#
# Reads every "$LOG_DIR"/*.log file, takes each line that contains
# "Execution time for", and writes one CSV row per such line to "$OUTPUT_CSV".
# The output file is overwritten on every run.
#
# Usage: ./tocsv.sh    (run from the directory that contains "$LOG_DIR")

# Abort on a failing command, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# --- Configuration ---
readonly LOG_DIR="logs"
readonly OUTPUT_CSV="raw_performance_data.csv"

#######################################
# Print one CSV row for every "Execution time for" line of a log file.
# Globals:
#   None
# Arguments:
#   $1 - path of the log file to scan
#   $2 - testcase identifier, written to the first column
#   $3 - block size, written to the second column
# Outputs:
#   Rows of the form "testcase,block_size,query_type,time" on stdout.
# Returns:
#   The exit status of awk (non-zero if the file cannot be read).
#######################################
extract_rows() {
    local log_file=$1
    local testcase=$2
    local block=$3

    # NOTE: "Neighours" below is spelled exactly as the patterns have always
    # been written; presumably it mirrors the log text, so do not "fix" it
    # without checking the program that produces the logs.
    #
    # LC_ALL=C keeps "%f" printing a "." decimal separator regardless of the
    # caller's locale, so the number never introduces an extra CSV comma.
    LC_ALL=C awk -F: -v tc="$testcase" -v bs="$block" '
    /Execution time for/ {
        # Field 1 is everything before the first colon: the query description.
        query = $1
        gsub("Execution time for ", "", query)

        # Map the known descriptions to short, stable names.
        gsub(/K Nearest Neighours by Euclidean coordinates/, "KNN_Euclidean", query)
        gsub(/K Nearest Neighours by distance/, "KNN_Distance", query)
        gsub(/shortest Paths by Time/, "ShortestPath_Time", query)
        gsub(/shortest Paths by distance/, "ShortestPath_Distance", query)
        gsub(/Global Graph destructor/, "GlobalGraph_Destructor", query)
        gsub(/PreProcessing of Global Graph/, "PreProcessing", query)

        # Anything not covered above still becomes a single CSV-friendly token.
        gsub(/ /, "_", query)

        # Field 2 begins with the measured time; adding 0 keeps only its
        # leading numeric part and drops any trailing unit text.
        printf "%s,%s,%s,%f\n", tc, bs, query, $2 + 0
    }' "$log_file"
}

#######################################
# Build "$OUTPUT_CSV" from all log files in "$LOG_DIR".
# Globals:
#   LOG_DIR (read), OUTPUT_CSV (read; the named file is overwritten)
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout, errors and warnings on stderr.
# Returns:
#   Exits with status 1 if "$LOG_DIR" does not exist.
#######################################
main() {
    if [[ ! -d "$LOG_DIR" ]]; then
        echo "Error: Directory '$LOG_DIR' not found." >&2
        exit 1
    fi

    echo "Step 1: Creating CSV file and writing the header..."
    # NOTE(review): the column is named execution_time_ms, but the log lines
    # are believed to report microseconds - confirm the unit before relying on
    # the name. It is left unchanged so existing consumers of the CSV keep
    # working.
    echo "testcase,block_size,query_type,execution_time_ms" > "$OUTPUT_CSV"

    echo "Step 2: Processing each log file to extract every query time..."

    local file filename stem testcase_num block_size
    local processed=0

    for file in "$LOG_DIR"/*.log; do
        # With no matching files the pattern is left as the literal string
        # "logs/*.log"; skip it (and anything else that is not a regular file).
        [[ -f "$file" ]] || continue

        filename=${file##*/}
        printf ' -> Processing %s...' "$filename"

        # Metadata comes from the file name, e.g. testcase0-0.0001.log gives
        # testcase "0" and block size "0.0001".
        stem=${filename#testcase}
        stem=${stem%.log}
        testcase_num=${stem%%-*}
        block_size=${stem#*-}

        extract_rows "$file" "$testcase_num" "$block_size" >> "$OUTPUT_CSV"

        echo " Done."
        processed=$((processed + 1))
    done

    if (( processed == 0 )); then
        echo "Warning: no .log files found in '$LOG_DIR'; the CSV contains only the header." >&2
    fi

    echo "----------------------------------------------------"
    echo "✅ Success! Raw performance data saved to: $OUTPUT_CSV"
    echo "This file contains one row for every query executed."
}

main "$@"