-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfind_dups.sh
More file actions
executable file
·123 lines (107 loc) · 4.17 KB
/
find_dups.sh
File metadata and controls
executable file
·123 lines (107 loc) · 4.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/bin/bash
set -euo pipefail

# Parser state: true only while the reader is between the START_NODES and
# END_NODES marker lines of the input file.
inside_nodes=false

# Hostname-keyed lookup tables:
#   seen_hostnames      - every hostname encountered so far
#   duplicate_hostnames - hostnames encountered more than once
#   unique_ids          - comma-joined IDs collected for each duplicate hostname
declare -A seen_hostnames duplicate_hostnames unique_ids

# Default input; can be replaced with -f/--file on the command line.
file_path="/var/tmp/ibdiagnet2/ibdiagnet2.db_csv"

# Script name up to (not including) its first dot; names the main output file.
my_name=$(basename "$0")
my_name="${my_name%%.*}"

# Result files, all created relative to the current working directory.
output_file="${my_name}.csv"
duplicate_file="duplicates.csv"
duplicate_count_file="duplicate_counts.csv"
#######################################
# Print the usage/help text and terminate the script.
# Globals:   my_name (read) - script name without extension
# Arguments: $1 - optional exit status (default: 0)
# Outputs:   help text on stdout
# Returns:   never returns; exits with the requested status
#######################################
show_help() {
  local status="${1:-0}"
  # The first output file is named after the script (${my_name}.csv), so the
  # help text is built from my_name instead of a hard-coded file name.
  cat <<EOF
Usage: ${my_name}.sh [options]
Options:
 -h, --help Show this help message and exit
 -f, --file FILE Specify the path to the ibdiagnet.db_csv file
 default: /var/tmp/ibdiagnet2/ibdiagnet2.db_csv

This script processes the specified ibdiagnet.db_csv file to extract hostnames and their associated vendor card IDs.
It identifies unique hostnames and tracks duplicate hostnames along with their unique IDs.
Output is saved in three files:
 - ${my_name}.csv: Contains unique hostnames and their vendor card IDs.
 - duplicates.csv: Contains duplicate hostnames and their unique IDs.
 - duplicate_counts.csv: Contains duplicate hostnames, the count of duplicates, and their unique IDs.

EOF
  exit "$status"
}
#######################################
# Parse command-line options.
# Globals:   file_path (written when -f/--file is given)
# Arguments: the script's command-line arguments
# Outputs:   on invalid usage, an error message and the help text on stderr
# Returns:   0 once every option has been consumed; exits 0 after -h/--help;
#            exits 1 on an unknown option or when -f/--file has no value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_help
        ;;
      -f|--file)
        # Without this guard, "$2" trips `set -u` with a cryptic
        # "unbound variable" message when the value is missing.
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a FILE argument." >&2
          exit 1
        fi
        file_path="$2"
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        # show_help terminates the shell it runs in; run it in a subshell so
        # control returns here and the script can exit with a failure status.
        ( show_help ) >&2 || true
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# Abort before touching any output file when the input is not a regular file.
# The diagnostic goes to stderr and names the path that was tried, so a wrong
# -f/--file value is easy to spot.
if [[ ! -f "$file_path" ]]; then
  printf "Error: File '%s' does not exist. Exiting.\n" "$file_path" >&2
  exit 1
fi

# Start each result file fresh with its CSV header row (truncates old runs).
printf '%s\n' "Hostname,Vendor Card ID" > "$output_file"
printf '%s\n' "Hostname,Unique ID" > "$duplicate_file"
printf '%s\n' "Hostname,Duplicate Count,Unique IDs" > "$duplicate_count_file"
# Scan the input one comma-separated record at a time. Only the first, sixth
# and ninth fields are used; every other field is read into the throwaway "_".
while IFS=, read -r node_desc _ _ _ _ field6 _ _ field9 _; do
  # Section markers switch the parser state regardless of where they appear.
  case "$node_desc" in
    START_NODES*)
      inside_nodes=true
      continue
      ;;
    END_NODES*)
      inside_nodes=false
      break # nothing after the END_NODES marker is read
      ;;
  esac

  # Records outside the START_NODES/END_NODES block are ignored.
  if [[ "$inside_nodes" != true ]]; then
    continue
  fi

  # The column-header row that follows START_NODES carries no data.
  if [[ "$node_desc" == NodeDesc* ]]; then
    continue
  fi

  # Hostname: first field with every 'mlx_' occurrence removed.
  hostname="${node_desc//mlx_/}"
  # Vendor card ID: ninth field, taken verbatim.
  vendor_card_id="$field9"
  # Unique ID: sixth field without a leading '0x'.
  unique_id="${field6#0x}"

  if [[ -n "${seen_hostnames[$hostname]+x}" ]]; then
    # Repeat sighting: log it and append its ID to the per-hostname list.
    printf '%s,%s\n' "$hostname" "$unique_id" >> "$duplicate_file"
    duplicate_hostnames["$hostname"]=1
    unique_ids["$hostname"]="${unique_ids[$hostname]:-},$unique_id"
  else
    # First sighting: the hostname goes to the main output file.
    printf '%s,%s\n' "$hostname" "$vendor_card_id" >> "$output_file"
    seen_hostnames["$hostname"]=1
  fi
done < "$file_path"
#######################################
# Append one row per duplicated hostname to the duplicate-counts CSV.
# Globals:   duplicate_hostnames (read)  - hostnames seen more than once
#            unique_ids (read)           - comma-joined IDs per hostname; a
#                                          leading comma is tolerated
#            duplicate_count_file (read) - file the rows are appended to
# Arguments: none
# Outputs:   rows of the form "hostname,count,id1,id2,..." in the file
#######################################
write_duplicate_counts() {
  local host ids separators count
  for host in "${!duplicate_hostnames[@]}"; do
    ids="${unique_ids[$host]:-}"
    if [[ -z "$ids" ]]; then
      count=0
    else
      # Drop the accumulator's leading separator so the row does not contain
      # an empty column between the count and the first ID.
      ids="${ids#,}"
      # One separator precedes each ID once the list is normalised to
      # ",id1,id2,...", so the number of commas is the number of IDs.
      separators=",${ids}"
      separators="${separators//[^,]/}"
      count=${#separators}
    fi
    printf '%s,%s,%s\n' "$host" "$count" "$ids" >> "$duplicate_count_file"
  done
}
write_duplicate_counts
# Closing summary: confirm completion and tell the user which file holds each
# result set (file names only; no counts are printed here).
printf '%s\n' \
  "Processing complete." \
  "Unique hostnames saved to: $output_file" \
  "Duplicate hostnames saved to: $duplicate_file" \
  "Duplicate counts saved to: $duplicate_count_file"