-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlog_gap_detector.sh
More file actions
200 lines (165 loc) · 5.86 KB
/
log_gap_detector.sh
File metadata and controls
200 lines (165 loc) · 5.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/bin/bash
# Log Gap Detector - Detect timestamp gaps in log4j formatted log files
# Usage: ./log_gap_detector.sh <logfile> <threshold>
# Threshold format: 300, 300s, 5m, 2h
# This version of the script was created with the help of Claude Sonnet 4
#######################################
# Print the command-line help text.
# Arguments: none (reads $0 for the program name shown in the text)
# Outputs:   the usage message on stdout, one line per printf argument
#######################################
show_usage() {
  local prog="$0"

  # One argument per output line; '%s\n' is reused for every argument.
  printf '%s\n' \
    "Usage: ${prog} <logfile> <threshold>" \
    "Detect timestamp gaps in log4j formatted log files." \
    "Arguments:" \
    "logfile Path to the log file to analyze" \
    "threshold Gap threshold (300, 300s, 5m, 2h)" \
    "s = seconds (default), m = minutes, h = hours" \
    "Examples:" \
    "${prog} app.log 5m # Detect gaps longer than 5 minutes" \
    "${prog} app.log 300s # Detect gaps longer than 300 seconds" \
    "${prog} app.log 2h # Detect gaps longer than 2 hours" \
    "${prog} app.log 300 # Detect gaps longer than 300 seconds" \
    "Expected timestamp format: 2025-05-10 09:39:04,634"
}
#######################################
# Convert a threshold such as "300", "300s", "5m" or "2h" into seconds.
# Arguments: $1 - threshold text: decimal digits with an optional
#                 s (seconds, the default), m (minutes) or h (hours) suffix
# Outputs:   the number of seconds on stdout, or the literal string
#            "ERROR" when the input does not match the accepted format
#######################################
parse_threshold() {
  local input="$1"
  local number
  local unit

  # Extract number and unit using regex
  if [[ $input =~ ^([0-9]+)([smh]?)$ ]]; then
    # Force base 10: bash arithmetic treats a leading 0 as octal, so "08m"
    # would abort with "value too great for base" and "010m" would be
    # computed from 8 instead of 10.
    number=$((10#${BASH_REMATCH[1]}))
    unit="${BASH_REMATCH[2]}"
    case "$unit" in
      "s" | "") echo "$number" ;;       # seconds (default)
      "m") echo $((number * 60)) ;;     # minutes
      "h") echo $((number * 3600)) ;;   # hours
      *) echo "ERROR" ;;
    esac
  else
    echo "ERROR"
  fi
}
#######################################
# Validate the command-line inputs; exit the script with status 1 and a
# message on stderr when any check fails.
# Arguments: $1 - log file path
#            $2 - threshold exactly as the user typed it
#            $3 - threshold converted to seconds, or "ERROR"
#            $4... - the original command-line arguments (not inspected)
# Outputs:   error text (and the usage text for missing arguments) on stderr
# Returns:   0 when every check passes; otherwise calls exit 1
#######################################
validate_inputs() {
  local logfile="$1"
  local threshold_input="$2"
  local threshold_seconds="$3"

  # The caller always supplies the first three parameters, possibly as empty
  # strings, so the parameter count alone cannot reveal a missing argument;
  # an empty log file or threshold is treated as missing.
  if [[ $# -lt 2 || -z "$logfile" || -z "$threshold_input" ]]; then
    echo "Error: Missing required arguments" >&2
    show_usage >&2
    exit 1
  fi

  # Check file exists and is readable
  if [[ ! -f "$logfile" ]]; then
    echo "Error: File '$logfile' not found" >&2
    exit 1
  fi
  if [[ ! -r "$logfile" ]]; then
    echo "Error: File '$logfile' not readable" >&2
    exit 1
  fi

  # Validate threshold conversion
  if [[ "$threshold_seconds" == "ERROR" ]]; then
    echo "Error: Invalid threshold format '$threshold_input'" >&2
    echo "Use formats like: 300, 300s, 5m, 2h" >&2
    exit 1
  fi

  # Check threshold is positive. Anything that is not plain digits is
  # rejected, and the comparison is forced to base 10 so values with a
  # leading zero such as "08" do not trigger an octal parse error.
  if [[ ! "$threshold_seconds" =~ ^[0-9]+$ ]] || ((10#$threshold_seconds <= 0)); then
    echo "Error: Threshold must be positive, got '$threshold_input'" >&2
    exit 1
  fi
}
#######################################
# Entry point: parse the arguments, validate them, then scan the log file
# with awk and report gaps between consecutive timestamped lines.
# Arguments: $1 - log file path, or -h / --help to print the usage text
#            $2 - gap threshold (300, 300s, 5m, 2h)
# Outputs:   a report on stdout; validation errors go to stderr
# Returns:   exits 0 after printing help; otherwise the awk exit status
#            (validate_inputs exits 1 on bad input)
# Note:      the awk program calls mktime(), so it needs an awk that
#            provides that function (gawk does).
#######################################
main() {
  # Show help if requested
  if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
    show_usage
    exit 0
  fi

  # Parse command line arguments
  local logfile="${1:-}"
  local threshold_input="${2:-}"

  # Convert threshold to seconds
  local threshold_seconds
  threshold_seconds=$(parse_threshold "$threshold_input")

  # Validate all inputs
  validate_inputs "$logfile" "$threshold_input" "$threshold_seconds" "$@"

  # Run the gap detection
  echo "Analyzing '$logfile' for gaps larger than $threshold_input ($threshold_seconds seconds)..."
  echo "Expected timestamp format: YYYY-MM-DD HH:MM:SS,mmm"
  echo ""

  # The log is fed on stdin instead of being passed as an operand: awk
  # treats an operand shaped like name=value as a variable assignment, so a
  # file called "app=1.log" would never be opened.
  awk -v threshold="$threshold_seconds" '
    BEGIN {
      previous_epoch = ""
      previous_line = ""
      gaps_found = 0
      lines_processed = 0
      malformed_skipped = 0
    }

    # Convert the leading "YYYY-MM-DD HH:MM:SS" of a line to epoch seconds.
    # The names after the extra spaces in the parameter list are locals.
    function timestamp_to_epoch(timestamp_str,    ts, datetime, date_parts, time_parts) {
      # Timestamp portion is the first 19 characters: 2025-06-16 07:18:08
      ts = substr(timestamp_str, 1, 19)
      split(ts, datetime, " ")
      split(datetime[1], date_parts, "-")
      split(datetime[2], time_parts, ":")
      # mktime expects "YYYY MM DD HH MM SS"
      return mktime(date_parts[1] " " date_parts[2] " " date_parts[3] " " \
                    time_parts[1] " " time_parts[2] " " time_parts[3])
    }

    # Process only lines that start with the timestamp pattern. The digit
    # classes are written out in full because some awk builds do not
    # support {n} interval expressions.
    /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]/ {
      lines_processed++

      # Extract and convert timestamp
      current_epoch = timestamp_to_epoch($0)

      # Skip if timestamp conversion failed (malformed)
      if (current_epoch <= 0) {
        malformed_skipped++
        next
      }

      # Compare with previous timestamp if we have one
      if (previous_epoch != "") {
        gap = current_epoch - previous_epoch

        if (gap > threshold) {
          # Gap larger than the threshold
          gaps_found++
          print "=== GAP DETECTED ==="
          print "Previous: " previous_line
          print "Current: " $0
          printf "Gap: %d seconds (%.1f minutes)\n", gap, gap/60
          print ""
        } else if (gap < 0) {
          # Timestamp went backwards relative to the previous line
          print "=== OUT OF ORDER DETECTED ==="
          print "Previous: " previous_line
          print "Current: " $0
          printf "Backwards: %d seconds (%.1f minutes)\n", -gap, -gap/60
          print ""
        }
      }

      # Update previous values for next iteration
      previous_epoch = current_epoch
      previous_line = $0
    }

    # All other lines are skipped silently.

    END {
      print "=== ANALYSIS COMPLETE ==="
      print "Lines with timestamps processed: " lines_processed
      if (malformed_skipped > 0) {
        print "Malformed timestamp lines skipped: " malformed_skipped
      }
      print "Gaps found: " gaps_found
      if (gaps_found == 0) {
        print "No gaps larger than " threshold " seconds detected."
      }
    }
  ' < "$logfile"
}
# Script entry point: hand every command-line argument to main unchanged
# ("$@" keeps each argument as its own word, even if it contains spaces).
main "$@"