-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
174 lines (144 loc) · 7.3 KB
/
app.py
File metadata and controls
174 lines (144 loc) · 7.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import re
from flask import Flask, render_template, request, redirect
# Application object; the upload/report route below is registered on it via @app.route.
app = Flask(__name__)
# Points added per message severity; the sum is used as the transaction score
# only when no explicit inbound/outbound anomaly score is found.
_SEVERITY_POINTS = {'CRITICAL': 5, 'ERROR': 4, 'WARNING': 3, 'NOTICE': 2}

# Transaction start boundary (--ID-A--). The split pattern captures the
# boundary so the transaction ID survives the split.
_TXN_SPLIT_RE = re.compile(r'(--[a-zA-Z0-9]+-A--)')
_TXN_START_RE = re.compile(r'--([a-zA-Z0-9]+)-A--')

# Looks for: [25/Nov/2023:14:00:00 +0000] <anything> 1.2.3.4 1234 5.6.7.8
_HEADER_RE = re.compile(
    r'\[(\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}\s?[+\-]\d{4})\]'
    r'.*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+\d+\s+'
    r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
)

# [ \t]* (not \s*) keeps an empty header value from capturing the NEXT line.
_USER_AGENT_RE = re.compile(r'^User-Agent:[ \t]*(\S.*)$', re.IGNORECASE | re.MULTILINE)
_MESSAGE_RE = re.compile(r'^Message:[ \t]*(\S.*)$', re.MULTILINE)

_RULE_ID_RE = re.compile(r'\[id "(\d+)"\]')
_SEVERITY_RE = re.compile(r'\[severity "(.*?)"\]')

# Matches: "matched 4 at TX:outbound_anomaly_score"
_EMBEDDED_SCORE_RE = re.compile(
    r'matched\s+(\d+)\s+at\s+TX:(inbound|outbound)_anomaly_score',
    re.IGNORECASE,
)
# Handles "Score: 5", "Score:5" and "Score 5".
_INBOUND_TOTAL_RE = re.compile(r'Total Inbound Score:?\s*(\d+)')
_OUTBOUND_TOTAL_RE = re.compile(r'Total Outbound Score:?\s*(\d+)')


def _new_entry(txn_id):
    """Return a blank transaction record for the given transaction ID."""
    return {
        'id': txn_id,
        'timestamp': 'Unknown Time',
        'source_ip': 'Unknown',
        'dest_ip': 'Unknown',
        'method': 'Unknown',
        'request_uri': '',
        'user_agent': '',
        'messages': [],
        'score_in': 0,
        'score_out': 0,
        'score': 0,
        'severity_points': 0,  # Backup counter
        'raw_request': '',
    }


def _finalize_entry(entry):
    """Set entry['score'] from the collected scores and return the entry."""
    best = max(entry['score_in'], entry['score_out'])
    # Fall back to the severity tally only when no explicit score was seen.
    if best == 0 and entry['severity_points'] > 0:
        best = entry['severity_points']
    entry['score'] = best
    return entry


def _extract_connection_info(entry, chunk):
    """Fill timestamp and source/destination IPs from the audit header line."""
    if entry['source_ip'] != 'Unknown':
        return
    found = _HEADER_RE.search(chunk)
    if found:
        entry['timestamp'] = found.group(1)
        entry['source_ip'] = found.group(2)
        entry['dest_ip'] = found.group(3)


def _extract_request(entry, chunk):
    """Fill raw request, method, URI and User-Agent from Part B of the chunk."""
    marker = f"--{entry['id']}-B--"
    if marker not in chunk:
        return
    # Capture up to the next boundary line of this transaction, or the end of
    # the text. The lookahead lets an empty Part B yield '' instead of
    # swallowing the following part, and lets a truncated log still parse.
    part_b_re = re.compile(
        re.escape(marker)
        + r'\n(.*?)(?=^--' + re.escape(entry['id']) + r'-[A-Z]--|\Z)',
        re.DOTALL | re.MULTILINE,
    )
    found = part_b_re.search(chunk)
    if not found:
        return
    raw_req = found.group(1)
    entry['raw_request'] = raw_req.strip()

    # First line is usually: GET /path HTTP/1.1
    request_line = raw_req.split('\n')[0].strip().split()
    if len(request_line) >= 2:
        entry['method'] = request_line[0]
        entry['request_uri'] = request_line[1]

    agent = _USER_AGENT_RE.search(raw_req)
    if agent:
        entry['user_agent'] = agent.group(1)


def _extract_messages(entry, chunk):
    """Record every 'Message:' line and update severity points and scores."""
    for msg in _MESSAGE_RE.findall(chunk):
        rule = _RULE_ID_RE.search(msg)
        sev = _SEVERITY_RE.search(msg)
        severity = sev.group(1) if sev else "INFO"
        entry['messages'].append({
            'rule_id': rule.group(1) if rule else "-",
            'severity': severity,
            'raw': msg,
        })
        # Backup score in case no explicit score is logged.
        entry['severity_points'] += _SEVERITY_POINTS.get(severity.upper(), 0)

        # Some messages embed the anomaly score directly in their text.
        embedded = _EMBEDDED_SCORE_RE.search(msg)
        if embedded:
            value = int(embedded.group(1))
            key = 'score_in' if embedded.group(2).lower() == 'inbound' else 'score_out'
            entry[key] = max(entry[key], value)


def _extract_total_scores(entry, chunk):
    """Raise score_in/score_out to any explicit 'Total ... Score' in the chunk."""
    inbound = _INBOUND_TOTAL_RE.search(chunk)
    if inbound:
        entry['score_in'] = max(entry['score_in'], int(inbound.group(1)))
    outbound = _OUTBOUND_TOTAL_RE.search(chunk)
    if outbound:
        entry['score_out'] = max(entry['score_out'], int(outbound.group(1)))


def parse_modsec_log(log_content):
    """
    Parse ModSecurity audit-log text into a list of transaction dicts.

    The text is split on transaction start boundaries (--ID-A--) and each
    transaction body is scanned as one searchable string rather than line by
    line. Text before the first boundary is ignored.

    Args:
        log_content: Audit log as a string. Empty or None yields []. Both LF
            and CRLF line endings are accepted.

    Returns:
        A list of dicts, one per transaction in log order, with keys: id,
        timestamp, source_ip, dest_ip, method, request_uri, user_agent,
        messages (list of {rule_id, severity, raw}), score_in, score_out,
        score, severity_points and raw_request. 'score' is the larger of
        score_in/score_out, or severity_points when both are 0.
    """
    if not log_content:
        return []

    # Normalise line endings: the Part B pattern and the line-anchored
    # patterns assume '\n', so CRLF logs would otherwise lose the request
    # data and leave stray '\r' characters in captured values.
    text = log_content.replace('\r\n', '\n').replace('\r', '\n')

    entries = []
    current_entry = None
    for piece in _TXN_SPLIT_RE.split(text):
        chunk = piece.strip()
        if not chunk:
            continue

        boundary = _TXN_START_RE.match(chunk)
        if boundary:
            # A new transaction starts: flush the previous one first.
            if current_entry:
                entries.append(_finalize_entry(current_entry))
            current_entry = _new_entry(boundary.group(1))
        elif current_entry:
            # Content of the current transaction (Parts B, H, Z, etc.).
            _extract_connection_info(current_entry, chunk)
            _extract_request(current_entry, chunk)
            _extract_messages(current_entry, chunk)
            _extract_total_scores(current_entry, chunk)

    # Flush the final transaction.
    if current_entry:
        entries.append(_finalize_entry(current_entry))
    return entries
@app.route('/', methods=['GET', 'POST'])
def index():
    """
    Serve the upload form on GET; on POST, parse the uploaded log and
    render the report.

    A POST without a 'logfile' part, or with an empty filename, redirects
    back to the same URL. Only the last 500 parsed entries are passed to the
    report template, alongside the total number parsed.
    """
    if request.method != 'POST':
        return render_template('index.html')

    if 'logfile' not in request.files:
        return redirect(request.url)

    upload = request.files['logfile']
    if upload.filename == '':
        return redirect(request.url)
    if not upload:
        return render_template('index.html')

    # Decode with 'replace' so undecodable bytes in the log do not raise.
    text = upload.read().decode('utf-8', errors='replace')
    all_entries = parse_modsec_log(text)

    # Cap what is rendered at the last 500 entries; a negative slice returns
    # the whole list when it is shorter than that.
    visible = all_entries[-500:]
    return render_template(
        'report.html',
        entries=visible,
        count=len(all_entries),
        shown=len(visible),
    )
# Script entry point: start the app's built-in server only when run directly.
if __name__ == '__main__':
    # NOTE(review): debug=True enables Flask's debug mode; presumably this is
    # meant for local use only — confirm before exposing port 5000 to others.
    app.run(debug=True, port=5000)