-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathcanary_utils.py
More file actions
228 lines (183 loc) · 5.74 KB
/
canary_utils.py
File metadata and controls
228 lines (183 loc) · 5.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
#!/usr/bin/python3
"""
Shared utilities for Indicator of Canary tools
Contains common functions used across all scripts
"""
import os
import json
import hashlib
from urllib.parse import urlparse
from datetime import datetime
from colorama import Fore, Style, init
import canary_config as config
# Initialize colorama
init()
def safe_decode(content):
    """
    Safely decode byte content to a UTF-8 string.

    Args:
        content: Byte content to decode

    Returns:
        Decoded string. Bytes that are not valid UTF-8 are dropped
        rather than raising, so binary input yields a (possibly
        empty) string.
    """
    try:
        return content.decode('utf-8')
    except UnicodeDecodeError:
        # decode(errors='ignore') cannot raise UnicodeDecodeError, so
        # the original inner bare `except: return None` was dead code;
        # this fallback always succeeds.
        return content.decode('utf-8', errors='ignore')
def url_in_list(url, lst):
    """
    Check whether a URL's hostname matches any domain in the list.

    A hostname matches a domain when it equals the domain exactly or
    is a subdomain of it: "sub.example.com" matches "example.com",
    but "evil-notexample.com" does not.

    Args:
        url: URL to check
        lst: List/set of domains to check against

    Returns:
        True if the URL's hostname matches any domain in the list
    """
    try:
        hostname = urlparse(url).hostname
    except ValueError:
        # Malformed URLs (e.g. an invalid port) cannot match anything.
        return False
    if not hostname:
        return False
    # Require a domain-boundary match: a plain endswith() would let
    # "evil-notexample.com" slip past a check for "example.com".
    return any(
        hostname == domain or hostname.endswith('.' + domain)
        for domain in lst
    )
def extract_urls_from_content(content, filename):
    """
    Extract URLs from file content.

    Args:
        content: File content (bytes)
        filename: Name of the file for context

    Returns:
        List of (url, filename) tuples; empty when the content could
        not be decoded as text
    """
    decoded = safe_decode(content)
    if decoded is None:
        return []
    # Pair every match with the originating filename for reporting.
    return [(match, filename) for match in config.URL_PATTERN.findall(decoded)]
def filter_urls(urls, ignore_list=None):
    """
    Drop URLs whose hostname is on the ignore list.

    Args:
        urls: List of (url, filename) tuples
        ignore_list: Domains to ignore (defaults to config.IGNORE_DOMAINS)

    Returns:
        Filtered list of (url, filename) tuples
    """
    domains = config.IGNORE_DOMAINS if ignore_list is None else ignore_list
    kept = []
    for entry in urls:
        if not url_in_list(entry[0], domains):
            kept.append(entry)
    return kept
def print_colored_urls(urls, alert_list=None):
    """
    Print URLs color-coded by threat level.

    URLs whose hostname is on the alert list are printed in red;
    everything else in yellow.

    Args:
        urls: List of (url, location) tuples
        alert_list: Known bad domains (defaults to config.ALERT_DOMAINS)
    """
    domains = config.ALERT_DOMAINS if alert_list is None else alert_list
    for url, location in urls:
        color = Fore.RED if url_in_list(url, domains) else Fore.YELLOW
        print(f"{color}{url} - {location}{Style.RESET_ALL}")
def hash_file(file_path):
    """
    Calculate MD5, SHA1 and SHA256 hashes of a file.

    Args:
        file_path: Path to file to hash

    Returns:
        Tuple of (md5, sha1, sha256) hashes as hex strings
    """
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    # Stream in fixed-size chunks so large files don't have to fit
    # in memory all at once.
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            md5.update(chunk)
            sha1.update(chunk)
            sha256.update(chunk)
    return md5.hexdigest(), sha1.hexdigest(), sha256.hexdigest()
def write_to_json(output_path, file_name, data):
    """
    Write analysis results to a JSON file keyed by the file's basename.

    Args:
        output_path: Path to output JSON file
        file_name: Name of the analyzed file
        data: Data dictionary to write
    """
    # Strip directory components so the JSON key is just the name;
    # a falsy file_name is passed through unchanged.
    key = os.path.basename(file_name) if file_name else file_name
    with open(output_path, "w") as handle:
        json.dump({key: data}, handle, indent=4)
def format_metadata(meta_data):
    """
    Format a metadata dictionary for display, one "prop: value" line each.

    Author-related fields whose value appears in config.BAD_AUTHORS
    are highlighted in red.

    Args:
        meta_data: Dictionary of metadata

    Returns:
        Formatted multi-line string for display
    """
    author_fields = ('author', 'creator', 'lastModifiedBy')
    rendered = []
    for prop, value in meta_data.items():
        if isinstance(value, datetime):
            value = value.isoformat()
        # Flag known-bad author names (case-insensitive match).
        suspicious = (
            prop in author_fields
            and value
            and str(value).lower() in config.BAD_AUTHORS
        )
        if suspicious:
            rendered.append(f"{Fore.RED}{prop}: {value}{Style.RESET_ALL}")
        else:
            rendered.append(f"{prop}: {value}")
    return '\n'.join(rendered)
def should_exclude_file(filename, exclude_patterns):
    """
    Check whether a file should be excluded based on a set of patterns.

    A pattern ending in '/' excludes every path under that directory;
    any other pattern excludes paths that contain it as a substring
    (which also covers exact filename matches, e.g. "slide" matches
    every slide file).

    Args:
        filename: File path/name to check
        exclude_patterns: Set of patterns to exclude

    Returns:
        True if the file should be excluded
    """
    for pattern in exclude_patterns:
        if pattern.endswith('/'):
            # Directory pattern: exclude anything under it.
            if filename.startswith(pattern):
                return True
        elif pattern in filename:
            # Substring match subsumes the exact-name case.
            return True
    return False
def get_file_basename(file_path):
    """
    Return the final path component of a file path, cross-platform.

    Args:
        file_path: Full file path

    Returns:
        Base filename with any directory components removed
    """
    _, base = os.path.split(file_path)
    return base
def validate_file_exists(file_path):
    """
    Validate that a path points to an existing, readable regular file.

    Args:
        file_path: Path to validate

    Returns:
        True if the file exists and is readable

    Raises:
        FileNotFoundError: If the path doesn't exist
        ValueError: If the path exists but is not a regular file
            (e.g. a directory) — was missing from the original docs
        PermissionError: If the file isn't readable
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    if not os.path.isfile(file_path):
        raise ValueError(f"Path is not a file: {file_path}")
    if not os.access(file_path, os.R_OK):
        raise PermissionError(f"File is not readable: {file_path}")
    return True