-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathcanary_utils.py
More file actions
228 lines (183 loc) · 5.74 KB
/
canary_utils.py
File metadata and controls
228 lines (183 loc) · 5.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
#!/usr/bin/python3
"""
Shared utilities for Indicator of Canary tools
Contains common functions used across all scripts
"""
import os
import json
import hashlib
from urllib.parse import urlparse
from datetime import datetime
from colorama import Fore, Style, init
import canary_config as config
# Initialize colorama
init()
def safe_decode(content):
    """
    Safely decode byte content to a UTF-8 string.

    Args:
        content: Byte content to decode

    Returns:
        Decoded string. Bytes that are not valid UTF-8 are dropped
        rather than raising, so binary input yields a (possibly
        empty) string.
    """
    try:
        return content.decode('utf-8')
    except UnicodeDecodeError:
        # decode(errors='ignore') cannot raise UnicodeDecodeError, so
        # the original inner bare `except: return None` was dead code;
        # this fallback always succeeds.
        return content.decode('utf-8', errors='ignore')
def url_in_list(url, lst):
    """
    Check whether a URL's hostname matches any domain in the list.

    A hostname matches a domain when it equals the domain exactly or
    is a subdomain of it: "sub.example.com" matches "example.com",
    but "evil-notexample.com" does not.

    Args:
        url: URL to check
        lst: List/set of domains to check against

    Returns:
        True if the URL's hostname matches any domain in the list
    """
    try:
        hostname = urlparse(url).hostname
    except ValueError:
        # Malformed URLs (e.g. an invalid port) cannot match anything.
        return False
    if not hostname:
        return False
    # Require a domain-boundary match: a plain endswith() would let
    # "evil-notexample.com" slip past a check for "example.com".
    return any(
        hostname == domain or hostname.endswith('.' + domain)
        for domain in lst
    )
def extract_urls_from_content(content, filename):
    """
    Extract URLs from file content.

    Args:
        content: File content (bytes)
        filename: Name of the file for context

    Returns:
        List of (url, filename) tuples; empty when the content could
        not be decoded as text
    """
    decoded = safe_decode(content)
    if decoded is None:
        return []
    # Pair every match with the originating filename for reporting.
    return [(match, filename) for match in config.URL_PATTERN.findall(decoded)]
def filter_urls(urls, ignore_list=None):
    """
    Drop URLs whose hostname is on the ignore list.

    Args:
        urls: List of (url, filename) tuples
        ignore_list: Domains to ignore (defaults to config.IGNORE_DOMAINS)

    Returns:
        Filtered list of (url, filename) tuples
    """
    domains = config.IGNORE_DOMAINS if ignore_list is None else ignore_list
    kept = []
    for entry in urls:
        if not url_in_list(entry[0], domains):
            kept.append(entry)
    return kept
def print_colored_urls(urls, alert_list=None):
    """
    Print URLs color-coded by threat level.

    URLs whose hostname is on the alert list are printed in red;
    everything else in yellow.

    Args:
        urls: List of (url, location) tuples
        alert_list: Known bad domains (defaults to config.ALERT_DOMAINS)
    """
    domains = config.ALERT_DOMAINS if alert_list is None else alert_list
    for url, location in urls:
        color = Fore.RED if url_in_list(url, domains) else Fore.YELLOW
        print(f"{color}{url} - {location}{Style.RESET_ALL}")
def hash_file(file_path):
    """
    Calculate MD5, SHA1 and SHA256 hashes of a file.

    Args:
        file_path: Path to file to hash

    Returns:
        Tuple of (md5, sha1, sha256) hashes as hex strings
    """
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    # Stream in fixed-size chunks so large files don't have to fit
    # in memory all at once.
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            md5.update(chunk)
            sha1.update(chunk)
            sha256.update(chunk)
    return md5.hexdigest(), sha1.hexdigest(), sha256.hexdigest()
def write_to_json(output_path, file_name, data):
    """
    Write analysis results to a JSON file keyed by the file's basename.

    Args:
        output_path: Path to output JSON file
        file_name: Name of the analyzed file
        data: Data dictionary to write
    """
    # Strip directory components so the JSON key is just the name;
    # a falsy file_name is passed through unchanged.
    key = os.path.basename(file_name) if file_name else file_name
    with open(output_path, "w") as handle:
        json.dump({key: data}, handle, indent=4)
def format_metadata(meta_data):
    """
    Format a metadata dictionary for display, one "prop: value" line each.

    Author-related fields whose value appears in config.BAD_AUTHORS
    are highlighted in red.

    Args:
        meta_data: Dictionary of metadata

    Returns:
        Formatted multi-line string for display
    """
    author_fields = ('author', 'creator', 'lastModifiedBy')
    rendered = []
    for prop, value in meta_data.items():
        if isinstance(value, datetime):
            value = value.isoformat()
        # Flag known-bad author names (case-insensitive match).
        suspicious = (
            prop in author_fields
            and value
            and str(value).lower() in config.BAD_AUTHORS
        )
        if suspicious:
            rendered.append(f"{Fore.RED}{prop}: {value}{Style.RESET_ALL}")
        else:
            rendered.append(f"{prop}: {value}")
    return '\n'.join(rendered)
def should_exclude_file(filename, exclude_patterns):
    """
    Check whether a file should be excluded based on a set of patterns.

    A pattern ending in '/' excludes every path under that directory;
    any other pattern excludes paths that contain it as a substring
    (which also covers exact filename matches, e.g. "slide" matches
    every slide file).

    Args:
        filename: File path/name to check
        exclude_patterns: Set of patterns to exclude

    Returns:
        True if the file should be excluded
    """
    for pattern in exclude_patterns:
        if pattern.endswith('/'):
            # Directory pattern: exclude anything under it.
            if filename.startswith(pattern):
                return True
        elif pattern in filename:
            # Substring match subsumes the exact-name case.
            return True
    return False
def get_file_basename(file_path):
    """
    Return the final path component of a file path, cross-platform.

    Args:
        file_path: Full file path

    Returns:
        Base filename with any directory components removed
    """
    _, base = os.path.split(file_path)
    return base
def validate_file_exists(file_path):
    """
    Validate that a path points to an existing, readable regular file.

    Args:
        file_path: Path to validate

    Returns:
        True if the file exists and is readable

    Raises:
        FileNotFoundError: If the path doesn't exist
        ValueError: If the path exists but is not a regular file
            (e.g. a directory) — was missing from the original docs
        PermissionError: If the file isn't readable
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    if not os.path.isfile(file_path):
        raise ValueError(f"Path is not a file: {file_path}")
    if not os.access(file_path, os.R_OK):
        raise PermissionError(f"File is not readable: {file_path}")
    return True