-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmaxifier.py
More file actions
453 lines (363 loc) · 15.4 KB
/
maxifier.py
File metadata and controls
453 lines (363 loc) · 15.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
import os
import re
import sys
import json
import argparse
import requests
import base64
from urllib.parse import urljoin, urlparse, urlunparse, unquote
# Command-line interface. The parser is built and the arguments are parsed at
# module level, so `args` is available to every function defined below.
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description="Recreate unminified source paths on disk from sourcemaps (with sourcesContent).",
usage="%(prog)s [-h] [-v] [-a [INPUT]] [-m MAP] [-f FILE] [-o OUT_DIR] [input]",
epilog="""Examples:
%(prog)s app.js.map -o ./output_dir
or
%(prog)s app.js -a -o ./output_dir
or
%(prog)s --auto file:///Users/me/build/app.js -o ./output_dir
or
%(prog)s --map https://example.com/scripts/app.js.map --file ./app.js -a -o ./output_dir""",
)
# Optional positional input; whether it is treated as a map or a JS file is
# decided later by choose_refs().
parser.add_argument(
"input",
nargs="?",
default=None,
help="Map or JS reference (http(s), file://, or local path).",
)
# Explicit sourcemap reference, stored as args.sourcemap.
parser.add_argument(
"-m",
"--map",
dest="sourcemap",
default=None,
help="Sourcemap reference (http(s), file://, local path, or base64 data URL).",
)
# Explicit JavaScript reference, stored as args.js_file.
parser.add_argument(
"-f",
"--file",
dest="js_file",
default=None,
help="JavaScript source reference (http(s), file://, or local path).",
)
# Tri-state option stored as args.auto: False when absent (default), True when
# given without a value (const), or the INPUT string when a value follows it.
parser.add_argument(
"-a",
"--auto",
"--auto_map",
nargs="?",
const=True,
default=False,
metavar="INPUT",
help="Auto-resolve missing map/js pair from the provided input. Optional INPUT preserves legacy --auto_map INPUT usage.",
)
parser.add_argument("-o", "--out_dir", default="./output", help="Directory to save extracted files")
parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
# Parsed once here; log() and the script body read this module-level namespace.
args = parser.parse_args()
def log(message):
    """Write *message* to stderr, but only when ``--verbose`` was requested."""
    if not args.verbose:
        return
    print(message, file=sys.stderr)
def _append_unique(items, candidate):
if candidate and candidate not in items:
items.append(candidate)
def file_url_to_path(file_url):
    """Convert a ``file://`` URL into a filesystem path string.

    The URL path is percent-decoded. A host other than ``localhost`` is kept
    as a ``//host`` prefix in front of the path. When ``os.name`` is ``"nt"``,
    the slash in front of a drive letter (``/C:...``) is removed.
    """
    url_parts = urlparse(file_url)
    host = url_parts.netloc
    decoded = unquote(url_parts.path)
    local_path = f"//{host}{decoded}" if host and host != "localhost" else decoded
    has_drive_prefix = os.name == "nt" and re.match(r"^/[A-Za-z]:", local_path)
    return local_path[1:] if has_drive_prefix else local_path
def reference_to_local_path(reference):
    """Return the absolute local path for *reference*, or ``None`` if it is not local.

    References without a URL scheme are treated as filesystem paths and
    ``file://`` URLs are converted to paths; every other scheme (http, https,
    data, or anything else) yields ``None``.
    """
    scheme = urlparse(reference).scheme
    if not scheme:
        return os.path.abspath(reference)
    if scheme == "file":
        return os.path.abspath(file_url_to_path(reference))
    return None
def resolve_reference(reference, base_reference=None):
    """Resolve *reference* against *base_reference*.

    A reference that already carries a scheme, or one given without a base,
    is returned unchanged. With an http(s) base the result is a joined URL;
    with a ``file://`` or plain-path base the result is a normalised local
    path relative to the directory containing the base.
    """
    if urlparse(reference).scheme or not base_reference:
        return reference

    base_scheme = urlparse(base_reference).scheme
    if base_scheme in ("http", "https"):
        return urljoin(base_reference, reference)

    if base_scheme == "file":
        base_file = file_url_to_path(base_reference)
    else:
        base_file = os.path.abspath(base_reference)
    joined = os.path.join(os.path.dirname(base_file), reference)
    return os.path.normpath(joined)
def candidate_references(reference, base_reference=None):
    """Build the ordered, de-duplicated list of places to look for *reference*.

    Order: the reference resolved against *base_reference*; a file with the
    same base name in the current working directory; and, for references
    without a scheme, the reference taken relative to the current working
    directory (absolute form first, then the plain join).
    """
    found = []

    resolved = resolve_reference(reference, base_reference)
    _append_unique(found, resolved)

    resolved_parts = urlparse(resolved)
    path_part = resolved_parts.path if resolved_parts.scheme else resolved
    leaf = os.path.basename(path_part)
    if leaf:
        _append_unique(found, os.path.join(os.getcwd(), leaf))

    if not urlparse(reference).scheme:
        _append_unique(found, os.path.abspath(reference))
        _append_unique(found, os.path.join(os.getcwd(), reference))

    return found
def _format_reference_error(reference, errors, kind):
if not errors:
return f"Unable to resolve {kind} reference: {reference}"
joined = "; ".join(errors)
return f"Unable to resolve {kind} reference '{reference}'. Tried: {joined}"
def load_text_reference(reference, base_reference=None, kind="file", timeout=30):
    """Load the text behind *reference*, trying each candidate location in turn.

    Args:
        reference: http(s) URL, ``file://`` URL, or local path.
        base_reference: Optional reference that relative references are
            resolved against.
        kind: Human-readable label used in the error message.
        timeout: Seconds to wait for an HTTP server before giving up on a
            candidate. Without a timeout ``requests`` waits indefinitely, so a
            stalled server would hang the whole tool.

    Returns:
        A 5-tuple ``(text, headers, resolved_reference, local_path, error)``.
        On success ``error`` is ``None``; ``headers`` is the response headers
        for HTTP and ``{}`` for local files; ``local_path`` is the absolute
        path for local files and ``None`` for HTTP. On failure the first four
        items are ``None`` and ``error`` describes every attempt.
    """
    errors = []
    for candidate in candidate_references(reference, base_reference):
        scheme = urlparse(candidate).scheme

        if scheme in ("http", "https"):
            try:
                response = requests.get(candidate, timeout=timeout)
                response.raise_for_status()
                return response.text, response.headers, candidate, None, None
            except Exception as exc:
                # Best effort: record the failure and fall through to the next candidate.
                errors.append(f"{candidate} ({exc})")
                continue

        if scheme == "file":
            path = file_url_to_path(candidate)
        elif not scheme:
            path = candidate
        else:
            errors.append(f"{candidate} (unsupported scheme '{scheme}')")
            continue

        abs_path = os.path.abspath(path)
        try:
            with open(abs_path, "r", encoding="utf-8") as file_obj:
                return file_obj.read(), {}, abs_path, abs_path, None
        except Exception as exc:
            # Missing file, permission problem, or undecodable content: try the next one.
            errors.append(f"{abs_path} ({exc})")
            continue

    return None, None, None, None, _format_reference_error(reference, errors, kind)
def load_sourcemap_reference(reference, base_reference=None):
    """Load and parse a sourcemap from a data URL, an http(s)/file URL, or a path.

    Any ``data:`` URL is decoded in place: base64 when the header carries a
    ``base64`` token (this covers the common
    ``data:application/json;charset=utf-8;base64,...`` form emitted by
    bundlers), percent-decoding otherwise. Other references are loaded through
    ``load_text_reference``.

    Args:
        reference: The sourcemap reference.
        base_reference: Optional reference used to resolve a relative one.

    Returns:
        A 4-tuple ``(sourcemap, resolved_reference, local_path, error)``. On
        success ``error`` is ``None``; for data URLs ``resolved_reference`` is
        ``"<data-url>"`` and ``local_path`` is ``None``. On failure the first
        three items are ``None`` and ``error`` is a message.
    """
    if reference[:5].lower() == "data:":
        header, comma, payload = reference[5:].partition(",")
        if not comma:
            return None, None, None, "Failed to decode data URL sourcemap: missing ',' separator"
        # The first ';'-separated token is the media type; the rest are parameters.
        parameters = [token.strip() for token in header.lower().split(";")[1:]]
        is_base64 = "base64" in parameters
        try:
            if is_base64:
                decoded = base64.b64decode(payload).decode("utf-8")
            else:
                decoded = unquote(payload)
            return json.loads(decoded), "<data-url>", None, None
        except ValueError as exc:
            # binascii.Error, UnicodeDecodeError and JSONDecodeError are all ValueErrors.
            label = "Base64" if is_base64 else "data URL"
            return None, None, None, f"Failed to decode {label} sourcemap: {exc}"

    text, _, resolved_reference, local_path, error = load_text_reference(
        reference, base_reference=base_reference, kind="sourcemap"
    )
    if error:
        return None, None, None, error

    # Some servers prepend an XSSI guard line ")]}'" that must be ignored before parsing.
    if text.startswith(")]}'"):
        text = text.partition("\n")[2]

    try:
        return json.loads(text), resolved_reference, local_path, None
    except ValueError as exc:
        return None, None, None, f"Failed to parse sourcemap JSON from {resolved_reference}: {exc}"
def extract_sourcemap_reference_from_js(js_content):
    """Return the target of the last ``sourceMappingURL`` line comment, or ``None``.

    Only ``//#`` / ``//@`` comments that occupy their own line (optionally
    indented with spaces or tabs) are recognised.
    """
    last_target = None
    for found in re.finditer(r"(?m)^[ \t]*//[#@]\s*sourceMappingURL=(\S+)\s*$", js_content):
        last_target = found.group(1)
    if last_target is None:
        return None
    return last_target.strip()
def infer_sourcemap_candidates(js_content, js_reference, js_headers):
    """List likely sourcemap references for a JavaScript file, most specific first.

    Order: the ``X-SourceMap`` / ``SourceMap`` header value, the
    ``sourceMappingURL`` comment found in *js_content*, and finally the JS
    reference with ``.map`` appended (query and fragment are dropped for
    http(s) and ``file://`` references).
    """
    ordered = []

    from_header = js_headers.get("X-SourceMap") or js_headers.get("SourceMap")
    if from_header:
        _append_unique(ordered, from_header.strip())

    from_comment = extract_sourcemap_reference_from_js(js_content)
    if from_comment:
        _append_unique(ordered, from_comment)

    if not js_reference:
        return ordered

    parts = urlparse(js_reference)
    if parts.scheme not in ("http", "https", "file"):
        _append_unique(ordered, f"{js_reference}.map")
        return ordered

    map_path = parts.path if parts.path.endswith(".map") else f"{parts.path}.map"
    _append_unique(ordered, urlunparse((parts.scheme, parts.netloc, map_path, "", "", "")))
    return ordered
def infer_js_candidates(sourcemap, sourcemap_reference):
    """List likely JavaScript references for a loaded sourcemap.

    Order: the sourcemap's own ``file`` field (when it is a non-blank string),
    then the sourcemap reference with its trailing ``.map`` removed.
    """
    guesses = []

    declared = sourcemap.get("file")
    if isinstance(declared, str) and declared.strip():
        _append_unique(guesses, declared.strip())

    if not sourcemap_reference:
        return guesses

    parts = urlparse(sourcemap_reference)
    if parts.scheme in ("http", "https", "file"):
        if parts.path.endswith(".map"):
            stripped = urlunparse((parts.scheme, parts.netloc, parts.path[:-4], "", "", ""))
            _append_unique(guesses, stripped)
    elif sourcemap_reference.endswith(".map"):
        _append_unique(guesses, sourcemap_reference[:-4])

    return guesses
def try_load_first_sourcemap(candidates, base_reference=None):
    """Return the first sourcemap among *candidates* that loads successfully.

    The result is ``(sourcemap, resolved_reference, local_path, error)``. When
    every candidate fails, the first three items are ``None`` and ``error`` is
    the individual messages joined by ``"; "`` (an empty string when there
    were no candidates).
    """
    failures = []
    for ref in candidates:
        loaded_map, loaded_ref, loaded_path, problem = load_sourcemap_reference(
            ref, base_reference=base_reference
        )
        if problem:
            failures.append(problem)
            continue
        return loaded_map, loaded_ref, loaded_path, None
    return None, None, None, "; ".join(failures)
def try_load_first_js(candidates, base_reference=None):
    """Return the first JavaScript file among *candidates* that loads successfully.

    The result is ``(text, headers, resolved_reference, local_path, error)``.
    When every candidate fails, the first four items are ``None`` and
    ``error`` is the individual messages joined by ``"; "`` (an empty string
    when there were no candidates).
    """
    failures = []
    for ref in candidates:
        text, headers, loaded_ref, loaded_path, problem = load_text_reference(
            ref, base_reference=base_reference, kind="JavaScript"
        )
        if problem:
            failures.append(problem)
            continue
        return text, headers, loaded_ref, loaded_path, None
    return None, None, None, None, "; ".join(failures)
def looks_like_map_reference(reference):
    """Return True when the reference's path ends in ``.map`` or ``.json`` (case-insensitive)."""
    lowered_path = urlparse(reference).path.lower()
    return lowered_path.endswith((".map", ".json"))
def looks_like_js_reference(reference):
    """Return True when the reference's path ends in ``.js``, ``.mjs`` or ``.cjs`` (case-insensitive)."""
    lowered_path = urlparse(reference).path.lower()
    return lowered_path.endswith((".js", ".mjs", ".cjs"))
def choose_refs(input_reference, map_reference, js_reference, auto_enabled):
    """Decide which reference is the sourcemap and which is the JavaScript file.

    Returns ``(sourcemap_reference, js_reference)``. The positional input
    fills whichever of ``--map`` / ``--file`` was not given. When neither was
    given, the input's extension decides; an input with an unrecognised
    extension is treated as JavaScript in auto mode and as a sourcemap
    otherwise. Supplying a positional input together with both ``--map`` and
    ``--file`` is reported through ``parser.error``.
    """
    if not input_reference:
        return map_reference, js_reference

    if map_reference and js_reference:
        parser.error("Positional input cannot be used when both --map and --file are already provided.")
    elif map_reference:
        return map_reference, input_reference
    elif js_reference:
        return input_reference, js_reference

    if looks_like_map_reference(input_reference):
        return input_reference, js_reference
    if looks_like_js_reference(input_reference) or auto_enabled:
        return map_reference, input_reference
    return input_reference, js_reference
def normalize_source_path(source_path, local_base_file=None):
    """Turn a sourcemap ``sources`` entry into a safe, relative, '/'-separated path.

    The result never starts with a separator and never contains ``.`` or
    ``..`` segments, so joining it onto an output directory cannot escape
    that directory. Backslashes are treated as separators on every platform;
    otherwise, on POSIX, an entry such as ``a\\..\\..\\etc`` would only be
    turned into ``a/../../etc`` after normalisation had already happened.

    Args:
        source_path: Entry from the sourcemap's ``sources`` list.
        local_base_file: Optional local file (JS or map) whose directory
            relative entries are resolved against.

    Returns:
        The normalised relative path, or ``"unknown_source"`` when the entry
        is empty or reduces to nothing (for example ``"/"`` or ``"."``).
    """
    if not source_path:
        return "unknown_source"

    def squash_relative_parts(path_value):
        # Drop empty, "." and ".." segments; a ".." also removes the segment before it.
        normalized = os.path.normpath(path_value).replace("\\", "/")
        parts = []
        for part in normalized.split("/"):
            if part in ("", "."):
                continue
            if part == "..":
                if parts:
                    parts.pop()
                continue
            parts.append(part)
        return "/".join(parts) if parts else "unknown_source"

    if os.path.isabs(source_path):
        # Re-root absolute paths: strip the drive (if any); leading separators
        # disappear in the squash because empty segments are skipped.
        drive_less = os.path.splitdrive(os.path.normpath(source_path))[1]
        return squash_relative_parts(drive_less)

    if local_base_file:
        base_dir = os.path.dirname(os.path.abspath(local_base_file))
        normalized = os.path.normpath(os.path.join(base_dir, source_path))
        rel_from_base = os.path.relpath(normalized, start=base_dir)
        if rel_from_base == ".." or rel_from_base.startswith(f"..{os.sep}"):
            # Entry climbs above the base directory: flatten it instead.
            return squash_relative_parts(source_path)
        return squash_relative_parts(rel_from_base)

    return squash_relative_parts(source_path)
# Reconcile the positional input with the optional value carried by -a/--auto.
# args.auto is False (absent), True (bare flag) or a string (legacy "-a INPUT").
input_reference = args.input
auto_value = args.auto
auto_enabled = isinstance(auto_value, str) or bool(auto_value)
if isinstance(auto_value, str):
    if input_reference and input_reference != auto_value:
        parser.error("Provide either positional input or '-a INPUT', not both.")
    if not input_reference:
        input_reference = auto_value

sourcemap_reference, js_reference = choose_refs(input_reference, args.sourcemap, args.js_file, auto_enabled)

# With nothing to load and nothing piped in, show usage and stop.
nothing_requested = not sourcemap_reference and not js_reference
if nothing_requested and sys.stdin.isatty():
    parser.print_help(sys.stderr)
    print(
        "\nProvide at least one map/js input (or pipe a sourcemap JSON to stdin).",
        file=sys.stderr,
    )
    sys.exit(1)
# --- Load the sourcemap (and, when useful, the JavaScript file) -------------
sourcemap = None
sourcemap_loaded_ref = None
sourcemap_local_path = None
js_content = None
js_headers = {}
js_loaded_ref = None
js_local_path = reference_to_local_path(js_reference) if js_reference else None

# 1. Explicit sourcemap reference. In auto mode with a JS reference available
#    a failure here is not fatal, because the map may still be found via the JS.
if sourcemap_reference:
    log(f"Loading sourcemap from: {sourcemap_reference}")
    sourcemap, sourcemap_loaded_ref, sourcemap_local_path, map_error = load_sourcemap_reference(sourcemap_reference)
    if map_error:
        if auto_enabled and js_reference:
            log(f"Unable to load sourcemap directly yet: {map_error}")
        else:
            print(map_error, file=sys.stderr)
            sys.exit(1)

# 2. JavaScript reference: loaded in auto mode, or when no sourcemap is loaded yet.
if js_reference and (auto_enabled or sourcemap is None):
    log(f"Loading JavaScript from: {js_reference}")
    js_content, js_headers, js_loaded_ref, js_local_path, js_error = load_text_reference(
        js_reference, kind="JavaScript"
    )
    if js_error:
        if auto_enabled and sourcemap is not None:
            log(f"Unable to load JavaScript directly: {js_error}")
        else:
            print(js_error, file=sys.stderr)
            sys.exit(1)

# 3. Sourcemap JSON piped on stdin.
if sourcemap is None and not sys.stdin.isatty():
    piped_input = sys.stdin.read().strip()
    if piped_input:
        try:
            sourcemap = json.loads(piped_input)
            sourcemap_loaded_ref = "<stdin>"
            sourcemap_local_path = None
            log("Loaded sourcemap from stdin.")
        except json.JSONDecodeError as exc:
            print(f"Failed to parse sourcemap JSON from stdin: {exc}", file=sys.stderr)
            sys.exit(1)

# 4. Auto mode: derive the sourcemap from the loaded JavaScript.
if auto_enabled and sourcemap is None and js_content:
    candidates = infer_sourcemap_candidates(js_content, js_loaded_ref or js_reference, js_headers)
    log(f"Auto-resolving sourcemap from JavaScript candidates: {candidates}")
    sourcemap, sourcemap_loaded_ref, sourcemap_local_path, map_error = try_load_first_sourcemap(
        candidates, base_reference=js_loaded_ref or js_reference
    )
    if map_error:
        print(f"Auto map resolution failed: {map_error}", file=sys.stderr)
        sys.exit(1)

# A document that parses as JSON but is not an object (e.g. a list or a number)
# would otherwise crash below in .get() / "in" checks; report it cleanly instead.
if sourcemap is not None and not isinstance(sourcemap, dict):
    print(
        f"Error: The sourcemap must be a JSON object, got {type(sourcemap).__name__}. Exiting.",
        file=sys.stderr,
    )
    sys.exit(1)

# 5. Auto mode: derive the JavaScript from the sourcemap. Failure is only logged.
if auto_enabled and js_content is None and sourcemap is not None:
    candidates = infer_js_candidates(sourcemap, sourcemap_loaded_ref or sourcemap_reference)
    if candidates:
        log(f"Auto-resolving JavaScript from sourcemap candidates: {candidates}")
        js_content, js_headers, js_loaded_ref, js_local_path, js_error = try_load_first_js(
            candidates, base_reference=sourcemap_loaded_ref or sourcemap_reference
        )
        if js_error:
            log(f"Auto JS resolution failed: {js_error}")

# 6. Validate what was loaded.
if sourcemap is None:
    print(
        "Unable to load a sourcemap. Provide one directly, pipe JSON to stdin, or pass -a with a JavaScript input.",
        file=sys.stderr,
    )
    sys.exit(1)
if "sources" not in sourcemap:
    print("Error: The sourcemap does not contain 'sources'. Exiting.", file=sys.stderr)
    sys.exit(1)
if "sourcesContent" not in sourcemap:
    print("Error: The sourcemap does not contain 'sourcesContent'. Exiting.", file=sys.stderr)
    sys.exit(1)
# --- Write every embedded source below the output directory ------------------
# Serialising the whole map is expensive, so only do it when it will be printed.
if args.verbose:
    log(json.dumps(sourcemap, indent=4))

out_dir = args.out_dir
log(f"Extracting source files into: {out_dir}")
os.makedirs(out_dir, exist_ok=True)
if not os.access(out_dir, os.W_OK):
    print(f"Error: Output directory '{out_dir}' is not writable. Exiting.", file=sys.stderr)
    sys.exit(1)

# Relative source entries are resolved against the JS file when it is local,
# otherwise against the local sourcemap file (either may be None).
source_base_file = js_local_path or sourcemap_local_path
sourcemap["sources"] = [normalize_source_path(src, source_base_file) for src in sourcemap["sources"]]

# Sourcemaps are untrusted input: refuse any entry whose final location is not
# strictly inside the output directory. The trailing separator on the prefix
# stops "/out-evil" from matching "/out".
out_root_prefix = os.path.join(os.path.realpath(out_dir), "")

for filename, content in zip(sourcemap["sources"], sourcemap["sourcesContent"]):
    filename = os.path.normpath(filename)
    # normpath yields backslashes on Windows, so split on either separator.
    file_path = os.path.join(out_dir, *filename.replace("\\", "/").split("/"))
    if not os.path.realpath(file_path).startswith(out_root_prefix):
        print(f"Skipping '{filename}': it resolves outside the output directory.", file=sys.stderr)
        continue
    log(f"Generating {file_path}..")
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, "w", encoding="utf-8") as source_file:
        # A null sourcesContent entry produces an empty file.
        source_file.write(content or "")
    log("..done.")

log(f"Extraction complete. Sources saved to: {out_dir}")