-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidate_json.py
More file actions
132 lines (109 loc) · 4.3 KB
/
validate_json.py
File metadata and controls
132 lines (109 loc) · 4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python3
"""
JSON Validation Utility
Recursively checks directories for JSON files and validates their syntax and encoding.
"""
import json
import os
import sys
import argparse
from pathlib import Path
# Optional dependency: chardet
try:
import chardet
HAS_CHARDET = True
except ImportError:
HAS_CHARDET = False
def validate_json(file_path, verbose=False):
    """
    Validate a single JSON file for syntax and encoding errors.

    The file is parsed as UTF-8 first. Only when its bytes are not valid
    UTF-8 is the encoding guessed with ``chardet`` (if installed) from the
    first 1 MiB of the file, after which the parse is retried with the
    detected encoding.

    Args:
        file_path: Path (``str`` or ``Path``) of the file to check.
        verbose: When true, print a note before falling back to encoding
            detection.

    Returns:
        True if the file parsed as JSON, False otherwise. A one-line result
        is printed to stdout in either case; I/O and decoding failures inside
        the validation are reported that way instead of being raised.
    """
    path = Path(file_path)
    if not path.exists():
        print(f"❌ {file_path}: File not found")
        return False
    if path.is_dir():
        # A directory can match "*.json" by name. Opening it would surface
        # as a misleading IsADirectoryError (or PermissionError on Windows).
        print(f"❌ {file_path}: Is a directory, not a file")
        return False

    try:
        # Pass 1: Try UTF-8 (fast path for the vast majority of modern files).
        try:
            with open(path, 'r', encoding='utf-8') as f:
                json.load(f)
        except UnicodeError:
            # Not valid UTF-8: fall through to encoding detection below.
            if verbose:
                print(f"ℹ️ {file_path}: Not UTF-8, attempting encoding detection...")
        except json.JSONDecodeError as e:
            print(f"❌ {file_path}: Invalid JSON - {e}")
            return False
        else:
            # Kept out of the try body so that an encoding error raised by
            # print() itself is never mistaken for a non-UTF-8 input file.
            print(f"✅ {file_path}: Valid JSON (UTF-8)")
            return True

        # Pass 2: Encoding detection fallback.
        if not HAS_CHARDET:
            print(f"⚠️ {file_path}: Unicode error and 'chardet' not installed. Skipping detection.")
            return False

        with open(path, 'rb') as f:
            # Read a chunk for detection (up to 1MB) to balance accuracy and memory.
            raw_data = f.read(1024 * 1024)
        result = chardet.detect(raw_data)
        encoding = result.get('encoding')
        if not encoding:
            print(f"❌ {file_path}: Could not detect encoding")
            return False

        try:
            with open(path, 'r', encoding=encoding) as f:
                json.load(f)
        except json.JSONDecodeError as e:
            print(f"❌ {file_path}: Invalid JSON - {e}")
            return False
        except (UnicodeError, LookupError) as e:
            # The guess is based on a sample only, so the rest of the file may
            # not decode; LookupError covers a codec name Python does not know.
            print(f"❌ {file_path}: Could not decode as detected encoding '{encoding}' - {e}")
            return False
        print(f"✅ {file_path}: Valid JSON ({encoding})")
        return True
    except PermissionError:
        print(f"❌ {file_path}: Permission denied")
    except Exception as e:
        # Last-resort boundary: report and count the file as failed rather
        # than aborting the whole scan.
        print(f"❌ {file_path}: Unexpected error - {type(e).__name__}: {e}")
    return False
def find_json_files(directory):
    """
    Find all JSON files in a directory and its subdirectories using pathlib.

    Args:
        directory: Root directory (``str`` or ``Path``) to search.

    Returns:
        A sorted list of ``Path`` objects whose names match ``*.json``.
        Directories that happen to match the pattern are left out. An empty
        list is returned, after printing an error, when ``directory`` does
        not exist or is not a directory.
    """
    base_path = Path(directory)
    if not base_path.exists():
        print(f"❌ Error: Directory '{directory}' does not exist.")
        return []
    if not base_path.is_dir():
        print(f"❌ Error: '{directory}' is not a directory.")
        return []
    # rglob matches on the name only, so a folder called "data.json" would
    # otherwise be handed to the validator as if it were a file. Sorting makes
    # the report order independent of filesystem enumeration order.
    return sorted(p for p in base_path.rglob("*.json") if not p.is_dir())
def main():
    """
    Command-line entry point: scan a directory tree and validate every JSON file.

    Command-line arguments:
        directory: Directory to search (defaults to the current directory).
        -v / --verbose: Show extra information during validation.
        --fail-fast: Stop at the first file that fails validation.

    Exit status:
        0 when every file found passed (or no JSON file was found in a valid
        directory), 1 when at least one file failed or was left unchecked by
        --fail-fast, 2 when the target is missing or is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Recursively validate JSON files for syntax and encoding errors.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("directory", nargs="?", default=".", help="Directory to search for JSON files")
    parser.add_argument("-v", "--verbose", action="store_true", help="Show extra information during validation")
    parser.add_argument("--fail-fast", action="store_true", help="Stop on the first invalid file")
    args = parser.parse_args()

    target_dir = args.directory
    print(f"🔍 Scanning: {os.path.abspath(target_dir)}")

    json_files = find_json_files(target_dir)
    if not json_files:
        if not os.path.isdir(target_dir):
            # find_json_files has already printed why the target is unusable.
            # Exit non-zero so a mistyped path cannot pass silently in CI.
            sys.exit(2)
        print("No JSON files found.")
        return

    print(f"📄 Found {len(json_files)} JSON file(s). Validating...\n")
    success_count = 0
    total_count = len(json_files)
    for file_path in json_files:
        if validate_json(file_path, verbose=args.verbose):
            success_count += 1
        elif args.fail_fast:
            print("\n🛑 Fail-fast enabled. Stopping validation.")
            break

    print(f"\n✨ Done! {success_count}/{total_count} files passed.")
    # Files skipped by --fail-fast are never counted as passed, so an early
    # stop also ends up here with a non-zero status.
    if success_count < total_count:
        sys.exit(1)
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C: finish with a short notice and status 130 instead of a traceback.
        print("\n\nAborted by user.")
        raise SystemExit(130)