-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck.py
More file actions
50 lines (38 loc) · 1.21 KB
/
check.py
File metadata and controls
50 lines (38 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from pathlib import Path
from PIL import Image
# Root directory that holds every dataset split as well as the quarantine folder.
BASE_DIR = Path("data")

# Split directories that main() scans for unreadable images.
TRAIN_DIR = BASE_DIR.joinpath("train")
VAL_DIR = BASE_DIR.joinpath("val")

# Destination for files that fail verification in check_dir().
BAD_DIR = BASE_DIR.joinpath("bad")
def check_dir(dir_path: Path) -> None:
    """Verify every image under *dir_path* and quarantine the unreadable ones.

    Recursively collects the regular files below ``dir_path`` whose suffix is
    ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive) and runs Pillow's
    ``Image.verify()`` on each of them. A file that raises while being opened
    or verified is reported on stdout and moved under ``BAD_DIR``, keeping
    its path relative to ``BASE_DIR``. When ``dir_path`` is not located under
    ``BASE_DIR`` the file keeps its path relative to ``dir_path``, prefixed
    with the scanned directory's own name.

    Files that already live under ``BAD_DIR`` are ignored, so scanning a
    directory that contains the quarantine folder does not move them again.

    Args:
        dir_path: Root directory to scan.

    Returns:
        None. Per-file failures and a final summary are printed to stdout.
    """
    print(f"Checking {dir_path}...")
    image_suffixes = {".jpg", ".jpeg", ".png"}

    # Per-file resolution is only needed when the scan root contains BAD_DIR.
    bad_root = BAD_DIR.resolve()
    scan_root = dir_path.resolve()
    scan_covers_bad = bad_root == scan_root or scan_root in bad_root.parents

    # Snapshot the candidates before touching anything: moving files out of a
    # tree while a lazy rglob() iterator is still walking it gives unspecified
    # results. Sorting also makes the log order reproducible.
    candidates = sorted(
        path
        for path in dir_path.rglob("*")
        if path.suffix.lower() in image_suffixes and path.is_file()
    )

    count = 0
    bad = 0
    for img_path in candidates:
        if scan_covers_bad and bad_root in img_path.resolve().parents:
            continue  # already quarantined
        count += 1
        try:
            with Image.open(img_path) as img:
                img.verify()  # integrity check without a full decode
        except Exception as e:
            # Deliberately broad: Pillow reports broken files through several
            # unrelated exception types.
            bad += 1
            print(f" BAD: {img_path} ({e})")
            # Move to bad/ preserving subdirs.
            try:
                rel = img_path.relative_to(BASE_DIR)
            except ValueError:
                # dir_path is not spelled relative to BASE_DIR.
                rel = Path(dir_path.name) / img_path.relative_to(dir_path)
            target = BAD_DIR / rel
            target.parent.mkdir(parents=True, exist_ok=True)
            # replace() overwrites an existing target on every platform;
            # rename() raises FileExistsError on Windows in that case.
            img_path.replace(target)
    print(f"Checked {count} images in {dir_path}, found {bad} bad.")
def main():
    """Scan the train and val splits for unreadable images.

    Creates ``BAD_DIR`` up front, calls ``check_dir`` on each split directory
    that exists (a missing split is skipped without any message), and prints
    ``Done.`` at the end.
    """
    BAD_DIR.mkdir(parents=True, exist_ok=True)
    for split_dir in (TRAIN_DIR, VAL_DIR):
        if split_dir.exists():
            check_dir(split_dir)
    print("Done.")


# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()