-
Notifications
You must be signed in to change notification settings - Fork 352
Expand file tree
/
Copy pathrunstat.py
More file actions
executable file
·52 lines (45 loc) · 2.03 KB
/
runstat.py
File metadata and controls
executable file
·52 lines (45 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python3
import os, subprocess
from collections import defaultdict
def remap_sa_path(x):
    """Rewrite an ``sa``-numbered path to its offset numeric name.

    When the path contains ``/sa`` followed by an integer index (read from
    the next five bytes), the ``sa`` prefix is dropped and the index is
    shifted by 5000 and zero-padded to at least four digits, e.g.
    ``b"masks/sa00012_x.png"`` becomes ``b"masks/5012_x.png"``.

    Args:
        x: Path as ``bytes``.

    Returns:
        The rewritten path, or ``x`` unchanged when it has no ``/sa``
        marker or the bytes after the marker do not parse as an integer.
    """
    # Split on the first marker only, so a second "/sa" later in the path
    # cannot break the unpacking.
    head, marker, tail = x.partition(b"/sa")
    if not marker:
        return x
    try:
        index = int(tail[:5])
    except ValueError:
        # Merely contains "/sa" (e.g. b"masks/sample.png"); leave it alone
        # instead of aborting the whole run.
        return x
    return b"%s/%04d%s" % (head, index + 5000, tail[5:])
# Mask directories that are scanned, in the order they are reported.
MASK_DIRS = [
    b"masks/",
    b"masks2/",
    b"masksd/",
]

# For each mask directory, the image directory whose entry count is used
# as the denominator of its "labeled" percentage.
IMG_DIRS = {
    b"masks/": b"imgs/",
    b"masks2/": b"imgs2/",
    b"masksd/": b"imgsd/",
}

# A mask counts as labeled only when its per-path count from git history
# is strictly greater than the threshold for its directory.
MIN_COMMITS = {
    b"masks/": 1,
    b"masks2/": 1,
    b"masksd/": 0,
}
# List every object reachable from any ref together with its path, keep
# only the lines that have a path, group them by path and count the lines
# per path (ignoring the object id in the first field). The final numeric
# reverse sort puts the highest counts first.
raw = subprocess.check_output(
    "git rev-list --objects --all | awk '$2' | sort -k2 | uniq -cf1 | sort -rn",
    shell=True,
).strip().split(b"\n")

# mask path -> count from the first (i.e. highest-count) line seen for it
num_commits_map = {}
# mask directory -> set of mask paths that currently exist on disk
al_sets = defaultdict(set)
# mask directory -> {count: number of listed lines with that count}
num_commits_hists = defaultdict(lambda: defaultdict(int))

for entry in raw:
    fields = entry.strip().split(b" ")
    # Expect exactly "<count> <object id> <path>"; anything else is skipped.
    if len(fields) != 3:
        continue
    count_field, _object_id, path_field = fields
    path = remap_sa_path(path_field)
    count = int(count_field)

    # First configured mask directory this path lives under, if any.
    owner = None
    for candidate in MASK_DIRS:
        if path.startswith(candidate):
            owner = candidate
            break

    # Only masks under a known directory that still exist in the working
    # tree are counted.
    if owner is not None and os.path.isfile(path):
        al_sets[owner].add(path)
        num_commits_hists[owner][count] += 1
        # Keep the count from the first line seen for this path.
        num_commits_map.setdefault(path, count)
# Number of directory entries in the image directory paired with each mask
# directory; used as the denominator of the per-directory percentages.
img_totals = {mask_dir: len(os.listdir(IMG_DIRS[mask_dir])) for mask_dir in MASK_DIRS}
total_all = sum(img_totals.values())

# Masks per directory whose count exceeds that directory's threshold.
# Computed once and reused for both the per-directory report and the
# combined trainable list.
labeled_by_dir = {
    mask_dir: [
        mask_path
        for mask_path in al_sets[mask_dir]
        if num_commits_map.get(mask_path, 0) > MIN_COMMITS[mask_dir]
    ]
    for mask_dir in MASK_DIRS
}
trainable = sorted(
    mask_path for mask_paths in labeled_by_dir.values() for mask_path in mask_paths
)

for mask_dir in MASK_DIRS:
    total = img_totals[mask_dir]
    labeled = len(labeled_by_dir[mask_dir])
    # An empty image directory must not abort the report with a
    # ZeroDivisionError; report 0% like the overall total below does.
    labeled_pct = labeled / total * 100. if total else 0.
    print(f"{mask_dir.decode()}: {labeled}/{total} labeled ({labeled_pct:.2f}%)")
    hist = num_commits_hists[mask_dir]
    for num_commits in sorted(hist):
        print(f" num_commits={num_commits:2d}: {hist[num_commits]} files")

# One trainable mask path per line (no trailing newline), written as bytes.
with open("files_trainable", "wb") as f:
    f.write(b'\n'.join(trainable))

pct = len(trainable) / total_all * 100. if total_all else 0.
print(f"\nfiles_trainable total: {len(trainable)}/{total_all} ({pct:.2f}%)")