forked from sashaDoubov/PreprocessingOfficeImages
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstats_whole_image.py
More file actions
75 lines (56 loc) · 2.63 KB
/
stats_whole_image.py
File metadata and controls
75 lines (56 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
from PIL import Image, ImageDraw
from collections import defaultdict
import sys
import random
"""
Prints statistics about the labelled images. Run it *before* splitting the images.
For every label vector found in the data (00001, 00011, ...) it reports how many images carry that label.
It also reports how many positive and negative examples there are for each spatial region
(top left, top right, bottom left, bottom right, center).
usage: python stats_whole_image.py <image folder path>
Note:
These stats include at most 150 randomly chosen images of the "00010" type, because that label is over-represented in the data.
Adjust or remove this cap if the data is different.
"""
# One entry per flag of a label vector, in label order (index 0 is the first
# character of the label).
spatial_positions = ["top left ", "top right ", "bottom left ", "bottom right", "center "]


def _load_categories(image_folder_path, filenames):
    """Group image names by the label stored in each image's .txt file.

    Returns a defaultdict mapping a label string (e.g. "00010") to the list
    of "<name>.jpg" entries whose "<name>.txt" file contains that label.
    Entries that are not .txt files are ignored.
    """
    category_to_image = defaultdict(list)
    for filename in filenames:
        # splitext copes with names that have no dot or several dots, where
        # unpacking filename.split('.') into two names raised ValueError.
        name, ext = os.path.splitext(filename)
        if ext != '.txt':
            continue
        with open(os.path.join(image_folder_path, filename), 'r') as f:
            # Strip surrounding whitespace so that a trailing newline does
            # not turn "00010" into a separate "00010\n" category.
            category = f.read().strip()
        if len(category) < len(spatial_positions):
            # A label without one flag per position cannot be indexed by
            # position; report it instead of failing with IndexError later.
            sys.stderr.write(
                "warning: skipping {}: label {!r} has fewer than {} flags\n".format(
                    filename, category, len(spatial_positions)))
            continue
        category_to_image[category].append("{}.jpg".format(name))
    return category_to_image


def _cap_category(category_to_image, category, limit):
    """Randomly keep at most `limit` images of `category` (in place)."""
    # Only touch the category when it exists: indexing the defaultdict
    # unconditionally would create an empty entry that then gets reported.
    if category in category_to_image:
        images = category_to_image[category]
        random.shuffle(images)
        category_to_image[category] = images[:limit]


def _images_per_position(category_to_image, flag):
    """Collect, per position index, the images whose label has `flag` there.

    Only positions that matched at least one category appear as keys.
    """
    per_index = defaultdict(list)
    positions = len(spatial_positions)
    for category, image_list in category_to_image.items():
        for index, value in enumerate(category[:positions]):
            if value == flag:
                per_index[index].extend(image_list)
    return per_index


def _print_section(title, per_index, image_count):
    """Print one per-position table followed by the sum of its counts."""
    print("-------------------------------------------------------")
    print(title)
    total = 0
    # Sorted so the rows always come out in position order.
    for index in sorted(per_index):
        count = len(per_index[index])
        print("Index {}: {} {}/{} images".format(
            index, spatial_positions[index], count, image_count))
        total += count
    # Two spaces reproduce the output of the former `print "sum ", sum_i`.
    print("sum  {}".format(total))


def main(argv=None):
    """Print label statistics for the image folder named in `argv[1]`.

    `argv` defaults to `sys.argv`. Exits with a usage message when no folder
    is given. The code is written to run under both Python 2 and Python 3.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: python stats_whole_image.py <image folder path>")
    image_folder_path = argv[1]
    listOfFiles = os.listdir(image_folder_path)
    # Denominator shown in the tables: half the number of directory entries,
    # presumably because every image is paired with one .txt label file.
    # Floor division keeps it an integer on Python 3 as well.
    image_count = len(listOfFiles) // 2

    category_to_image = _load_categories(image_folder_path, listOfFiles)

    # HACK: in order to balance out the data used, drop all but 150 randomly
    # chosen images of the heavily over-represented "00010" label.
    # Should be changed if the data is different.
    _cap_category(category_to_image, "00010", 150)

    for category in sorted(category_to_image):
        print("{} has {} images".format(category, len(category_to_image[category])))

    _print_section("Positive Examples - Occupied",
                   _images_per_position(category_to_image, '1'), image_count)
    _print_section("Negative Examples - Unoccupied",
                   _images_per_position(category_to_image, '0'), image_count)


if __name__ == "__main__":
    main()