-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
132 lines (110 loc) · 4 KB
/
utils.py
File metadata and controls
132 lines (110 loc) · 4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import re
import sys
import cv2
import socket
import numpy as np
import urlparse
from decorators import limitable
from cStringIO import StringIO
from config import IMG_FORMATS, DEFAULT_SIZE, DEFAULT_FMT, MAX_RETRIES, POOL_SIZE
using_gevent = False
try:
import gevent
from gevent.pool import Pool
from gevent import monkey
monkey.patch_socket()
monkey.patch_ssl()
using_gevent = True
except ImportError:
print >> sys.stderr, "w. Not using gevent. Will be slower."
from urllib2 import urlopen, URLError
@limitable
def ximages(dirpath, formats=IMG_FORMATS, gray=True, checksize=False):
    """Yield every loadable image found anywhere under ``dirpath``.

    The tree is traversed with ``os.walk``. A file is a candidate when the
    text after its last ``os.path.extsep``, lower-cased, is contained in
    ``formats``. Candidates for which ``cv2.imread`` returns ``None`` are
    skipped without any message.

    Args:
        dirpath: root folder to search.
        formats: container of accepted lower-case extensions (no leading dot).
        gray: load images as grayscale when true, as colour otherwise.
        checksize: when true, fail as soon as a loaded image has a shape
            that differs from a previously loaded one.

    Raises:
        ValueError: ``checksize`` is set and more than one distinct image
            shape has been seen.
    """
    seen_shapes = set()
    for folder, _subdirs, filenames in os.walk(dirpath):
        for filename in filenames:
            suffix = filename.split(os.path.extsep)[-1].lower()
            if suffix not in formats:
                continue
            full_path = os.path.join(folder, filename)
            if gray:
                load_mode = cv2.CV_LOAD_IMAGE_GRAYSCALE
            else:
                load_mode = cv2.CV_LOAD_IMAGE_COLOR
            image = cv2.imread(full_path, flags=load_mode)
            if image is None:
                # Not something OpenCV could read; move on to the next file.
                continue
            seen_shapes.add(image.shape)
            if checksize and len(seen_shapes) > 1:
                raise ValueError("Folder contains images with different sizes: %s" % dirpath)
            yield image
@limitable
def xvideo(path, gray=True):
    """Yield the frames of the video at ``path`` one at a time.

    Frames are read with ``cv2.VideoCapture`` until ``read()`` reports
    failure, at which point the generator simply finishes.

    Args:
        path: source handed to ``cv2.VideoCapture``.
        gray: when true each frame is converted with ``cv2.COLOR_BGR2GRAY``
            before being yielded; otherwise the frame is yielded as read.
    """
    cap = cv2.VideoCapture(path)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No more frames (or the frame could not be retrieved).
                # End with a plain return: raising StopIteration inside a
                # generator is converted to RuntimeError under PEP 479.
                return
            yield cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) if gray else frame
    finally:
        # Runs on exhaustion and also when the consumer abandons the
        # generator early, so the capture handle is not leaked.
        cap.release()
def url_encode_non_ascii(b):
    """Return ``b`` with every character in the range 0x80-0xFF percent-escaped.

    Each matching character is replaced by ``%`` followed by its code as two
    lower-case hex digits; all other characters are left untouched.
    """
    def _percent_escape(match):
        return '%%%02x' % ord(match.group(0))

    return re.sub('[\x80-\xFF]', _percent_escape, b)
def iri2uri(iri):
    """Convert an IRI into a URI and return the reassembled string.

    The IRI is split with ``urlparse.urlparse``. The component at index 1
    (the network location) is encoded with the ``idna`` codec; every other
    component is encoded as UTF-8 and then passed through
    ``url_encode_non_ascii``. The pieces are joined back together with
    ``urlparse.urlunparse``.
    """
    encoded = []
    for position, component in enumerate(urlparse.urlparse(iri)):
        if position == 1:
            # Host names use IDNA rather than percent-escapes.
            piece = component.encode('idna')
        else:
            piece = url_encode_non_ascii(component.encode('utf-8'))
        encoded.append(piece)
    return urlparse.urlunparse(tuple(encoded))
def get_image(iri):
    """Download the image that ``iri`` points to and decode it as grayscale.

    The IRI is converted to a URI once; the download is then attempted at
    most twice. Each failure is reported on stderr as a ``w.`` warning.

    Args:
        iri: Internationalized Resource Identifier of the image.

    Returns:
        Whatever ``cv2.imdecode`` produces for the downloaded bytes, or
        ``None`` when the IRI cannot be converted or every attempt fails.
    """
    try:
        # The conversion is deterministic, so retrying it is pointless. Doing
        # it up front also guarantees ``uri`` is bound in the handler below;
        # previously a failing conversion made the handler itself crash with
        # an unbound-variable error.
        uri = iri2uri(iri)
    except UnicodeError as e:
        # repr() keeps the warning printable even for non-ASCII input.
        print >> sys.stderr, 'w.', repr(iri), ':', e
        return None

    retries = 2  # maxretries
    while retries > 0:
        try:
            response = urlopen(uri)
            try:
                data = response.read()
            finally:
                # Release the connection whether or not the read succeeded.
                response.close()
            a = np.asarray(bytearray(data), dtype=np.uint8)
            return cv2.imdecode(a, flags=cv2.CV_LOAD_IMAGE_GRAYSCALE)
        except (URLError, socket.error, UnicodeError) as e:
            print >> sys.stderr, 'w.', uri, ':', e
            retries -= 1
    return None
def get_images(iris):
    """Fetch every image in ``iris`` and return the successful results.

    When gevent was imported successfully, one greenlet per IRI is spawned
    and all of them are joined before results are collected. Without gevent
    the images are fetched one after another instead of failing on the
    missing ``gevent`` name.

    Args:
        iris: iterable of IRIs, each passed to ``get_image``.

    Returns:
        List of the non-``None`` results, in input order.
    """
    if using_gevent:
        jobs = [gevent.spawn(get_image, i) for i in iris]
        gevent.joinall(jobs)
        results = [j.value for j in jobs]
    else:
        results = [get_image(i) for i in iris]
    return [img for img in results if img is not None]
def iriopen(iri, retries=MAX_RETRIES):
    """Take an Internationalized Resource Identifier and return the image it points to.

    The IRI is converted to a URI once; the download is then attempted up
    to ``retries`` times. Each failure is reported on stderr as a ``w.``
    warning. The downloaded bytes are decoded as a grayscale image.

    Args:
        iri: Internationalized Resource Identifier of the image.
        retries: maximum number of download attempts.

    Returns:
        Whatever ``cv2.imdecode`` produces for the downloaded bytes, or
        ``None`` when the IRI cannot be converted or every attempt fails.
    """
    try:
        # The conversion is deterministic, so retrying it is pointless. Doing
        # it up front also guarantees ``uri`` is bound in the handler below;
        # previously a failing conversion made the handler itself crash with
        # an unbound-variable error.
        uri = iri2uri(iri)
    except UnicodeError as e:
        # repr() keeps the warning printable even for non-ASCII input.
        print >> sys.stderr, 'w.', repr(iri), ':', e
        return None

    while retries > 0:
        try:
            response = urlopen(uri)
            try:
                data = response.read()
            finally:
                # Release the connection whether or not the read succeeded.
                response.close()
            a = np.asarray(bytearray(data), dtype=np.uint8)
            return cv2.imdecode(a, flags=cv2.CV_LOAD_IMAGE_GRAYSCALE)
        except (URLError, socket.error, UnicodeError) as e:
            print >> sys.stderr, 'w.', uri, ':', e
            retries -= 1
    return None
@limitable
def xweb(iris):
    """Yield the image behind each IRI in ``iris``, fetched one at a time.

    Each IRI is opened with ``iriopen``; IRIs for which it returns ``None``
    are skipped. Results come out in input order.
    """
    for iri in iris:
        image = iriopen(iri)
        if image is None:
            continue
        yield image
if using_gevent:
    @limitable
    def xweb(iris):
        """Yield the image behind each IRI in ``iris`` using a gevent pool.

        Replaces the sequential ``xweb`` when gevent is available. IRIs are
        mapped through ``iriopen`` on a pool of ``POOL_SIZE`` greenlets via
        ``imap_unordered``, so results are not guaranteed to follow input
        order. ``None`` results are skipped.
        """
        workers = Pool(POOL_SIZE)
        fetched = workers.imap_unordered(iriopen, iris)
        for image in fetched:
            if image is None:
                continue
            yield image
def xresize(img_stream, size=DEFAULT_SIZE, interpolation=cv2.INTER_LINEAR):
    """Lazily resize every image coming out of ``img_stream``.

    Args:
        img_stream: iterable of images.
        size: target size handed to ``cv2.resize`` as its second argument.
        interpolation: interpolation flag forwarded to ``cv2.resize``.

    Yields:
        The result of ``cv2.resize`` for each input image, in order.
    """
    for original in img_stream:
        resized = cv2.resize(original, size, interpolation=interpolation)
        yield resized
def write_to(img_iter, outdir, format=DEFAULT_FMT, limit=None):
for i, img in enumerate(img_iter):
if limit is not None and i >= limit:
break
if not os.path.isdir(outdir):
os.makedirs(outdir)
print "Created:", outdir
path = os.path.join(outdir, str(i).zfill(5) + '.' + format)
cv2.imwrite(path, img)
sys.stdout.write("\rWrote: %s" % path)
sys.stdout.flush()