Skip to content

Commit ed07588

Browse files
authored
Merge pull request #71 from dlcs/bugfix/memory_leak
Close PIL Image after use
2 parents c807c4b + 3b7ac0d commit ed07588

File tree

3 files changed

+17
-14
lines changed

3 files changed

+17
-14
lines changed

src/app/engine/rasterizers.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def rasterize_pdf(self, subfolder_path):
3333
pdf_source = os.path.join(subfolder_path, "source.pdf")
3434
images = self.__rasterize(pdf_source, subfolder_path, dpi=self._dpi)
3535
images = self.__validate_rasterized_images(images, pdf_source, subfolder_path)
36-
return images
36+
return [i.filename for i in images]
3737

3838
def __rasterize(
3939
self, pdf_source, subfolder_path, start_page=None, last_page=None, dpi=None
@@ -65,6 +65,7 @@ def __validate_rasterized_images(self, images, pdf_source, subfolder_path):
6565
if res == ResizeResult.SINGLE_PIXEL:
6666
single_pixel_pages.append(idx + 1)
6767
idx += 1
68+
im.close()
6869

6970
if single_pixel_pages:
7071
return self.__rescale_single_page_default_dpi(
@@ -89,8 +90,8 @@ def __ensure_image_size(self, idx, im: Image):
8990
logger.info(
9091
f"resizing image index {idx} from {w},{h} to {scale_w},{scale_h}"
9192
)
92-
resized = im.resize((scale_w, scale_h), resample=Image.LANCZOS)
93-
resized.save(filename)
93+
with im.resize((scale_w, scale_h), resample=Image.LANCZOS) as resized:
94+
resized.save(filename)
9495
return ResizeResult.RESIZED
9596

9697
return ResizeResult.NOOP

src/app/engine/s3.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ def __build_bucket_base_url(self):
2626
else:
2727
return f"https://s3.amazonaws.com/{self._bucket_name}"
2828

29-
def put_images(self, images, submission_id, composite_id, customer_id, space_id):
29+
def put_images(
30+
self, image_paths, submission_id, composite_id, customer_id, space_id
31+
):
3032
s3_uris = []
3133

3234
key_prefix = self.__get_key_prefix(
@@ -36,14 +38,14 @@ def put_images(self, images, submission_id, composite_id, customer_id, space_id)
3638
with tqdm.tqdm(
3739
desc=f"[{submission_id}] Upload images to S3",
3840
unit=" image",
39-
total=len(images),
41+
total=len(image_paths),
4042
) as progress_bar:
4143
with ThreadPoolExecutor(max_workers=self._upload_threads) as executor:
4244
# It's critical that the list of S3 URIs returned by this method is in the
4345
# same order as the list of images provided to it. '.map(...)' gives us that,
4446
# whilst '.submit(...)' does not.
4547
for s3_uri in executor.map(
46-
self.__put_image, repeat(key_prefix), images
48+
self.__put_image, repeat(key_prefix), image_paths
4749
):
4850
s3_uris.append(s3_uri)
4951
progress_bar.update(1)
@@ -52,8 +54,8 @@ def put_images(self, images, submission_id, composite_id, customer_id, space_id)
5254
def __get_key_prefix(self, submission_id, composite_id, customer, space):
5355
return f"{self._object_key_prefix}/{customer}/{space}/{composite_id or submission_id}"
5456

55-
def __put_image(self, key_prefix, image):
56-
object_key = f"{key_prefix}/{os.path.basename(image.filename)}"
57-
with open(image.filename, "rb") as file:
57+
def __put_image(self, key_prefix, image_path):
58+
object_key = f"{key_prefix}/{os.path.basename(image_path)}"
59+
with open(image_path, "rb") as file:
5860
self._client.put_object(Bucket=self._bucket_name, Key=object_key, Body=file)
5961
return f"{self._bucket_base_url}/{object_key}"

src/app/engine/tasks.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ def process_member(args):
2828
folder_path = None
2929
try:
3030
folder_path = __fetch_origin(member, member.json_data["origin"])
31-
images = __rasterize_composite(member, folder_path)
32-
s3_urls = __push_images_to_dlcs(member, images)
31+
image_paths = __rasterize_composite(member, folder_path)
32+
s3_urls = __push_images_to_dlcs(member, image_paths)
3333
dlcs_requests = __build_dlcs_requests(member, s3_urls)
3434
dlcs_responses = __initiate_dlcs_ingest(member, dlcs_requests, args["auth"])
3535
return __build_result(member, dlcs_responses)
@@ -49,12 +49,12 @@ def __rasterize_composite(member, pdf_path):
4949
return pdf_rasterizer.rasterize_pdf(pdf_path)
5050

5151

52-
def __push_images_to_dlcs(member, images):
53-
__update_status(member, "PUSHING_TO_DLCS", image_count=len(images))
52+
def __push_images_to_dlcs(member, image_paths):
53+
__update_status(member, "PUSHING_TO_DLCS", image_count=len(image_paths))
5454
composite_id = member.json_data.get("compositeId")
5555
customer = member.collection.customer
5656
space = member.json_data["space"]
57-
return s3_client.put_images(images, member.id, composite_id, customer, space)
57+
return s3_client.put_images(image_paths, member.id, composite_id, customer, space)
5858

5959

6060
def __build_dlcs_requests(member, dlcs_uris):

0 commit comments

Comments
 (0)