From a92303f6f54781182107504ebe5aa62982b52eb4 Mon Sep 17 00:00:00 2001 From: TrellixVulnTeam Date: Sun, 9 Oct 2022 04:28:50 +0000 Subject: [PATCH] Adding tarfile member sanitization to extractall() --- .../accuracy/remote_accuracy_job.py | 21 +++++++++++++++- .../remote_annotate_dataset_job.py | 21 +++++++++++++++- .../remote_per_tensor_report_job.py | 21 +++++++++++++++- .../accuracy/parse_accuracy_result_job.py | 24 ++++++++++++++++++- .../parse_annotate_dataset_result_job.py | 24 ++++++++++++++++++- ..._tensor_distance_calculation_result_job.py | 24 ++++++++++++++++++- .../parse_int8_calibration_result_job.py | 24 ++++++++++++++++++- .../remote_int8_calibration_job.py | 21 +++++++++++++++- 8 files changed, 172 insertions(+), 8 deletions(-) diff --git a/wb/main/jobs/accuracy_analysis/accuracy/remote_accuracy_job.py b/wb/main/jobs/accuracy_analysis/accuracy/remote_accuracy_job.py index 432710f8..a0923549 100644 --- a/wb/main/jobs/accuracy_analysis/accuracy/remote_accuracy_job.py +++ b/wb/main/jobs/accuracy_analysis/accuracy/remote_accuracy_job.py @@ -56,7 +56,26 @@ def collect_artifacts(self): dest_archive = str(self.get_job_results_path(job_model) / JOB_ARTIFACTS_ARCHIVE_NAME) collect_artifacts(target.id, result_archive, dest_archive, session) with tarfile.open(dest_archive, 'r:gz') as tar: - tar.extractall(path=self.get_job_results_path(job_model)) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonpath([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=self.get_job_results_path(job_model)) 
def on_success(self): super().on_success() diff --git a/wb/main/jobs/accuracy_analysis/annotate_datset/remote_annotate_dataset_job.py b/wb/main/jobs/accuracy_analysis/annotate_datset/remote_annotate_dataset_job.py index 94d248c7..872feec1 100644 --- a/wb/main/jobs/accuracy_analysis/annotate_datset/remote_annotate_dataset_job.py +++ b/wb/main/jobs/accuracy_analysis/annotate_datset/remote_annotate_dataset_job.py @@ -56,4 +56,23 @@ def collect_artifacts(self): collect_artifacts(target.id, result_archive, dest_archive, session) with tarfile.open(dest_archive, 'r:gz') as tar: - tar.extractall(path=job_model.result_dataset.path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonpath([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=job_model.result_dataset.path) diff --git a/wb/main/jobs/accuracy_analysis/per_tensor/remote_per_tensor_report_job.py b/wb/main/jobs/accuracy_analysis/per_tensor/remote_per_tensor_report_job.py index 1604a816..b87bea53 100644 --- a/wb/main/jobs/accuracy_analysis/per_tensor/remote_per_tensor_report_job.py +++ b/wb/main/jobs/accuracy_analysis/per_tensor/remote_per_tensor_report_job.py @@ -55,7 +55,26 @@ def collect_artifacts(self): dest_archive = str(self.get_job_results_path(job_model) / JOB_ARTIFACTS_ARCHIVE_NAME) collect_artifacts(target.id, result_archive, dest_archive, session) with tarfile.open(dest_archive, 'r:gz') as tar: - tar.extractall(path=self.get_job_results_path(job_model)) + def is_within_directory(directory, target): + + abs_directory = 
os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonpath([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=self.get_job_results_path(job_model)) def on_success(self): super().on_success() diff --git a/wb/main/jobs/dev_cloud/accuracy/parse_accuracy_result_job.py b/wb/main/jobs/dev_cloud/accuracy/parse_accuracy_result_job.py index 645173d7..87ed5b85 100644 --- a/wb/main/jobs/dev_cloud/accuracy/parse_accuracy_result_job.py +++ b/wb/main/jobs/dev_cloud/accuracy/parse_accuracy_result_job.py @@ -83,7 +83,29 @@ def run(self): def _extract_accuracy_results(archive_path: str, destination_path: Path): create_empty_dir(destination_path) with tarfile.open(archive_path, 'r:gz') as tar: - tar.extractall(destination_path) + + import os + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonpath([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, destination_path) def on_success(self): self._job_state_subject.update_state(status=StatusEnum.ready, diff --git a/wb/main/jobs/dev_cloud/accuracy/parse_annotate_dataset_result_job.py b/wb/main/jobs/dev_cloud/accuracy/parse_annotate_dataset_result_job.py index 
3d24b7ec..8c38a5d3 100644 --- a/wb/main/jobs/dev_cloud/accuracy/parse_annotate_dataset_result_job.py +++ b/wb/main/jobs/dev_cloud/accuracy/parse_annotate_dataset_result_job.py @@ -78,7 +78,29 @@ def run(self): def _extract_artifact(archive_path: str, destination_path: Path): create_empty_dir(destination_path) with tarfile.open(archive_path, 'r:gz') as tar: - tar.extractall(destination_path) + + import os + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonpath([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, destination_path) def on_success(self): self._job_state_subject.update_state(status=StatusEnum.ready, diff --git a/wb/main/jobs/dev_cloud/accuracy/parse_per_tensor_distance_calculation_result_job.py b/wb/main/jobs/dev_cloud/accuracy/parse_per_tensor_distance_calculation_result_job.py index 1b765622..75ad513f 100644 --- a/wb/main/jobs/dev_cloud/accuracy/parse_per_tensor_distance_calculation_result_job.py +++ b/wb/main/jobs/dev_cloud/accuracy/parse_per_tensor_distance_calculation_result_job.py @@ -72,7 +72,29 @@ def run(self): def _extract_artifact(archive_path: str, destination_path: Path): create_empty_dir(destination_path) with tarfile.open(archive_path, 'r:gz') as tar: - tar.extractall(destination_path) + + import os + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonpath([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", 
members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, destination_path) def on_success(self): self._job_state_subject.update_state(status=StatusEnum.ready, diff --git a/wb/main/jobs/dev_cloud/int8_calibration/parse_int8_calibration_result_job.py b/wb/main/jobs/dev_cloud/int8_calibration/parse_int8_calibration_result_job.py index 4185f84f..5ab06b5d 100644 --- a/wb/main/jobs/dev_cloud/int8_calibration/parse_int8_calibration_result_job.py +++ b/wb/main/jobs/dev_cloud/int8_calibration/parse_int8_calibration_result_job.py @@ -59,7 +59,29 @@ def run(self): def extract_calibrated_model(self, archive_path: str, destination_path: str): create_empty_dir(destination_path) with tarfile.open(archive_path, 'r:gz') as tar: - tar.extractall(destination_path) + + import os + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonpath([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, destination_path) Int8CalibrationJob.move_optimized_model(destination_path, destination_path, self.job_id) def on_success(self): diff --git a/wb/main/jobs/int8_calibration/remote_int8_calibration_job.py b/wb/main/jobs/int8_calibration/remote_int8_calibration_job.py index eae6f5d0..edd08836 100644 --- a/wb/main/jobs/int8_calibration/remote_int8_calibration_job.py +++ 
b/wb/main/jobs/int8_calibration/remote_int8_calibration_job.py @@ -84,7 +84,26 @@ def collect_artifacts(self): result_archive = os.path.join(tmp_folder, JOB_ARTIFACTS_ARCHIVE_NAME) collect_artifacts(target.id, dest_archive, result_archive, session) with tarfile.open(result_archive, 'r:gz') as tar: - tar.extractall(path=tmp_folder) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonpath([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=tmp_folder) create_empty_dir(job_model.result_model.path) self.move_optimized_model(tmp_folder, job_model.result_model.path, self.job_id) self._clean_paths()