From 0fc170635f95919c66769f8e8ad8a100d5fbe19f Mon Sep 17 00:00:00 2001 From: Milan Malfait <38256462+milanmlft@users.noreply.github.com> Date: Mon, 7 Jul 2025 17:28:29 +0100 Subject: [PATCH 1/9] Add sftp server for tests --- pixl_core/pyproject.toml | 66 +++++----- pixl_core/tests/uploader/test_sftp.py | 170 ++++++++++++++++++++++++++ 2 files changed, 203 insertions(+), 33 deletions(-) create mode 100644 pixl_core/tests/uploader/test_sftp.py diff --git a/pixl_core/pyproject.toml b/pixl_core/pyproject.toml index fa8b98bc..a37e2f72 100644 --- a/pixl_core/pyproject.toml +++ b/pixl_core/pyproject.toml @@ -7,24 +7,26 @@ readme = "README.md" requires-python = ">=3.11" classifiers = ["Programming Language :: Python :: 3"] dependencies = [ - "aio_pika==9.5.3", - "azure-identity==1.19.0", - "azure-keyvault==4.2.0", - "fastapi==0.115.6", - "jsonpickle==4.0.0", - "loguru==0.7.3", - "pandas==2.2.3", - "pika==1.3.2", - "psycopg2-binary==2.9.10", - "pyarrow==18.1.0", - "pydantic==2.10.3", - "python-decouple==3.8", - "python-slugify==8.0.4", - "PyYAML==6.0.2", - "requests==2.32.3", - "sqlalchemy==2.0.36", - "token-bucket==0.3.0", - "xnat==0.6.2", + "aio_pika==9.5.3", + "azure-identity==1.19.0", + "azure-keyvault==4.2.0", + "docker==7.1.0", + "fastapi==0.115.6", + "jsonpickle==4.0.0", + "loguru==0.7.3", + "pandas==2.2.3", + "paramiko==3.5.1", + "pika==1.3.2", + "psycopg2-binary==2.9.10", + "pyarrow==18.1.0", + "pydantic==2.10.3", + "python-decouple==3.8", + "python-slugify==8.0.4", + "PyYAML==6.0.2", + "requests==2.32.3", + "sqlalchemy==2.0.36", + "token-bucket==0.3.0", + "xnat==0.6.2", ] @@ -33,9 +35,7 @@ requires = ["hatchling>=1.0.0"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -dev-mode-dirs = [ - "src" -] +dev-mode-dirs = ["src"] [tool.pytest.ini_options] @@ -50,16 +50,16 @@ extend = "../ruff.toml" [tool.coverage.report] exclude_also = [ - "def __repr__", - "if self.debug:", - "if settings.DEBUG", - "except subprocess.CalledProcessError as exception:", - "raise AssertionError", - "raise NotImplementedError", - "if 0:", - "if __name__ == .__main__.:", - "if TYPE_CHECKING:", - "if typing.TYPE_CHECKING", - "class .*\\bProtocol\\):", - "@(abc\\.)?abstractmethod", + "def __repr__", + "if self.debug:", + "if settings.DEBUG", + "except subprocess.CalledProcessError as exception:", + "raise AssertionError", + "raise NotImplementedError", + "if 0:", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", + "if typing.TYPE_CHECKING", + "class .*\\bProtocol\\):", + "@(abc\\.)?abstractmethod", ] diff --git a/pixl_core/tests/uploader/test_sftp.py b/pixl_core/tests/uploader/test_sftp.py new file mode 100644 index 00000000..799b7c55 --- /dev/null +++ b/pixl_core/tests/uploader/test_sftp.py @@ -0,0 +1,170 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test uploading files to an SFTP endpoint.""" + +import os +import shutil +import tempfile +import time +from collections.abc import Generator +from pathlib import Path +from typing import Optional + +import docker +import paramiko +import pytest +from core.uploader._sftp import SFTPUploader +from decouple import config + +TEST_DIR = Path(__file__).parents[1] + +os.environ["SFTP_HOST"] = "localhost" +os.environ["SFTP_USERNAME"] = "testuser" +os.environ["SFTP_PASSWORD"] = "testpass" +os.environ["SFTP_PORT"] = "2222" + + +class MockSFTPUploader(SFTPUploader): + """Mock SFTPUploader for testing.""" + + def __init__(self) -> None: + """Initialise the mock uploader with hardcoded values for SFTP config.""" + self.host = os.environ["SFTP_HOST"] + self.user = os.environ["SFTP_USERNAME"] + self.password = os.environ["SFTP_PASSWORD"] + self.port = int(os.environ["SFTP_PORT"]) + self.project_slug = "test-project" + + def _set_config(self) -> None: + """Override to avoid Azure Key Vault dependency in tests.""" + + +class PixlSFTPServer: + """Docker-based SFTP server for testing""" + + def __init__(self) -> None: + """Initialize the DockerSFTPServer""" + self.username = config("SFTP_USERNAME", default="testuser") + self.password = config("SFTP_PASSWORD", default="testpass") + self.port = int(config("SFTP_PORT", default=2222)) + self.docker_client: docker.DockerClient = docker.from_env() + self.container: Optional[docker.models.containers.Container] = None + self.upload_dir: Optional[Path] = None + + def start(self) -> dict: + """Start the SFTP server container""" + temp_dir = tempfile.mkdtemp() + + # Create users.conf for the SFTP server + users_conf = f"{self.username}:{self.password}:1001:1001:upload" + users_conf_path = Path(temp_dir) / "users.conf" + users_conf_path.write_text(users_conf) + + self.upload_dir = Path(temp_dir) / "upload" + self.upload_dir.mkdir(parents=True, exist_ok=True) + + # Start container + self.container = self.docker_client.containers.run( + "atmoz/sftp:alpine", + command=f"{self.username}:{self.password}:::upload", + ports={"22/tcp": self.port}, + volumes={str(self.upload_dir): {"bind": f"/home/{self.username}/upload", "mode": "rw"}}, + detach=True, + remove=True, + ) + + # Wait for container to be ready + self._wait_for_server() + + return { + "host": "localhost", + "port": self.port, + "username": self.username, + "password": self.password, + "upload_dir": self.upload_dir, + } + + def _wait_for_server(self, timeout: int = 30) -> None: + """Wait for SFTP server to be ready""" + start_time = time.time() + while time.time() - start_time < timeout: + try: + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # noqa: S507 + ssh.connect( + "localhost", + port=self.port, + username=self.username, + password=self.password, + timeout=5, + ) + sftp = ssh.open_sftp() + sftp.close() + ssh.close() + except (paramiko.SSHException, OSError, ConnectionError): + time.sleep(1) + else: + return # Connection successful + + err_str = f"SFTP server did not start within {timeout} seconds" + raise TimeoutError(err_str) + + def stop(self) -> None: + """Stop the SFTP server container""" + if self.container: + self.container.stop() + self.container = None + + if self.upload_dir: + shutil.rmtree(self.upload_dir, ignore_errors=True) + self.upload_dir = None + + def is_running(self) -> bool: + """Check if the SFTP server is running""" + if not self.container: + return False + try: + self.container.reload() + except docker.errors.NotFound: + return False + else: + return self.container.status == "running" + + +@pytest.fixture(scope="session") +def sftp_server() -> Generator[PixlSFTPServer, None, None]: + """Return a running SFTP server container.""" + server = PixlSFTPServer() + server.start() + yield server + server.stop() + + +@pytest.fixture() +def sftp_uploader() -> MockSFTPUploader: + """Return a MockSFTPUploader object.""" + return MockSFTPUploader() + + +@pytest.fixture() +def zip_content() -> Generator: + """Directory containing the test data for uploading to the sftp server.""" + test_zip_file = TEST_DIR / "data" / "public.zip" + with test_zip_file.open("rb") as file_content: + yield file_content + + +def test_sftp_server_can_connect(sftp_server: PixlSFTPServer) -> None: + """Tests that the SFTP server can be connected to.""" + assert sftp_server.is_running() From 6d2498d82d3d6e3bf484d7cd5502b1111f72d7ec Mon Sep 17 00:00:00 2001 From: Milan Malfait <38256462+milanmlft@users.noreply.github.com> Date: Mon, 7 Jul 2025 19:51:16 +0100 Subject: [PATCH 2/9] Update pre-commit deps --- .pre-commit-config.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9e1ebc7b..6fe2c616 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,9 +32,11 @@ repos: [ "mypy==1.11.1", "types-PyYAML", - "types-requests", - "types-python-slugify", + "types-docker", + "types-paramiko", "types-psycopg2", + "types-python-slugify", + "types-requests", ] - repo: https://github.com/python-jsonschema/check-jsonschema rev: 0.28.6 From 38d37c49ec8378b4b356016a7b5fb136752066a4 Mon Sep 17 00:00:00 2001 From: Milan Malfait <38256462+milanmlft@users.noreply.github.com> Date: Mon, 7 Jul 2025 19:51:30 +0100 Subject: [PATCH 3/9] Implement `SFTPUploader` --- pixl_core/src/core/uploader/_sftp.py | 130 +++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 pixl_core/src/core/uploader/_sftp.py diff --git a/pixl_core/src/core/uploader/_sftp.py b/pixl_core/src/core/uploader/_sftp.py new file mode 100644 index 00000000..a5e6e731 --- /dev/null +++ b/pixl_core/src/core/uploader/_sftp.py @@ -0,0 +1,130 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Uploader subclass for SFTP.""" + +from typing import BinaryIO, Optional + +import paramiko +from loguru import logger + +from core.exports import ParquetExport +from core.uploader._orthanc import StudyTags, get_study_zip_archive +from core.uploader.base import Uploader + + +class SFTPUploader(Uploader): + """Upload strategy for an SFTP server.""" + + def __init__(self, project_slug: str, keyvault_alias: Optional[str]) -> None: + """Create instance of parent class""" + super().__init__(project_slug, keyvault_alias) + + def _set_config(self) -> None: + """Set the configuration for the SFTP uploader.""" + # Use the Azure KV alias as prefix if it exists, otherwise use the project name + az_prefix = self.keyvault_alias + az_prefix = az_prefix if az_prefix else self.project_slug + + # Get SFTP connection details from keyvault + self.host = self.keyvault.fetch_secret(f"{az_prefix}--sftp--host") + self.username = self.keyvault.fetch_secret(f"{az_prefix}--sftp--username") + self.password = self.keyvault.fetch_secret(f"{az_prefix}--sftp--password") + self.port = int(self.keyvault.fetch_secret(f"{az_prefix}--sftp--port")) + self.host_key_path = self.keyvault.fetch_secret(f"{az_prefix}--sftp--host-key-path") + + def _upload_dicom_image(self, study_id: str, study_tags: StudyTags) -> None: + """ + Upload DICOM image via SFTP. + + :param study_id: Orthanc Study ID + :param study_tags: Study tags containing metadata + """ + # Get DICOM zip archive from Orthanc + zip_content = get_study_zip_archive(study_id) + self.send_via_sftp( + zip_content, + study_tags.pseudo_anon_image_id, + remote_directory=self.project_slug, + ) + + def send_via_sftp( + self, zip_content: BinaryIO, pseudo_anon_image_id: str, remote_directory: str + ) -> None: + """Send the zip content to the SFTP server.""" + filename = f"{pseudo_anon_image_id}.zip" + + self._connect_client() + with self._connect_client() as sftp_client: + _sftp_create_remote_directory(sftp_client, remote_directory) + sftp_client.chdir(remote_directory) + sftp_client.putfo(zip_content, filename) + + def upload_parquet_files(self, parquet_export: ParquetExport) -> None: + """ + Upload parquet to FTPS under //parquet. + :param parquet_export: instance of the ParquetExport class + The final directory structure will look like this: + + ├── + │ └── parquet + │ ├── omop + │ │ └── public + │ │ └── PROCEDURE_OCCURRENCE.parquet + │ └── radiology + │ └── IMAGE_LINKER.parquet + ├── .zip + └── .zip + ... + """ + logger.info("Starting SFTP upload of files for '{}'", parquet_export.project_slug) + + source_root_dir = parquet_export.current_extract_base + source_files = [f for f in source_root_dir.rglob("*.parquet") if f.is_file()] + if not source_files: + msg = f"No files found in {source_root_dir}" + raise FileNotFoundError(msg) + + remote_directory = f"{self.project_slug}/{parquet_export.extract_time_slug}/parquet" + with self._connect_client() as sftp_client: + _sftp_create_remote_directory(sftp_client, remote_directory) + sftp_client.chdir(remote_directory) + for source_path in source_files: + sftp_client.put(source_path, source_path.name) + + logger.info("Finished SFTP upload of files for '{}'", parquet_export.project_slug) + + def _connect_client(self) -> paramiko.SFTPClient: + """Connect to the SFTP client""" + ssh_client = paramiko.SSHClient() + ssh_client.set_missing_host_key_policy(paramiko.RejectPolicy()) + ssh_client.load_host_keys(self.host_key_path) + ssh_client.connect( + self.host, port=self.port, username=self.username, password=self.password + ) + return ssh_client.open_sftp() + + +def _sftp_create_remote_directory(sftp_client: paramiko.SFTPClient, directory: str) -> None: + """ + Create remote directory if it doesn't exist. + + :param sftp_client: SFTP client instance + :param directory: Directory path to create + """ + try: + sftp_client.stat(directory) + except FileNotFoundError: + sftp_client.mkdir(directory) + logger.debug(f"Created remote directory: {directory}") From 3f68937ed242dbf26d8a3c9c21e26b12cdbf1be4 Mon Sep 17 00:00:00 2001 From: Milan Malfait <38256462+milanmlft@users.noreply.github.com> Date: Mon, 7 Jul 2025 20:54:08 +0100 Subject: [PATCH 4/9] Set up host keys for test --- pixl_core/tests/uploader/test_sftp.py | 53 +++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/pixl_core/tests/uploader/test_sftp.py b/pixl_core/tests/uploader/test_sftp.py index 799b7c55..bc06e9b9 100644 --- a/pixl_core/tests/uploader/test_sftp.py +++ b/pixl_core/tests/uploader/test_sftp.py @@ -26,6 +26,7 @@ import pytest from core.uploader._sftp import SFTPUploader from decouple import config +from loguru import logger TEST_DIR = Path(__file__).parents[1] @@ -38,12 +39,13 @@ class MockSFTPUploader(SFTPUploader): """Mock SFTPUploader for testing.""" - def __init__(self) -> None: + def __init__(self, host_key_path: Path) -> None: """Initialise the mock uploader with hardcoded values for SFTP config.""" self.host = os.environ["SFTP_HOST"] self.user = os.environ["SFTP_USERNAME"] self.password = os.environ["SFTP_PASSWORD"] self.port = int(os.environ["SFTP_PORT"]) + self.host_key_path = host_key_path self.project_slug = "test-project" def _set_config(self) -> None: @@ -53,12 +55,13 @@ def _set_config(self) -> None: class PixlSFTPServer: """Docker-based SFTP server for testing""" - def __init__(self) -> None: + def __init__(self, host_key_path: Path) -> None: """Initialize the DockerSFTPServer""" self.username = config("SFTP_USERNAME", default="testuser") self.password = config("SFTP_PASSWORD", default="testpass") self.port = int(config("SFTP_PORT", default=2222)) self.docker_client: docker.DockerClient = docker.from_env() + self.host_key_path = host_key_path self.container: Optional[docker.models.containers.Container] = None self.upload_dir: Optional[Path] = None @@ -79,7 +82,13 @@ def start(self) -> dict: "atmoz/sftp:alpine", command=f"{self.username}:{self.password}:::upload", ports={"22/tcp": self.port}, - volumes={str(self.upload_dir): {"bind": f"/home/{self.username}/upload", "mode": "rw"}}, + volumes={ + str(self.upload_dir): {"bind": f"/home/{self.username}/upload", "mode": "rw"}, + str(self.host_key_path / "ssh_host_rsa_key"): { + "bind": "/etc/ssh/ssh_host_rsa_key", + "mode": "ro", + }, + }, detach=True, remove=True, ) @@ -101,7 +110,7 @@ def _wait_for_server(self, timeout: int = 30) -> None: while time.time() - start_time < timeout: try: ssh = paramiko.SSHClient() - ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # noqa: S507 + ssh.load_host_keys(self.host_key_path / "known_hosts") ssh.connect( "localhost", port=self.port, @@ -113,11 +122,13 @@ def _wait_for_server(self, timeout: int = 30) -> None: sftp.close() ssh.close() except (paramiko.SSHException, OSError, ConnectionError): + logger.info("Retrying SFTP connection") time.sleep(1) else: return # Connection successful err_str = f"SFTP server did not start within {timeout} seconds" + self.stop() raise TimeoutError(err_str) def stop(self) -> None: @@ -142,10 +153,36 @@ def is_running(self) -> bool: return self.container.status == "running" -@pytest.fixture(scope="session") -def sftp_server() -> Generator[PixlSFTPServer, None, None]: +@pytest.fixture(scope="module") +def host_keys(tmp_path_factory) -> Path: + """Generates host keys and stores them in a temporary directory""" + host_key_path = tmp_path_factory.mktemp("host_keys") + + private_key = paramiko.RSAKey.generate(2048) + private_key_path = host_key_path / "ssh_host_rsa_key" + private_key.write_private_key_file(private_key_path) + Path.chmod(private_key_path, 0o600) + + public_key = private_key.get_base64() + public_key_path = host_key_path / f"{private_key.get_name()}.pub" + public_key_path.write_text(f"{private_key.get_name()} {public_key}\n") + Path.chmod(public_key_path, 0o644) + + # Generate the known_hosts file + known_hosts_path = host_key_path / "known_hosts" + port = os.environ["SFTP_PORT"] + known_hosts_path.write_text(f"localhost:{port} ssh-rsa {public_key}\n") + logger.debug(f"Generated known_hosts file at {known_hosts_path}") + logger.debug(f"known_hosts contents: {known_hosts_path.read_text()}") + Path.chmod(known_hosts_path, 0o644) + + return host_key_path + + +@pytest.fixture(scope="module") +def sftp_server(host_keys) -> Generator[PixlSFTPServer, None, None]: """Return a running SFTP server container.""" - server = PixlSFTPServer() + server = PixlSFTPServer(host_keys) server.start() yield server server.stop() @@ -165,6 +202,6 @@ def zip_content() -> Generator: yield file_content -def test_sftp_server_can_connect(sftp_server: PixlSFTPServer) -> None: +def test_sftp_server_can_connect(sftp_server): """Tests that the SFTP server can be connected to.""" assert sftp_server.is_running() From c90574c1c9719d7889b81e17447849f73e28694f Mon Sep 17 00:00:00 2001 From: Milan Malfait <38256462+milanmlft@users.noreply.github.com> Date: Mon, 7 Jul 2025 22:26:05 +0100 Subject: [PATCH 5/9] refactor: Move `SFTPServer` to separate helper module --- .../tests/uploader/helpers/sftpserver.py | 162 +++++++++++++++++ pixl_core/tests/uploader/test_sftp.py | 163 +++--------------- 2 files changed, 190 insertions(+), 135 deletions(-) create mode 100644 pixl_core/tests/uploader/helpers/sftpserver.py diff --git a/pixl_core/tests/uploader/helpers/sftpserver.py b/pixl_core/tests/uploader/helpers/sftpserver.py new file mode 100644 index 00000000..a0484327 --- /dev/null +++ b/pixl_core/tests/uploader/helpers/sftpserver.py @@ -0,0 +1,162 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import shutil +import tempfile +import time +from pathlib import Path +from typing import Optional + +import docker +import paramiko +from decouple import config +from loguru import logger + + +class SFTPServer: + """SFTP server running in a Docker container for testing""" + + def __init__(self, host_key_path: Path) -> None: + """Initialize the DockerSFTPServer""" + self.username = config("SFTP_USERNAME", default="testuser") + self.password = config("SFTP_PASSWORD", default="testpass") + self.port = int(config("SFTP_PORT", default=2222)) + self.docker_client: docker.DockerClient = docker.from_env() + self.host_key_path = host_key_path + self.container: Optional[docker.models.containers.Container] = None + self.upload_dir: Optional[Path] = None + + def start(self) -> dict: + """Start the SFTP server container""" + temp_dir = tempfile.mkdtemp() + + # Create users.conf for the SFTP server + users_conf = f"{self.username}:{self.password}:1001:1001:upload" + users_conf_path = Path(temp_dir) / "users.conf" + users_conf_path.write_text(users_conf) + + self.upload_dir = Path(temp_dir) / "upload" + self.upload_dir.mkdir(parents=True, exist_ok=True) + + # Start container + self.container = self.docker_client.containers.run( + "atmoz/sftp:alpine", + command=f"{self.username}:{self.password}:::upload", + ports={"22/tcp": self.port}, + volumes={str(self.upload_dir): {"bind": f"/home/{self.username}/upload", "mode": "rw"}}, + detach=True, + remove=True, + ) + + # Wait for container to be ready and extract host keys + self._wait_for_server() + self._extract_host_keys() + + return { + "host": "localhost", + "port": self.port, + "username": self.username, + "password": self.password, + "upload_dir": self.upload_dir, + } + + def _wait_for_server(self, timeout: int = 30) -> None: + """Wait for SFTP server to be ready""" + start_time = time.time() + while time.time() - start_time < timeout: + try: + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # noqa: S507 + ssh.connect( + "localhost", + port=self.port, + username=self.username, + password=self.password, + timeout=5, + ) + sftp = ssh.open_sftp() + sftp.close() + ssh.close() + except (paramiko.SSHException, OSError, ConnectionError, docker.errors.NotFound) as e: + logger.info(f"Retrying SFTP connection: {e}") + time.sleep(1) + else: + return # Connection successful + + err_str = f"SFTP server did not start within {timeout} seconds" + self.stop() + raise TimeoutError(err_str) + + def _extract_host_keys(self) -> None: + """Extract host keys from the running container""" + if not self.container: + msg = "Container not started" + raise RuntimeError(msg) + + key_types = ["ssh_host_ed25519_key", "ssh_host_rsa_key", "ssh_host_ecdsa_key"] + host_keys = [] + + for key_type in key_types: + exit_code, output = self.container.exec_run(f"cat /etc/ssh/{key_type}.pub") + if exit_code == 0: + host_key_content = output.decode().strip() + logger.debug(f"Extracted {key_type}: {host_key_content}") + host_keys.append(host_key_content) + + if not host_keys: + msg = "No host keys found in container" + raise RuntimeError(msg) + + # Create known_hosts file with all available keys + known_hosts_path = self.host_key_path / "known_hosts" + known_hosts_content = "" + + for host_key_content in host_keys: + parts = host_key_content.split() + if len(parts) >= 2: + key_type = parts[0] # e.g., "ssh-ed25519", "ssh-rsa" + key_data = parts[1] # base64 encoded key + known_hosts_content += f"[localhost]:{self.port} {key_type} {key_data}\n" + + known_hosts_path.write_text(known_hosts_content) + Path.chmod(known_hosts_path, 0o644) + + logger.debug(f"Created known_hosts file: {known_hosts_content}") + + # Also save the first public key for reference + if host_keys: + public_key_path = self.host_key_path / "ssh_host_key.pub" + public_key_path.write_text(host_keys[0] + "\n") + Path.chmod(public_key_path, 0o644) + + def stop(self) -> None: + """Stop the SFTP server container""" + if self.container: + self.container.stop() + self.container = None + + if self.upload_dir: + shutil.rmtree(self.upload_dir, ignore_errors=True) + self.upload_dir = None + + def is_running(self) -> bool: + """Check if the SFTP server is running""" + if not self.container: + return False + try: + self.container.reload() + except docker.errors.NotFound: + return False + else: + return self.container.status == "running" diff --git a/pixl_core/tests/uploader/test_sftp.py b/pixl_core/tests/uploader/test_sftp.py index bc06e9b9..0aa3da9b 100644 --- a/pixl_core/tests/uploader/test_sftp.py +++ b/pixl_core/tests/uploader/test_sftp.py @@ -14,19 +14,13 @@ """Test uploading files to an SFTP endpoint.""" import os -import shutil -import tempfile -import time from collections.abc import Generator from pathlib import Path -from typing import Optional -import docker import paramiko import pytest from core.uploader._sftp import SFTPUploader -from decouple import config -from loguru import logger +from pixl_core.tests.uploader.helpers.sftpserver import SFTPServer TEST_DIR = Path(__file__).parents[1] @@ -52,146 +46,25 @@ def _set_config(self) -> None: """Override to avoid Azure Key Vault dependency in tests.""" -class PixlSFTPServer: - """Docker-based SFTP server for testing""" - - def __init__(self, host_key_path: Path) -> None: - """Initialize the DockerSFTPServer""" - self.username = config("SFTP_USERNAME", default="testuser") - self.password = config("SFTP_PASSWORD", default="testpass") - self.port = int(config("SFTP_PORT", default=2222)) - self.docker_client: docker.DockerClient = docker.from_env() - self.host_key_path = host_key_path - self.container: Optional[docker.models.containers.Container] = None - self.upload_dir: Optional[Path] = None - - def start(self) -> dict: - """Start the SFTP server container""" - temp_dir = tempfile.mkdtemp() - - # Create users.conf for the SFTP server - users_conf = f"{self.username}:{self.password}:1001:1001:upload" - users_conf_path = Path(temp_dir) / "users.conf" - users_conf_path.write_text(users_conf) - - self.upload_dir = Path(temp_dir) / "upload" - self.upload_dir.mkdir(parents=True, exist_ok=True) - - # Start container - self.container = self.docker_client.containers.run( - "atmoz/sftp:alpine", - command=f"{self.username}:{self.password}:::upload", - ports={"22/tcp": self.port}, - volumes={ - str(self.upload_dir): {"bind": f"/home/{self.username}/upload", "mode": "rw"}, - str(self.host_key_path / "ssh_host_rsa_key"): { - "bind": "/etc/ssh/ssh_host_rsa_key", - "mode": "ro", - }, - }, - detach=True, - remove=True, - ) - - # Wait for container to be ready - self._wait_for_server() - - return { - "host": "localhost", - "port": self.port, - "username": self.username, - "password": self.password, - "upload_dir": self.upload_dir, - } - - def _wait_for_server(self, timeout: int = 30) -> None: - """Wait for SFTP server to be ready""" - start_time = time.time() - while time.time() - start_time < timeout: - try: - ssh = paramiko.SSHClient() - ssh.load_host_keys(self.host_key_path / "known_hosts") - ssh.connect( - "localhost", - port=self.port, - username=self.username, - password=self.password, - timeout=5, - ) - sftp = ssh.open_sftp() - sftp.close() - ssh.close() - except (paramiko.SSHException, OSError, ConnectionError): - logger.info("Retrying SFTP connection") - time.sleep(1) - else: - return # Connection successful - - err_str = f"SFTP server did not start within {timeout} seconds" - self.stop() - raise TimeoutError(err_str) - - def stop(self) -> None: - """Stop the SFTP server container""" - if self.container: - self.container.stop() - self.container = None - - if self.upload_dir: - shutil.rmtree(self.upload_dir, ignore_errors=True) - self.upload_dir = None - - def is_running(self) -> bool: - """Check if the SFTP server is running""" - if not self.container: - return False - try: - self.container.reload() - except docker.errors.NotFound: - return False - else: - return self.container.status == "running" - - @pytest.fixture(scope="module") def host_keys(tmp_path_factory) -> Path: - """Generates host keys and stores them in a temporary directory""" - host_key_path = tmp_path_factory.mktemp("host_keys") - - private_key = paramiko.RSAKey.generate(2048) - private_key_path = host_key_path / "ssh_host_rsa_key" - private_key.write_private_key_file(private_key_path) - Path.chmod(private_key_path, 0o600) - - public_key = private_key.get_base64() - public_key_path = host_key_path / f"{private_key.get_name()}.pub" - public_key_path.write_text(f"{private_key.get_name()} {public_key}\n") - Path.chmod(public_key_path, 0o644) - - # Generate the known_hosts file - known_hosts_path = host_key_path / "known_hosts" - port = os.environ["SFTP_PORT"] - known_hosts_path.write_text(f"localhost:{port} ssh-rsa {public_key}\n") - logger.debug(f"Generated known_hosts file at {known_hosts_path}") - logger.debug(f"known_hosts contents: {known_hosts_path.read_text()}") - Path.chmod(known_hosts_path, 0o644) - - return host_key_path + """Creates temporary directory for host keys (will be populated by server)""" + return tmp_path_factory.mktemp("host_keys") @pytest.fixture(scope="module") -def sftp_server(host_keys) -> Generator[PixlSFTPServer, None, None]: +def sftp_server(host_keys) -> Generator[SFTPServer, None, None]: """Return a running SFTP server container.""" - server = PixlSFTPServer(host_keys) + server = SFTPServer(host_keys) server.start() yield server server.stop() @pytest.fixture() -def sftp_uploader() -> MockSFTPUploader: +def sftp_uploader(host_keys) -> MockSFTPUploader: """Return a MockSFTPUploader object.""" - return MockSFTPUploader() + return MockSFTPUploader(host_keys) @pytest.fixture() @@ -202,6 +75,26 @@ def zip_content() -> Generator: yield file_content -def test_sftp_server_can_connect(sftp_server): +def test_sftp_server_can_connect(sftp_server, sftp_uploader): """Tests that the SFTP server can be connected to.""" assert sftp_server.is_running() + + # Test actual SFTP connection using the uploader + ssh_client = paramiko.SSHClient() + ssh_client.set_missing_host_key_policy(paramiko.RejectPolicy()) + ssh_client.load_host_keys(str(sftp_uploader.host_key_path / "known_hosts")) + + try: + ssh_client.connect( + sftp_uploader.host, + port=sftp_uploader.port, + username=sftp_uploader.user, + password=sftp_uploader.password, + timeout=5, + ) + sftp_client = ssh_client.open_sftp() + sftp_client.close() + ssh_client.close() + except paramiko.SSHException as e: + ssh_client.close() + pytest.fail(f"Failed to connect to SFTP server: {e}") From da3312b430ea11ecd59a61062693a43e4306a6e3 Mon Sep 17 00:00:00 2001 From: Milan Malfait <38256462+milanmlft@users.noreply.github.com> Date: Tue, 8 Jul 2025 00:00:26 +0100 Subject: [PATCH 6/9] Fix parquet uploader, pass tests --- pixl_core/src/core/uploader/_sftp.py | 42 ++++-- .../tests/uploader/helpers/sftpserver.py | 26 ++-- pixl_core/tests/uploader/test_sftp.py | 132 ++++++++++++++---- 3 files changed, 148 insertions(+), 52 deletions(-) diff --git a/pixl_core/src/core/uploader/_sftp.py b/pixl_core/src/core/uploader/_sftp.py index a5e6e731..cdad5106 100644 --- a/pixl_core/src/core/uploader/_sftp.py +++ b/pixl_core/src/core/uploader/_sftp.py @@ -14,6 +14,9 @@ """Uploader subclass for SFTP.""" +import os +from collections.abc import Generator +from contextlib import contextmanager from typing import BinaryIO, Optional import paramiko @@ -42,7 +45,7 @@ def _set_config(self) -> None: self.username = self.keyvault.fetch_secret(f"{az_prefix}--sftp--username") self.password = self.keyvault.fetch_secret(f"{az_prefix}--sftp--password") self.port = int(self.keyvault.fetch_secret(f"{az_prefix}--sftp--port")) - self.host_key_path = self.keyvault.fetch_secret(f"{az_prefix}--sftp--host-key-path") + self.known_hosts_path = self.keyvault.fetch_secret(f"{az_prefix}--sftp--known-hosts-path") def _upload_dicom_image(self, study_id: str, study_tags: StudyTags) -> None: """ @@ -65,13 +68,12 @@ def send_via_sftp( """Send the zip content to the SFTP server.""" filename = f"{pseudo_anon_image_id}.zip" - self._connect_client() with self._connect_client() as sftp_client: _sftp_create_remote_directory(sftp_client, remote_directory) sftp_client.chdir(remote_directory) sftp_client.putfo(zip_content, filename) - def upload_parquet_files(self, parquet_export: ParquetExport) -> None: + def upload_parquet_files(self, parquet_export: ParquetExport, remote_directory: str) -> None: """ Upload parquet to FTPS under //parquet. :param parquet_export: instance of the ParquetExport class @@ -96,29 +98,43 @@ def upload_parquet_files(self, parquet_export: ParquetExport) -> None: msg = f"No files found in {source_root_dir}" raise FileNotFoundError(msg) - remote_directory = f"{self.project_slug}/{parquet_export.extract_time_slug}/parquet" + parquet_dir = f"{parquet_export.project_slug}/{parquet_export.extract_time_slug}/parquet" + upload_dir = f"{remote_directory}/{parquet_dir}" with self._connect_client() as sftp_client: - _sftp_create_remote_directory(sftp_client, remote_directory) - sftp_client.chdir(remote_directory) + _sftp_create_remote_directory(sftp_client, upload_dir) for source_path in source_files: - sftp_client.put(source_path, source_path.name) + sftp_client.chdir(None) # reset + sftp_client.chdir(upload_dir) + + source_rel_path = source_path.relative_to(source_root_dir) + source_rel_dir = source_rel_path.parent + source_filename_only = source_rel_path.relative_to(source_rel_dir) + _sftp_create_remote_directory(sftp_client, str(source_rel_dir)) + sftp_client.chdir(str(source_rel_dir)) + sftp_client.put(source_path, str(source_filename_only)) logger.info("Finished SFTP upload of files for '{}'", parquet_export.project_slug) - def _connect_client(self) -> paramiko.SFTPClient: - """Connect to the SFTP client""" + @contextmanager + def _connect_client(self) -> Generator[paramiko.SFTPClient, None, None]: + """Connect to the SFTP client as a context manager""" ssh_client = paramiko.SSHClient() ssh_client.set_missing_host_key_policy(paramiko.RejectPolicy()) - ssh_client.load_host_keys(self.host_key_path) + ssh_client.load_host_keys(self.known_hosts_path) ssh_client.connect( self.host, port=self.port, username=self.username, password=self.password ) - return ssh_client.open_sftp() + sftp_client = ssh_client.open_sftp() + try: + yield sftp_client + finally: + sftp_client.close() + ssh_client.close() def _sftp_create_remote_directory(sftp_client: paramiko.SFTPClient, directory: str) -> None: """ - Create remote directory if it doesn't exist. + Create remote directory and its parents if it doesn't exist. :param sftp_client: SFTP client instance :param directory: Directory path to create @@ -126,5 +142,7 @@ def _sftp_create_remote_directory(sftp_client: paramiko.SFTPClient, directory: s try: sftp_client.stat(directory) except FileNotFoundError: + parent_dir = os.path.dirname(directory) # noqa: PTH120 + _sftp_create_remote_directory(sftp_client, str(parent_dir)) sftp_client.mkdir(directory) logger.debug(f"Created remote directory: {directory}") diff --git a/pixl_core/tests/uploader/helpers/sftpserver.py b/pixl_core/tests/uploader/helpers/sftpserver.py index a0484327..f4f6cb25 100644 --- a/pixl_core/tests/uploader/helpers/sftpserver.py +++ b/pixl_core/tests/uploader/helpers/sftpserver.py @@ -35,26 +35,31 @@ def __init__(self, host_key_path: Path) -> None: self.docker_client: docker.DockerClient = docker.from_env() self.host_key_path = host_key_path self.container: Optional[docker.models.containers.Container] = None - self.upload_dir: Optional[Path] = None + self.mounted_upload_dir: Optional[Path] = None def start(self) -> dict: """Start the SFTP server container""" temp_dir = tempfile.mkdtemp() # Create users.conf for the SFTP server - users_conf = f"{self.username}:{self.password}:1001:1001:upload" + users_conf = f"{self.username}:{self.password}:::upload" users_conf_path = Path(temp_dir) / "users.conf" users_conf_path.write_text(users_conf) - self.upload_dir = Path(temp_dir) / "upload" - self.upload_dir.mkdir(parents=True, exist_ok=True) + self.mounted_upload_dir = Path(temp_dir) / "upload" + self.mounted_upload_dir.mkdir(parents=True, exist_ok=True) # Start container self.container = self.docker_client.containers.run( "atmoz/sftp:alpine", command=f"{self.username}:{self.password}:::upload", ports={"22/tcp": self.port}, - volumes={str(self.upload_dir): {"bind": f"/home/{self.username}/upload", "mode": "rw"}}, + volumes={ + str(self.mounted_upload_dir): { + "bind": f"/home/{self.username}/upload", + "mode": "rw", + } + }, detach=True, remove=True, ) @@ -68,7 +73,7 @@ def start(self) -> dict: "port": self.port, "username": self.username, "password": self.password, - "upload_dir": self.upload_dir, + "upload_dir": self.mounted_upload_dir, } def _wait_for_server(self, timeout: int = 30) -> None: @@ -111,7 +116,6 @@ def _extract_host_keys(self) -> None: exit_code, output = self.container.exec_run(f"cat /etc/ssh/{key_type}.pub") if exit_code == 0: host_key_content = output.decode().strip() - logger.debug(f"Extracted {key_type}: {host_key_content}") host_keys.append(host_key_content) if not host_keys: @@ -132,8 +136,6 @@ def _extract_host_keys(self) -> None: known_hosts_path.write_text(known_hosts_content) Path.chmod(known_hosts_path, 0o644) - logger.debug(f"Created known_hosts file: {known_hosts_content}") - # Also save the first public key for reference if host_keys: public_key_path = self.host_key_path / "ssh_host_key.pub" @@ -146,9 +148,9 @@ def stop(self) -> None: self.container.stop() self.container = None - if self.upload_dir: - shutil.rmtree(self.upload_dir, ignore_errors=True) - self.upload_dir = None + if self.mounted_upload_dir: + shutil.rmtree(self.mounted_upload_dir, ignore_errors=True) + self.mounted_upload_dir = None def is_running(self) -> bool: """Check if the SFTP server is running""" diff --git a/pixl_core/tests/uploader/test_sftp.py b/pixl_core/tests/uploader/test_sftp.py index 0aa3da9b..bf0418b1 100644 --- a/pixl_core/tests/uploader/test_sftp.py +++ b/pixl_core/tests/uploader/test_sftp.py @@ -13,16 +13,23 @@ # limitations under the License. """Test uploading files to an SFTP endpoint.""" +import filecmp import os from collections.abc import Generator +from datetime import UTC, datetime from pathlib import Path -import paramiko +import pandas as pd import pytest +from core.db.models import Image +from core.db.queries import update_exported_at +from core.exports import ParquetExport from core.uploader._sftp import SFTPUploader from pixl_core.tests.uploader.helpers.sftpserver import SFTPServer +from pydicom.uid import generate_uid TEST_DIR = Path(__file__).parents[1] +SFTP_UPLOAD_DIR = "upload" os.environ["SFTP_HOST"] = "localhost" os.environ["SFTP_USERNAME"] = "testuser" @@ -33,13 +40,13 @@ class MockSFTPUploader(SFTPUploader): """Mock SFTPUploader for testing.""" - def __init__(self, host_key_path: Path) -> None: + def __init__(self, known_hosts_path: Path) -> None: """Initialise the mock uploader with hardcoded values for SFTP config.""" self.host = os.environ["SFTP_HOST"] - self.user = os.environ["SFTP_USERNAME"] + self.username = os.environ["SFTP_USERNAME"] self.password = os.environ["SFTP_PASSWORD"] self.port = int(os.environ["SFTP_PORT"]) - self.host_key_path = host_key_path + self.known_hosts_path = known_hosts_path self.project_slug = "test-project" def _set_config(self) -> None: @@ -64,7 +71,7 @@ def sftp_server(host_keys) -> Generator[SFTPServer, None, None]: @pytest.fixture() def sftp_uploader(host_keys) -> MockSFTPUploader: """Return a MockSFTPUploader object.""" - return MockSFTPUploader(host_keys) + return MockSFTPUploader(host_keys / "known_hosts") @pytest.fixture() @@ -75,26 +82,95 @@ def zip_content() -> Generator: yield file_content -def test_sftp_server_can_connect(sftp_server, sftp_uploader): - """Tests that the SFTP server can be connected to.""" - assert sftp_server.is_running() - - # Test actual SFTP connection using the uploader - ssh_client = paramiko.SSHClient() - ssh_client.set_missing_host_key_policy(paramiko.RejectPolicy()) - ssh_client.load_host_keys(str(sftp_uploader.host_key_path / "known_hosts")) - - try: - ssh_client.connect( - sftp_uploader.host, - port=sftp_uploader.port, - username=sftp_uploader.user, - password=sftp_uploader.password, - timeout=5, - ) - sftp_client = ssh_client.open_sftp() - sftp_client.close() - ssh_client.close() - except paramiko.SSHException as e: - ssh_client.close() - pytest.fail(f"Failed to connect to SFTP server: {e}") +def test_send_via_sftp( + zip_content, not_yet_exported_dicom_image, sftp_uploader, sftp_server +) -> None: + """Tests that DICOM image can be uploaded to the correct location via SFTP""" + # ARRANGE + pseudo_anon_id = not_yet_exported_dicom_image.pseudo_study_uid + project_slug = "some-project-slug" + expected_output_file = sftp_server.mounted_upload_dir / project_slug / (pseudo_anon_id + ".zip") + + # The mock SFTP server requires files to be uploaded to the upload/ directory + remote_directory = f"{SFTP_UPLOAD_DIR}/{project_slug}" + + # ACT + sftp_uploader.send_via_sftp(zip_content, pseudo_anon_id, remote_directory) + + # ASSERT + assert expected_output_file.exists() + + +def test_update_exported_and_save(rows_in_session) -> None: + """Tests that the exported_at field is updated when a file is uploaded""" + # ARRANGE + expected_export_time = datetime.now(tz=UTC) + + # ACT + update_exported_at(generate_uid(entropy_srcs=["not_yet_exported"]), expected_export_time) + new_row = ( + rows_in_session.query(Image) + .filter(Image.pseudo_study_uid == generate_uid(entropy_srcs=["not_yet_exported"])) + .one() + ) + actual_export_time = new_row.exported_at.replace(tzinfo=UTC) + + # ASSERT + assert actual_export_time == expected_export_time + + +@pytest.fixture() +def parquet_export(export_dir) -> ParquetExport: + """ + Return a ParquetExport object. + + This fixture is deliberately not definied in conftest, because it imports the ParquetExport + class, which in turn loads the PixlConfig class, which in turn requres the PROJECT_CONFIGS_DIR + environment to be set. This environment variable is set in conftest, so the import needs to + happen after that. + """ + return ParquetExport( + project_name_raw="i-am-a-project", + extract_datetime=datetime.now(tz=UTC), + export_dir=export_dir, + ) + + +def test_upload_parquet(parquet_export, sftp_uploader, sftp_server) -> None: + """Tests that parquet files are uploaded to the correct location via SFTP""" + # ARRANGE + # Set up the mock server directory + parquet_export.copy_to_exports(Path(__file__).parents[3] / "test" / "resources" / "omop") + parquet_export.export_radiology_linker(pd.DataFrame(list("dummy"), columns=["D"])) + + # ACT + sftp_uploader.upload_parquet_files(parquet_export, SFTP_UPLOAD_DIR) + + # ASSERT + expected_public_parquet_dir = ( + sftp_server.mounted_upload_dir + / parquet_export.project_slug + / parquet_export.extract_time_slug + / "parquet" + ) + assert expected_public_parquet_dir.exists() + + # Print difference report to aid debugging (it doesn't actually assert anything) + dc = filecmp.dircmp(parquet_export.current_extract_base, expected_public_parquet_dir) + dc.report_full_closure() + assert ( + expected_public_parquet_dir / "omop" / "public" / "PROCEDURE_OCCURRENCE.parquet" + ).exists(), "Public PROCEDURE_OCCURRENCE.parquet file not found" + assert ( + expected_public_parquet_dir / "radiology" / "IMAGE_LINKER.parquet" + ).exists(), "Radiology IMAGE_LINKER.parquet file not found" + + +def test_no_export_to_upload(parquet_export, sftp_uploader, sftp_server) -> None: + """If there is nothing in the export directory, an exception is thrown""" + # ARRANGE + parquet_export.public_output.mkdir(parents=True, exist_ok=True) + + # ACT & ASSERT + with pytest.raises(FileNotFoundError): + sftp_uploader.upload_parquet_files(parquet_export, SFTP_UPLOAD_DIR) From 1dfd8d36dc21364a861ba4b67715e29ede268551 Mon Sep 17 00:00:00 2001 From: Milan Malfait <38256462+milanmlft@users.noreply.github.com> Date: Tue, 8 Jul 2025 12:35:44 +0100 Subject: [PATCH 7/9] Catch correct errors Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pixl_core/src/core/uploader/_sftp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixl_core/src/core/uploader/_sftp.py b/pixl_core/src/core/uploader/_sftp.py index cdad5106..7ec0a881 100644 --- a/pixl_core/src/core/uploader/_sftp.py +++ b/pixl_core/src/core/uploader/_sftp.py @@ -141,7 +141,7 @@ def _sftp_create_remote_directory(sftp_client: paramiko.SFTPClient, directory: s """ try: sftp_client.stat(directory) - except FileNotFoundError: + except (IOError, OSError): parent_dir = os.path.dirname(directory) # noqa: PTH120 _sftp_create_remote_directory(sftp_client, str(parent_dir)) sftp_client.mkdir(directory) From d4b302b848265054cd4959de01ba64c428f6af0a Mon Sep 17 00:00:00 2001 From: Milan Malfait <38256462+milanmlft@users.noreply.github.com> Date: Tue, 8 Jul 2025 12:36:53 +0100 Subject: [PATCH 8/9] Call `generate_uid` only once Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pixl_core/tests/uploader/test_sftp.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pixl_core/tests/uploader/test_sftp.py b/pixl_core/tests/uploader/test_sftp.py index bf0418b1..797e434a 100644 --- a/pixl_core/tests/uploader/test_sftp.py +++ b/pixl_core/tests/uploader/test_sftp.py @@ -107,10 +107,11 @@ def test_update_exported_and_save(rows_in_session) -> None: expected_export_time = datetime.now(tz=UTC) # ACT - update_exported_at(generate_uid(entropy_srcs=["not_yet_exported"]), expected_export_time) + pseudo_study_uid = generate_uid(entropy_srcs=["not_yet_exported"]) + update_exported_at(pseudo_study_uid, expected_export_time) new_row = ( rows_in_session.query(Image) - .filter(Image.pseudo_study_uid == generate_uid(entropy_srcs=["not_yet_exported"])) + .filter(Image.pseudo_study_uid == pseudo_study_uid) .one() ) actual_export_time = new_row.exported_at.replace(tzinfo=UTC) From 5ed119cf8ee6bf8661d217835a5bf14a26207b37 Mon Sep 17 00:00:00 2001 From: Milan Malfait <38256462+milanmlft@users.noreply.github.com> Date: Tue, 8 Jul 2025 12:38:56 +0100 Subject: [PATCH 9/9] Fix typo --- pixl_core/src/core/uploader/_sftp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pixl_core/src/core/uploader/_sftp.py b/pixl_core/src/core/uploader/_sftp.py index 7ec0a881..db26d74d 100644 --- a/pixl_core/src/core/uploader/_sftp.py +++ b/pixl_core/src/core/uploader/_sftp.py @@ -75,7 +75,7 @@ def send_via_sftp( def upload_parquet_files(self, parquet_export: ParquetExport, remote_directory: str) -> None: """ - Upload parquet to FTPS under //parquet. + Upload parquet to SFTP under //parquet. :param parquet_export: instance of the ParquetExport class The final directory structure will look like this: @@ -141,7 +141,7 @@ def _sftp_create_remote_directory(sftp_client: paramiko.SFTPClient, directory: s """ try: sftp_client.stat(directory) - except (IOError, OSError): + except OSError: parent_dir = os.path.dirname(directory) # noqa: PTH120 _sftp_create_remote_directory(sftp_client, str(parent_dir)) sftp_client.mkdir(directory)