feat: make subprocess executor robust against invalid encoding and capture output while printing

xxthunder · xxthunder · commit 1e4680159a9f · 2025-08-11T14:25:50.000+02:00
diff --git a/src/py_app_dev/core/subprocess.py b/src/py_app_dev/core/subprocess.py
@@ -1,88 +1,99 @@
-import shutil
-import subprocess  # nosec
-from pathlib import Path
-from typing import Any
-
-from .exceptions import UserNotificationException
-from .logging import logger
-
-
-def which(app_name: str) -> Path | None:
-    """Return the path to the app if it is in the PATH, otherwise return None."""
-    app_path = shutil.which(app_name)
-    return Path(app_path) if app_path else None
-
-
-class SubprocessExecutor:
-    """
-    Execute a command in a subprocess.
-
-    Args:
-    ----
-        capture_output: If True, the output of the command will be captured.
-        print_output: If True, the output of the command will be printed to the logger.
-                      One can set this to false in order to get the output in the returned CompletedProcess object.
-
-    """
-
-    def __init__(
-        self,
-        command: str | list[str | Path],
-        cwd: Path | None = None,
-        capture_output: bool = True,
-        env: dict[str, str] | None = None,
-        shell: bool = False,
-        print_output: bool = True,
-    ):
-        self.logger = logger.bind()
-        self.command = command
-        self.current_working_directory = cwd
-        self.capture_output = capture_output
-        self.env = env
-        self.shell = shell
-        self.print_output = print_output
-
-    @property
-    def command_str(self) -> str:
-        if isinstance(self.command, str):
-            return self.command
-        return " ".join(str(arg) if not isinstance(arg, str) else arg for arg in self.command)
-
-    def execute(self, handle_errors: bool = True) -> subprocess.CompletedProcess[Any] | None:
-        """Execute the command and return the CompletedProcess object if handle_errors is False."""
-        try:
-            completed_process = None
-            stdout = ""
-            stderr = ""
-            self.logger.info(f"Running command: {self.command_str}")
-            cwd_path = (self.current_working_directory or Path.cwd()).as_posix()
-            with subprocess.Popen(
-                args=self.command,
-                cwd=cwd_path,
-                stdout=(subprocess.PIPE if self.capture_output else subprocess.DEVNULL),
-                stderr=(subprocess.STDOUT if self.capture_output else subprocess.DEVNULL),
-                text=True,
-                env=self.env,
-                shell=self.shell,
-            ) as process:  # nosec
-                if self.capture_output and process.stdout is not None:
-                    if self.print_output:
-                        for line in iter(process.stdout.readline, ""):
-                            self.logger.info(line.strip())
-                        process.wait()
-                    else:
-                        stdout, stderr = process.communicate()
-
-            if handle_errors:
-                # Check return code
-                if process.returncode != 0:
-                    raise subprocess.CalledProcessError(process.returncode, self.command_str)
-            else:
-                completed_process = subprocess.CompletedProcess(process.args, process.returncode, stdout, stderr)
-        except subprocess.CalledProcessError as e:
-            raise UserNotificationException(f"Command '{self.command_str}' execution failed with return code {e.returncode}") from None
-        except FileNotFoundError as e:
-            raise UserNotificationException(f"Command '{self.command_str}' could not be executed. Failed with error {e}") from None
-        except KeyboardInterrupt:
-            raise UserNotificationException(f"Command '{self.command_str}' execution interrupted by user") from None
-        return completed_process
+import locale
+import shutil
+import subprocess  # nosec
+from pathlib import Path
+from typing import Any
+
+from .exceptions import UserNotificationException
+from .logging import logger
+
+
+def which(app_name: str) -> Path | None:
+    """Look up ``app_name`` on the PATH; return its location as a ``Path``, or ``None`` when it is not found."""
+    resolved = shutil.which(app_name)
+    if not resolved:
+        return None
+    return Path(resolved)
+
+
+class SubprocessExecutor:
+    """
+    Execute a command in a subprocess.
+
+    Args:
+    ----
+        command: The command to run, either as a single string or as a list of arguments.
+        cwd: Working directory for the command. The current working directory is used when omitted.
+        capture_output: If True, stdout and stderr of the command are merged into one stream and captured.
+                        If False, both streams are discarded.
+        env: Environment handed to subprocess.Popen unchanged.
+        shell: Handed to subprocess.Popen unchanged.
+        print_output: If True, every captured line is also written to the logger while the command runs.
+                      The captured output ends up in the returned CompletedProcess either way.
+
+    """
+
+    def __init__(
+        self,
+        command: str | list[str | Path],
+        cwd: Path | None = None,
+        capture_output: bool = True,
+        env: dict[str, str] | None = None,
+        shell: bool = False,
+        print_output: bool = True,
+    ):
+        self.logger = logger.bind()
+        self.command = command
+        self.current_working_directory = cwd
+        self.capture_output = capture_output
+        self.env = env
+        self.shell = shell
+        self.print_output = print_output
+
+    @property
+    def command_str(self) -> str:
+        """Return the command as a single space-separated string (used for logging and error messages)."""
+        if isinstance(self.command, str):
+            return self.command
+        return " ".join(str(arg) for arg in self.command)
+
+    def execute(self, handle_errors: bool = True) -> subprocess.CompletedProcess[Any] | None:
+        """
+        Execute the command.
+
+        Args:
+        ----
+            handle_errors: If True, a non-zero return code raises UserNotificationException and None is returned on success.
+                           If False, a CompletedProcess is returned regardless of the return code.
+
+        Returns:
+        -------
+            The CompletedProcess when handle_errors is False, otherwise None.
+            Its stdout holds the merged stdout/stderr text when capture_output is True and is empty otherwise.
+
+        Raises:
+        ------
+            UserNotificationException: If the command fails (only when handle_errors is True), cannot be started,
+                                       or is interrupted by the user.
+
+        """
+        completed_process = None
+        stdout = ""
+        stderr = ""
+        try:
+            self.logger.info(f"Running command: {self.command_str}")
+            cwd_path = (self.current_working_directory or Path.cwd()).as_posix()
+            with subprocess.Popen(
+                args=self.command,
+                cwd=cwd_path,
+                # Combine both streams to stdout (when captured)
+                stdout=(subprocess.PIPE if self.capture_output else subprocess.DEVNULL),
+                stderr=(subprocess.STDOUT if self.capture_output else subprocess.DEVNULL),
+                # enables line buffering, line is flushed after each \n
+                bufsize=1,
+                # text mode: output is decoded to str and every line ending is reported as \n
+                text=True,
+                # decode bytes to str using current locale/system encoding
+                encoding=locale.getpreferredencoding(False),
+                # replace undecodable bytes with the Unicode replacement character instead of raising
+                errors="replace",
+                env=self.env,
+                shell=self.shell,
+            ) as process:  # nosec
+                if self.capture_output and process.stdout is not None:
+                    if self.print_output:
+                        # Log each line as soon as it arrives and keep it for the result;
+                        # collecting into a list avoids repeated string concatenation.
+                        captured_lines: list[str] = []
+                        for line in iter(process.stdout.readline, ""):
+                            self.logger.info(line.strip())
+                            captured_lines.append(line)
+                        process.wait()
+                        stdout = "".join(captured_lines)
+                    else:
+                        # stderr is redirected into stdout, so communicate() reports None for stderr
+                        stdout, stderr = process.communicate()
+
+            if handle_errors:
+                # Check return code
+                if process.returncode != 0:
+                    raise subprocess.CalledProcessError(process.returncode, self.command_str)
+            else:
+                completed_process = subprocess.CompletedProcess(process.args, process.returncode, stdout, stderr)
+        except subprocess.CalledProcessError as e:
+            raise UserNotificationException(f"Command '{self.command_str}' execution failed with return code {e.returncode}") from None
+        except FileNotFoundError as e:
+            raise UserNotificationException(f"Command '{self.command_str}' could not be executed. Failed with error {e}") from None
+        except KeyboardInterrupt:
+            raise UserNotificationException(f"Command '{self.command_str}' execution interrupted by user") from None
+        return completed_process
diff --git a/tests/test_subprocess.py b/tests/test_subprocess.py
@@ -1,20 +1,166 @@
-from pathlib import Path
-
-from py_app_dev.core.subprocess import SubprocessExecutor, which
-
-
-def test_get_app_path():
-    assert which("python")
-
-
-def test_subprocess_executor(tmp_path: Path) -> None:
-    SubprocessExecutor(["python", "-V"], cwd=tmp_path, capture_output=True).execute()
-
-
-def test_subprocess_executor_no_error_handling() -> None:
-    process = SubprocessExecutor(["python", "-V"], capture_output=True).execute(handle_errors=False)
-    assert process and process.returncode == 0
-    assert process.stdout == ""
-    process = SubprocessExecutor(["python", "-V"], capture_output=True, print_output=False).execute(handle_errors=False)
-    assert process and process.returncode == 0
-    assert "Python" in process.stdout
+import os
+import tempfile
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+import pytest
+
+from py_app_dev.core.subprocess import SubprocessExecutor, which
+
+
+def test_get_app_path():
+    """The ``which`` helper must resolve an executable that is available on the PATH."""
+    python_path = which("python")
+    assert python_path is not None
+
+
+class TestSubprocessExecutor:
+    """Behavioural tests for SubprocessExecutor."""
+
+    @patch("loguru._logger.Logger.info")
+    def test_no_error_handling_scenarios(self, mock_info: Mock) -> None:
+        """Check logging and captured output for several flag combinations with handle_errors=False."""
+        version_command = ["python", "-V"]
+
+        # Scenario 1: default flags (capture and print) - both the command and its output are logged
+        mock_info.reset_mock()
+        result = SubprocessExecutor(version_command).execute(handle_errors=False)
+        assert result is not None
+        assert result.returncode == 0
+        assert "Python" in result.stdout
+
+        # At least two log entries are expected: the command line and the version output
+        assert mock_info.call_count >= 2
+        logged_entries = [str(call) for call in mock_info.call_args_list]
+        # The command line itself must show up in the log
+        assert any("Running command: python -V" in entry for entry in logged_entries), "Command execution should be logged"
+        # The version output must show up in an entry other than the command line
+        output_entries = [entry for entry in logged_entries if "Running command" not in entry]
+        assert any("Python" in entry for entry in output_entries), "Python version output should be logged when print_output=True"
+
+        # Scenario 2: print_output=False - only the command is logged, the output is still captured
+        mock_info.reset_mock()
+        quiet_executor = SubprocessExecutor(version_command, capture_output=True, print_output=False)
+        result = quiet_executor.execute(handle_errors=False)
+        assert result is not None
+        assert result.returncode == 0
+        assert "Python" in result.stdout
+
+        # Exactly one log entry: the command line
+        assert mock_info.call_count == 1
+        assert "Running command: python -V" in str(mock_info.call_args_list[0])
+
+        # Scenario 3: capture_output=False - only the command is logged and nothing is captured
+        mock_info.reset_mock()
+        discarding_executor = SubprocessExecutor(version_command, capture_output=False, print_output=False)
+        result = discarding_executor.execute(handle_errors=False)
+        assert result is not None
+        assert result.returncode == 0
+        assert result.stdout == ""
+
+        # Exactly one log entry: the command line
+        assert mock_info.call_count == 1
+        assert "Running command: python -V" in str(mock_info.call_args_list[0])
+
+    @pytest.mark.parametrize(
+        "capture_output,print_output,expected_stdout_empty",
+        [
+            (True, True, False),  # captured and printed -> stdout is filled
+            (True, False, False),  # captured only -> stdout is filled
+            (False, True, True),  # printing requested without capturing -> stdout stays empty
+            (False, False, True),  # neither captured nor printed -> stdout stays empty
+        ],
+    )
+    def test_output_capture_combinations(self, capture_output: bool, print_output: bool, expected_stdout_empty: bool) -> None:
+        """Every capture_output/print_output combination yields the expected stdout content."""
+        executor = SubprocessExecutor(["python", "-V"], capture_output=capture_output, print_output=print_output)
+        result = executor.execute(handle_errors=False)
+
+        assert result is not None
+        assert result.returncode == 0
+
+        if not expected_stdout_empty:
+            assert "Python" in result.stdout
+        else:
+            assert result.stdout == ""
+
+    @pytest.mark.parametrize(
+        "command, exp_stdout, exp_returncode",
+        [
+            (["python", "-c", "print('Hello World!')"], "Hello World!\n", 0),
+            # stderr is merged into stdout when capture_output=True
+            (
+                [
+                    "python",
+                    "-c",
+                    "import sys; print('Hello World!', file=sys.stderr)",
+                ],
+                "Hello World!\n",
+                0,
+            ),
+            (["python", "-c", "exit(0)"], "", 0),
+            (["python", "-c", "exit(1)"], "", 1),
+            (["python", "-c", "exit(42)"], "", 42),
+        ],
+    )
+    def test_command_execution_scenarios(self, command, exp_stdout, exp_returncode):
+        """Captured text and return code match for a set of small Python one-liners."""
+        # Act
+        outcome = SubprocessExecutor(command, capture_output=True, print_output=False).execute(handle_errors=False)
+
+        # Assert
+        assert outcome is not None
+        assert outcome.stdout == exp_stdout
+        # stderr is redirected into stdout, so the result carries None instead of a stderr text
+        assert outcome.stderr is None
+        assert outcome.returncode == exp_returncode
+
+    def test_junction_creation(self, tmp_path: Path) -> None:
+        """Create a directory junction via ``mklink /J`` (runs on Windows only)."""
+        import platform
+
+        if platform.system() != "Windows":
+            pytest.skip("Junction creation test is Windows-specific")
+
+        # Arrange
+        target_dir = tmp_path.joinpath("test")
+        target_dir.mkdir()
+        junction = target_dir.joinpath("link")
+        mklink_command = ["cmd", "/c", "mklink", "/J", str(junction), str(target_dir)]
+
+        # Act
+        outcome = SubprocessExecutor(mklink_command, capture_output=True, print_output=False).execute(handle_errors=False)
+
+        # Assert
+        assert outcome is not None
+        assert outcome.returncode == 0
+
+    @pytest.mark.parametrize(
+        "stream_type, test_data, expected_text_parts",
+        [
+            ("stdout", b"Hello\x85World\n", ["Hello", "World"]),
+            ("stderr", b"Error\x85Message\n", ["Error", "Message"]),
+        ],
+    )
+    def test_undecodable_bytes_handling(self, stream_type: str, test_data: bytes, expected_text_parts: list[str]) -> None:
+        """Raw bytes that may not decode cleanly on stdout/stderr must not break the executor."""
+        # Arrange: store the raw payload (0x85 is not valid UTF-8) in a file the child process can read back
+        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as payload_file:
+            payload_file.write(test_data)
+            tmp_path = payload_file.name
+
+        try:
+            # Anything other than "stdout" is written to stderr
+            stream_name = "stdout" if stream_type == "stdout" else "stderr"
+            py_cmd = ["python", "-c", f"import sys; sys.{stream_name}.buffer.write(open(r'{tmp_path}', 'rb').read())"]
+
+            # Act
+            outcome = SubprocessExecutor(py_cmd, capture_output=True, print_output=False).execute(handle_errors=False)
+
+            # Assert: the readable parts survive and no UnicodeDecodeError escaped
+            assert outcome is not None
+            for expected_part in expected_text_parts:
+                assert expected_part in outcome.stdout
+            assert outcome.returncode == 0
+        finally:
+            os.remove(tmp_path)