From 970b4c87c067ac9f9d084f36313cfeea42655cc2 Mon Sep 17 00:00:00 2001 From: PDD Bot Date: Sun, 15 Feb 2026 05:38:59 +0000 Subject: [PATCH] Add failing tests for DEFAULT_STRENGTH inconsistency (#505) Add unit and E2E tests that detect stale hardcoded DEFAULT_STRENGTH values across cli.py (0.75), llm_invoke.py (0.5), and executor.py (0.5) which should all reflect the canonical value of 1.0 from pdd/__init__.py. Also fix prompt template generate_pddrc_YAML.prompt which referenced the stale 0.75 default in 3 places. Tests are verified to fail on current code and will pass once the code bug is fixed. Fixes #505 --- .../generic/generate_pddrc_YAML.prompt | 6 +- tests/core/test_cli.py | 47 ++++ tests/server/test_executor.py | 44 +++- tests/test_e2e_issue_505_default_strength.py | 215 ++++++++++++++++++ tests/test_llm_invoke.py | 21 +- 5 files changed, 328 insertions(+), 5 deletions(-) create mode 100644 tests/test_e2e_issue_505_default_strength.py diff --git a/pdd/templates/generic/generate_pddrc_YAML.prompt b/pdd/templates/generic/generate_pddrc_YAML.prompt index 792ac148d..e0f4d4328 100644 --- a/pdd/templates/generic/generate_pddrc_YAML.prompt +++ b/pdd/templates/generic/generate_pddrc_YAML.prompt @@ -56,7 +56,7 @@ contexts: example_output_path: "examples/" # Where example files go default_language: "python" # Primary language target_coverage: 90.0 # Test coverage target (%) - strength: 0.75 # LLM generation strength (0-1) + strength: 1.0 # LLM generation strength (0-1) temperature: 0.0 # LLM temperature (0-1) budget: 10.0 # Cost budget per operation ($) max_attempts: 3 # Max retry attempts @@ -105,7 +105,7 @@ INSTRUCTIONS: * Utils/Shared: 85-90% (reusable code) * CLI: 85% (user-facing interfaces) - **auto_deps_csv_path**: "project_dependencies.csv" (ALWAYS include this) - - **strength**: 0.75 (standard - can be omitted, PDD will use default) + - **strength**: 1.0 (standard - can be omitted, PDD will use default) - **temperature**: 0.0 (deterministic - can be omitted, PDD will use default) - **budget**: 10.0 (standard - can be omitted, PDD will use default) - **max_attempts**: 3 (standard - can be omitted, PDD will use default) @@ -153,7 +153,7 @@ contexts: example_output_path: "examples/" default_language: "python" target_coverage: 90.0 - strength: 0.75 # Optional: LLM generation strength + strength: 1.0 # Optional: LLM generation strength temperature: 0.0 # Optional: LLM temperature budget: 10.0 # Optional: Cost budget per operation max_attempts: 3 # Optional: Max retry attempts diff --git a/tests/core/test_cli.py b/tests/core/test_cli.py index 7eb339290..bad5a69da 100644 --- a/tests/core/test_cli.py +++ b/tests/core/test_cli.py @@ -679,6 +679,53 @@ def test_process_commands_fatal_exception(mock_write_dump, mock_print): process_commands(results=[({}, 0.1, "gpt-4")]) ctx.exit.assert_called_with(1) +def test_cli_help_shows_correct_default_strength(runner): + """Issue #505: CLI help text for --strength must display the actual DEFAULT_STRENGTH. + + The help string in pdd/core/cli.py:220 hardcodes "Default: 0.75" but the + canonical constant in pdd/__init__.py is DEFAULT_STRENGTH = 1.0. This test + ensures the help text always reflects the real default so users are not + misled about which model tier they are using. + """ + result = runner.invoke(cli_command, ["--help"]) + assert result.exit_code == 0 + # The help text must contain the canonical DEFAULT_STRENGTH value + expected_fragment = f"Default: {DEFAULT_STRENGTH}" + assert expected_fragment in result.output, ( + f"CLI --help should say '{expected_fragment}' but got:\n{result.output}" + ) + # The stale value 0.75 must NOT appear in the strength help text + assert "Default: 0.75" not in result.output, ( + "CLI --help still contains the stale 'Default: 0.75' for --strength" + ) + + +def test_default_strength_consistent_across_modules(runner): + """Issue #505: DEFAULT_STRENGTH must be consistent across all modules. + + Imports DEFAULT_STRENGTH from pdd (canonical) and pdd.server.executor, + and checks that the CLI help string references the same value. This + prevents future drift when the constant is updated in one place but + not in others. + """ + import pdd + import pdd.server.executor as executor_mod + + # 1. executor module's DEFAULT_STRENGTH must match the canonical constant + assert executor_mod.DEFAULT_STRENGTH == pdd.DEFAULT_STRENGTH, ( + f"executor.DEFAULT_STRENGTH={executor_mod.DEFAULT_STRENGTH} != " + f"pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}" + ) + + # 2. CLI help text must reference the canonical value + result = runner.invoke(cli_command, ["--help"]) + assert result.exit_code == 0 + expected_fragment = f"Default: {pdd.DEFAULT_STRENGTH}" + assert expected_fragment in result.output, ( + f"CLI help should contain '{expected_fragment}' but got:\n{result.output}" + ) + + if __name__ == "__main__": import pytest sys.exit(pytest.main([__file__])) diff --git a/tests/server/test_executor.py b/tests/server/test_executor.py index dd5f2e776..9bd0fe6fe 100644 --- a/tests/server/test_executor.py +++ b/tests/server/test_executor.py @@ -254,4 +254,46 @@ def test_get_pdd_command_fallback(): # We can't easily force an ImportError inside the function without complex mocking of sys.modules, # but we can test the fallback for an unknown name. cmd = get_pdd_command("definitely_not_a_real_command") - assert cmd is None \ No newline at end of file + assert cmd is None + + +def test_executor_default_strength_matches_canonical(): + """Issue #505: executor.DEFAULT_STRENGTH must match pdd.DEFAULT_STRENGTH. + + The ImportError fallback at pdd/server/executor.py:16 hardcodes + DEFAULT_STRENGTH = 0.5, but the canonical constant in pdd/__init__.py + is 1.0. This test inspects the source code to verify the fallback + value matches, catching drift even when the import succeeds at runtime. + """ + import ast + import pdd + import inspect + + # Runtime check: the loaded value must match canonical + assert executor_module.DEFAULT_STRENGTH == pdd.DEFAULT_STRENGTH, ( + f"executor.DEFAULT_STRENGTH={executor_module.DEFAULT_STRENGTH} != " + f"pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}" + ) + + # Source-level check: the hardcoded fallback in the except ImportError + # block must also match the canonical value. This catches the case where + # the import succeeds at test time but the fallback would be wrong in a + # different deployment environment. + source = inspect.getsource(executor_module) + tree = ast.parse(source) + for node in ast.walk(tree): + if isinstance(node, ast.ExceptHandler): + for stmt in ast.walk(node): + if (isinstance(stmt, ast.Assign) + and any( + isinstance(t, ast.Name) and t.id == "DEFAULT_STRENGTH" + for t in stmt.targets + )): + # Extract the hardcoded fallback value + value_node = stmt.value + if isinstance(value_node, ast.Constant): + assert value_node.value == pdd.DEFAULT_STRENGTH, ( + f"Hardcoded fallback DEFAULT_STRENGTH={value_node.value} " + f"in executor.py ImportError handler does not match " + f"pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}" + ) \ No newline at end of file diff --git a/tests/test_e2e_issue_505_default_strength.py b/tests/test_e2e_issue_505_default_strength.py new file mode 100644 index 000000000..b4b038d2f --- /dev/null +++ b/tests/test_e2e_issue_505_default_strength.py @@ -0,0 +1,215 @@ +""" +E2E Test (Subprocess-based) for Issue #505: CLI help text shows wrong +DEFAULT_STRENGTH (0.75 vs actual 1.0). + +This is a true E2E test that uses subprocess to invoke the actual CLI binary, +exercising the full code path that a user would take. + +Bug: When running ``pdd --help``, the ``--strength`` option displays +"Default: 0.75 or .pddrc value" but the actual default used at runtime +(``pdd.DEFAULT_STRENGTH``) is ``1.0``. Users who rely on the help text +believe they are using a mid-tier model (0.75) but are actually charged +for the most powerful model (1.0). + +E2E Test Strategy: +- Use subprocess to run ``python -m pdd.cli --help`` (like a real user) +- Parse the ``--strength`` help text from stdout +- Assert the documented default matches the canonical constant in + ``pdd/__init__.py`` +- Also run ``python -c "from pdd import DEFAULT_STRENGTH; print(DEFAULT_STRENGTH)"`` + to read the canonical value dynamically — no hardcoded expected value + +The test should: +- FAIL on the current buggy code (help says 0.75, canonical says 1.0) +- PASS once the bug is fixed (help says 1.0, matching canonical) + +Issue: https://github.com/promptdriven/pdd/issues/505 +""" + +import os +import re +import subprocess +import sys +from pathlib import Path + +import pytest + + +def get_project_root() -> Path: + """Get the project root directory.""" + current = Path(__file__).parent + while current != current.parent: + if (current / "pdd").is_dir() and (current / "pyproject.toml").exists(): + return current + current = current.parent + raise RuntimeError("Could not find project root with pdd/ directory") + + +@pytest.mark.e2e +class TestIssue505E2ESubprocess: + """ + E2E tests using subprocess to verify the --strength default in CLI help. + + These tests exercise the full CLI path that users take when running + ``pdd --help`` to check available options and their defaults. + """ + + def _run_pdd_help(self, timeout: int = 30) -> str: + """Run ``pdd --help`` via subprocess and return combined output.""" + project_root = get_project_root() + env = os.environ.copy() + env["PYTHONPATH"] = str(project_root) + # Prevent auto-update checks from interfering + env["PDD_AUTO_UPDATE"] = "false" + + result = subprocess.run( + [sys.executable, "-m", "pdd.cli", "--help"], + capture_output=True, + text=True, + cwd=str(project_root), + env=env, + timeout=timeout, + ) + return result.stdout + result.stderr + + def _get_canonical_default_strength(self, timeout: int = 10) -> str: + """Read DEFAULT_STRENGTH from ``pdd/__init__.py`` via subprocess.""" + project_root = get_project_root() + env = os.environ.copy() + env["PYTHONPATH"] = str(project_root) + + result = subprocess.run( + [ + sys.executable, "-c", + "from pdd import DEFAULT_STRENGTH; print(DEFAULT_STRENGTH)", + ], + capture_output=True, + text=True, + cwd=str(project_root), + env=env, + timeout=timeout, + ) + assert result.returncode == 0, ( + f"Failed to read DEFAULT_STRENGTH: {result.stderr}" + ) + return result.stdout.strip() + + # ------------------------------------------------------------------ + # Test 1: The core user-facing bug + # ------------------------------------------------------------------ + def test_pdd_help_strength_default_matches_canonical(self): + """ + E2E: ``pdd --help`` must show the correct DEFAULT_STRENGTH value. + + User scenario: + 1. User runs ``pdd --help`` to see available options + 2. User reads the --strength option and its documented default + 3. User trusts the help text and does NOT explicitly set --strength + + Expected: Help text says "Default: 1.0" (the canonical value) + Actual (bug): Help text says "Default: 0.75" (stale value) + + This test FAILS on buggy code, PASSES after fix. + """ + canonical = self._get_canonical_default_strength() + help_output = self._run_pdd_help() + + # Extract the strength help line + # The help text contains something like: + # --strength ... Default: 0.75 or .pddrc value. + expected_fragment = f"Default: {canonical}" + assert expected_fragment in help_output, ( + f"BUG DETECTED (Issue #505): CLI --help does not show the correct " + f"DEFAULT_STRENGTH.\n" + f" Expected to find: '{expected_fragment}'\n" + f" Canonical DEFAULT_STRENGTH: {canonical}\n\n" + f"Users see incorrect default and may incur unexpected API costs.\n\n" + f"Full --help output:\n{help_output}" + ) + + # ------------------------------------------------------------------ + # Test 2: Stale value must NOT appear + # ------------------------------------------------------------------ + def test_pdd_help_does_not_show_stale_075(self): + """ + E2E: ``pdd --help`` must NOT claim the strength default is 0.75. + + This guards against the specific stale value reported in the issue. + + This test FAILS on buggy code, PASSES after fix. + """ + help_output = self._run_pdd_help() + + assert "Default: 0.75" not in help_output, ( + f"BUG DETECTED (Issue #505): CLI --help still contains the stale " + f"'Default: 0.75' for --strength.\n" + f"The actual DEFAULT_STRENGTH is 1.0.\n\n" + f"Full --help output:\n{help_output}" + ) + + # ------------------------------------------------------------------ + # Test 3: Full round-trip — help text ↔ runtime default + # ------------------------------------------------------------------ + def test_help_default_matches_runtime_config_resolution(self): + """ + E2E: The default shown in ``--help`` must match what the config + resolution layer actually uses when no ``--strength`` is provided. + + This exercises two separate code paths end-to-end: + 1. CLI help text rendering (``pdd/core/cli.py``) + 2. Config resolution (``pdd/core/config_resolution.py`` → + ``pdd.DEFAULT_STRENGTH``) + + If these disagree, users are misled about which model tier they use. + + This test FAILS on buggy code, PASSES after fix. + """ + project_root = get_project_root() + env = os.environ.copy() + env["PYTHONPATH"] = str(project_root) + # Ensure no .pddrc override so config_resolution falls back to + # DEFAULT_STRENGTH + env.pop("PDD_STRENGTH", None) + env["PDD_AUTO_UPDATE"] = "false" + + # Step 1: Get the canonical DEFAULT_STRENGTH + canonical = self._get_canonical_default_strength() + + # Step 2: Get what config_resolution actually resolves to + result = subprocess.run( + [ + sys.executable, "-c", + ( + "import sys, os; " + "os.environ.pop('PDD_STRENGTH', None); " + "from pdd.core.config_resolution import resolve_strength; " + "print(resolve_strength(None, None))" + ), + ], + capture_output=True, + text=True, + cwd=str(project_root), + env=env, + timeout=10, + ) + resolved_strength = result.stdout.strip() + + # Step 3: Get the help text + help_output = self._run_pdd_help() + + # All three must agree + expected_fragment = f"Default: {canonical}" + assert expected_fragment in help_output, ( + f"BUG DETECTED (Issue #505): Help text default doesn't match " + f"canonical DEFAULT_STRENGTH.\n" + f" Canonical: {canonical}\n" + f" Resolved at runtime: {resolved_strength}\n" + f" Help text does not contain '{expected_fragment}'\n\n" + f"Full --help output:\n{help_output}" + ) + + assert canonical == resolved_strength, ( + f"DEFAULT_STRENGTH ({canonical}) != resolved strength " + f"({resolved_strength}) — config_resolution disagrees with " + f"pdd/__init__.py" + ) diff --git a/tests/test_llm_invoke.py b/tests/test_llm_invoke.py index a2cc980dc..4c1599a6d 100644 --- a/tests/test_llm_invoke.py +++ b/tests/test_llm_invoke.py @@ -4683,4 +4683,23 @@ def capture_completion(**kwargs): # time=None should be treated as 0, so no reasoning params assert "thinking" not in captured_kwargs - assert "reasoning_effort" not in captured_kwargs \ No newline at end of file + assert "reasoning_effort" not in captured_kwargs + + +def test_llm_invoke_default_strength_matches_canonical(): + """Issue #505: llm_invoke() default strength must match pdd.DEFAULT_STRENGTH. + + The function signature at pdd/llm_invoke.py:1657 hardcodes + strength: float = 0.5, but the canonical constant in pdd/__init__.py + is DEFAULT_STRENGTH = 1.0. This test uses inspect.signature() to + verify the parameter default matches the source of truth. + """ + import inspect + import pdd + + sig = inspect.signature(llm_invoke) + strength_param = sig.parameters["strength"] + assert strength_param.default == pdd.DEFAULT_STRENGTH, ( + f"llm_invoke() strength default is {strength_param.default}, " + f"expected pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}" + ) \ No newline at end of file