From 970b4c87c067ac9f9d084f36313cfeea42655cc2 Mon Sep 17 00:00:00 2001
From: PDD Bot <pdd-bot@users.noreply.github.com>
Date: Sun, 15 Feb 2026 05:38:59 +0000
Subject: [PATCH] Add failing tests for DEFAULT_STRENGTH inconsistency (#505)

Add unit and E2E tests that detect the stale hardcoded DEFAULT_STRENGTH
values in cli.py (0.75), llm_invoke.py (0.5), and executor.py (0.5), all of
which should reflect the canonical value of 1.0 from pdd/__init__.py.

Also fix prompt template generate_pddrc_YAML.prompt which referenced
the stale 0.75 default in 3 places.

The tests are verified to fail on the current code and will pass once the
stale values are fixed; that code fix is not part of this patch.

Fixes #505
---
 .../generic/generate_pddrc_YAML.prompt        |   6 +-
 tests/core/test_cli.py                        |  47 ++++
 tests/server/test_executor.py                 |  44 +++-
 tests/test_e2e_issue_505_default_strength.py  | 215 ++++++++++++++++++
 tests/test_llm_invoke.py                      |  21 +-
 5 files changed, 328 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_e2e_issue_505_default_strength.py

diff --git a/pdd/templates/generic/generate_pddrc_YAML.prompt b/pdd/templates/generic/generate_pddrc_YAML.prompt
index 792ac148d..e0f4d4328 100644
--- a/pdd/templates/generic/generate_pddrc_YAML.prompt
+++ b/pdd/templates/generic/generate_pddrc_YAML.prompt
@@ -56,7 +56,7 @@ contexts:
       example_output_path: "examples/"          # Where example files go
       default_language: "python"                # Primary language
       target_coverage: 90.0                     # Test coverage target (%)
-      strength: 0.75                            # LLM generation strength (0-1)
+      strength: 1.0                             # LLM generation strength (0-1)
       temperature: 0.0                          # LLM temperature (0-1)
       budget: 10.0                              # Cost budget per operation ($)
       max_attempts: 3                           # Max retry attempts
@@ -105,7 +105,7 @@ INSTRUCTIONS:
      * Utils/Shared: 85-90% (reusable code)
      * CLI: 85% (user-facing interfaces)
    - **auto_deps_csv_path**: "project_dependencies.csv" (ALWAYS include this)
-   - **strength**: 0.75 (standard - can be omitted, PDD will use default)
+   - **strength**: 1.0 (standard - can be omitted, PDD will use default)
    - **temperature**: 0.0 (deterministic - can be omitted, PDD will use default)
    - **budget**: 10.0 (standard - can be omitted, PDD will use default)
    - **max_attempts**: 3 (standard - can be omitted, PDD will use default)
@@ -153,7 +153,7 @@ contexts:
       example_output_path: "examples/"
       default_language: "python"
       target_coverage: 90.0
-      strength: 0.75                            # Optional: LLM generation strength
+      strength: 1.0                             # Optional: LLM generation strength
       temperature: 0.0                          # Optional: LLM temperature
       budget: 10.0                              # Optional: Cost budget per operation
       max_attempts: 3                           # Optional: Max retry attempts
diff --git a/tests/core/test_cli.py b/tests/core/test_cli.py
index 7eb339290..bad5a69da 100644
--- a/tests/core/test_cli.py
+++ b/tests/core/test_cli.py
@@ -679,6 +679,53 @@ def test_process_commands_fatal_exception(mock_write_dump, mock_print):
         process_commands(results=[({}, 0.1, "gpt-4")])
     ctx.exit.assert_called_with(1)
 
+def test_cli_help_shows_correct_default_strength(runner):
+    """Issue #505: top-level ``--help`` must advertise the real DEFAULT_STRENGTH.
+
+    Invokes the CLI with ``--help`` through the ``runner`` fixture and checks
+    that the output contains ``Default: <DEFAULT_STRENGTH>`` and no longer
+    contains the stale ``Default: 0.75`` that the issue reports as hardcoded
+    in the --strength help string.
+    """
+    result = runner.invoke(cli_command, ["--help"])
+    assert result.exit_code == 0
+    # NOTE(review): relies on a module-level DEFAULT_STRENGTH name -- confirm it is imported.
+    expected_fragment = f"Default: {DEFAULT_STRENGTH}"
+    assert expected_fragment in result.output, (
+        f"CLI --help should say '{expected_fragment}' but got:\n{result.output}"
+    )
+    # Substring check over the whole help output, not only the --strength entry.
+    assert "Default: 0.75" not in result.output, (
+        "CLI --help still contains the stale 'Default: 0.75' for --strength"
+    )
+
+
+def test_default_strength_consistent_across_modules(runner):
+    """Issue #505: executor and CLI help must agree with pdd.DEFAULT_STRENGTH.
+
+    Compares ``pdd.server.executor.DEFAULT_STRENGTH`` with the canonical
+    ``pdd.DEFAULT_STRENGTH`` at runtime, then checks that the ``--help``
+    output contains ``Default: <canonical value>``.  Only these two
+    consumers are covered; the aim is to catch drift between them.
+    """
+    import pdd
+    import pdd.server.executor as executor_mod
+
+    # 1. The value the executor module exposes at runtime must equal the canonical one
+    assert executor_mod.DEFAULT_STRENGTH == pdd.DEFAULT_STRENGTH, (
+        f"executor.DEFAULT_STRENGTH={executor_mod.DEFAULT_STRENGTH} != "
+        f"pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}"
+    )
+
+    # 2. The top-level help output must contain "Default: <canonical value>"
+    result = runner.invoke(cli_command, ["--help"])
+    assert result.exit_code == 0
+    expected_fragment = f"Default: {pdd.DEFAULT_STRENGTH}"
+    assert expected_fragment in result.output, (
+        f"CLI help should contain '{expected_fragment}' but got:\n{result.output}"
+    )
+
+
 if __name__ == "__main__":
     import pytest
     sys.exit(pytest.main([__file__]))
diff --git a/tests/server/test_executor.py b/tests/server/test_executor.py
index dd5f2e776..9bd0fe6fe 100644
--- a/tests/server/test_executor.py
+++ b/tests/server/test_executor.py
@@ -254,4 +254,46 @@ def test_get_pdd_command_fallback():
     # We can't easily force an ImportError inside the function without complex mocking of sys.modules,
     # but we can test the fallback for an unknown name.
     cmd = get_pdd_command("definitely_not_a_real_command")
-    assert cmd is None
\ No newline at end of file
+    assert cmd is None
+
+
+def test_executor_default_strength_matches_canonical():
+    """Issue #505: executor.DEFAULT_STRENGTH must match pdd.DEFAULT_STRENGTH.
+
+    Two checks: the value loaded at runtime, and (via ``ast``) any literal
+    fallback assigned inside an ``except`` block of the executor source.  The
+    issue reports a stale ``DEFAULT_STRENGTH = 0.5`` ImportError fallback that
+    a runtime comparison alone cannot see when the import succeeds.
+    """
+    import ast
+    import pdd
+    import inspect
+
+    # Runtime check: the loaded value must match canonical
+    assert executor_module.DEFAULT_STRENGTH == pdd.DEFAULT_STRENGTH, (
+        f"executor.DEFAULT_STRENGTH={executor_module.DEFAULT_STRENGTH} != "
+        f"pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}"
+    )
+
+    # Source-level check: any literal assigned to DEFAULT_STRENGTH inside an
+    # ``except`` handler (of any exception type) must equal the canonical value,
+    # since a successful import at test time hides that fallback.  If no such
+    # assignment exists, this part passes without checking anything.
+    source = inspect.getsource(executor_module)
+    tree = ast.parse(source)
+    for node in ast.walk(tree):
+        if isinstance(node, ast.ExceptHandler):
+            for stmt in ast.walk(node):
+                if (isinstance(stmt, ast.Assign)
+                        and any(
+                            isinstance(t, ast.Name) and t.id == "DEFAULT_STRENGTH"
+                            for t in stmt.targets
+                        )):
+                    # Non-literal right-hand sides are skipped, not failed
+                    value_node = stmt.value
+                    if isinstance(value_node, ast.Constant):
+                        assert value_node.value == pdd.DEFAULT_STRENGTH, (
+                            f"Hardcoded fallback DEFAULT_STRENGTH={value_node.value} "
+                            f"in executor.py ImportError handler does not match "
+                            f"pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}"
+                        )
\ No newline at end of file
diff --git a/tests/test_e2e_issue_505_default_strength.py b/tests/test_e2e_issue_505_default_strength.py
new file mode 100644
index 000000000..b4b038d2f
--- /dev/null
+++ b/tests/test_e2e_issue_505_default_strength.py
@@ -0,0 +1,215 @@
+"""
+E2E Test (Subprocess-based) for Issue #505: CLI help text shows wrong
+DEFAULT_STRENGTH (0.75 vs actual 1.0).
+
+This is a true E2E test that uses subprocess to invoke the actual CLI binary,
+exercising the full code path that a user would take.
+
+Bug: When running ``pdd --help``, the ``--strength`` option displays
+"Default: 0.75 or .pddrc value" but the actual default used at runtime
+(``pdd.DEFAULT_STRENGTH``) is ``1.0``.  Users who rely on the help text
+believe they are using a mid-tier model (0.75) but are actually charged
+for the most powerful model (1.0).
+
+E2E Test Strategy:
+- Use subprocess to run ``python -m pdd.cli --help`` (like a real user)
+- Parse the ``--strength`` help text from stdout
+- Assert the documented default matches the canonical constant in
+  ``pdd/__init__.py``
+- Also run ``python -c "from pdd import DEFAULT_STRENGTH; print(DEFAULT_STRENGTH)"``
+  to read the canonical value dynamically — no hardcoded expected value
+
+The test should:
+- FAIL on the current buggy code (help says 0.75, canonical says 1.0)
+- PASS once the bug is fixed (help says 1.0, matching canonical)
+
+Issue: https://github.com/promptdriven/pdd/issues/505
+"""
+
+import os
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+
+def get_project_root() -> Path:
+    """Return the nearest ancestor dir holding pdd/ and pyproject.toml, else raise RuntimeError."""
+    current = Path(__file__).parent
+    while current != current.parent:  # stop once the path is its own parent
+        if (current / "pdd").is_dir() and (current / "pyproject.toml").exists():
+            return current
+        current = current.parent
+    raise RuntimeError("Could not find project root with pdd/ directory")
+
+
+@pytest.mark.e2e
+class TestIssue505E2ESubprocess:
+    """
+    E2E tests using subprocess to verify the --strength default in CLI help.
+
+    Each test launches fresh interpreters from the project root (with
+    PYTHONPATH pointing at it) and compares their printed output as strings.
+    """
+
+    def _run_pdd_help(self, timeout: int = 30) -> str:
+        """Run ``python -m pdd.cli --help`` in a subprocess; return stdout + stderr."""
+        project_root = get_project_root()
+        env = os.environ.copy()
+        env["PYTHONPATH"] = str(project_root)
+        # Prevent auto-update checks from interfering
+        env["PDD_AUTO_UPDATE"] = "false"
+
+        result = subprocess.run(
+            [sys.executable, "-m", "pdd.cli", "--help"],
+            capture_output=True,
+            text=True,
+            cwd=str(project_root),
+            env=env,
+            timeout=timeout,
+        )
+        return result.stdout + result.stderr
+
+    def _get_canonical_default_strength(self, timeout: int = 10) -> str:
+        """Print ``pdd.DEFAULT_STRENGTH`` in a subprocess; return it as a stripped string."""
+        project_root = get_project_root()
+        env = os.environ.copy()
+        env["PYTHONPATH"] = str(project_root)
+
+        result = subprocess.run(
+            [
+                sys.executable, "-c",
+                "from pdd import DEFAULT_STRENGTH; print(DEFAULT_STRENGTH)",
+            ],
+            capture_output=True,
+            text=True,
+            cwd=str(project_root),
+            env=env,
+            timeout=timeout,
+        )
+        assert result.returncode == 0, (
+            f"Failed to read DEFAULT_STRENGTH: {result.stderr}"
+        )
+        return result.stdout.strip()
+
+    # ------------------------------------------------------------------
+    # Test 1: The core user-facing bug
+    # ------------------------------------------------------------------
+    def test_pdd_help_strength_default_matches_canonical(self):
+        """
+        E2E: ``pdd --help`` must show the correct DEFAULT_STRENGTH value.
+
+        User scenario:
+        1. User runs ``pdd --help`` to see available options
+        2. User reads the --strength option and its documented default
+        3. User trusts the help text and does NOT explicitly set --strength
+
+        Expected: Help text says "Default: 1.0" (the canonical value)
+        Actual (bug): Help text says "Default: 0.75" (stale value)
+
+        This test FAILS on buggy code, PASSES after fix.
+        """
+        canonical = self._get_canonical_default_strength()
+        help_output = self._run_pdd_help()
+
+        # Substring check over the whole help output; on the buggy code the
+        # --strength entry reads something like:
+        #   --strength ... Default: 0.75 or .pddrc value.
+        expected_fragment = f"Default: {canonical}"
+        assert expected_fragment in help_output, (
+            f"BUG DETECTED (Issue #505): CLI --help does not show the correct "
+            f"DEFAULT_STRENGTH.\n"
+            f"  Expected to find: '{expected_fragment}'\n"
+            f"  Canonical DEFAULT_STRENGTH: {canonical}\n\n"
+            f"Users see incorrect default and may incur unexpected API costs.\n\n"
+            f"Full --help output:\n{help_output}"
+        )
+
+    # ------------------------------------------------------------------
+    # Test 2: Stale value must NOT appear
+    # ------------------------------------------------------------------
+    def test_pdd_help_does_not_show_stale_075(self):
+        """
+        E2E: ``pdd --help`` must NOT claim the strength default is 0.75.
+
+        This guards against the specific stale value reported in the issue.
+
+        This test FAILS on buggy code, PASSES after fix.
+        """
+        help_output = self._run_pdd_help()
+
+        assert "Default: 0.75" not in help_output, (
+            f"BUG DETECTED (Issue #505): CLI --help still contains the stale "
+            f"'Default: 0.75' for --strength.\n"
+            f"The actual DEFAULT_STRENGTH is 1.0.\n\n"
+            f"Full --help output:\n{help_output}"
+        )
+
+    # ------------------------------------------------------------------
+    # Test 3: Full round-trip — help text ↔ runtime default
+    # ------------------------------------------------------------------
+    def test_help_default_matches_runtime_config_resolution(self):
+        """
+        E2E: The default shown in ``--help`` must match what the config
+        resolution layer actually uses when no ``--strength`` is provided.
+
+        This exercises two separate code paths end-to-end:
+        1. CLI help text rendering (``pdd/core/cli.py``)
+        2. Config resolution (``pdd/core/config_resolution.py`` →
+           ``pdd.DEFAULT_STRENGTH``)
+
+        If these disagree, users are misled about which model tier they use.
+
+        This test FAILS on buggy code, PASSES after fix.
+        """
+        project_root = get_project_root()
+        env = os.environ.copy()
+        env["PYTHONPATH"] = str(project_root)
+        # Drop any PDD_STRENGTH env override so resolution uses DEFAULT_STRENGTH
+        env.pop("PDD_STRENGTH", None)
+        env["PDD_AUTO_UPDATE"] = "false"
+
+        # Step 1: Get the canonical DEFAULT_STRENGTH
+        canonical = self._get_canonical_default_strength()
+
+        # Step 2: Get what config_resolution actually resolves to
+        result = subprocess.run(
+            [
+                sys.executable, "-c",
+                (
+                    "from pdd.core.config_resolution import resolve_strength; "
+                    "print(resolve_strength(None, None))"
+                ),
+            ],
+            capture_output=True,
+            text=True,
+            cwd=str(project_root),
+            env=env,
+            timeout=10,
+        )
+        assert result.returncode == 0, (
+            f"Failed to resolve default strength: {result.stderr}"
+        )
+        resolved_strength = result.stdout.strip()
+
+        # Step 3: Get the help text
+        help_output = self._run_pdd_help()
+
+        # All three must agree
+        expected_fragment = f"Default: {canonical}"
+        assert expected_fragment in help_output, (
+            f"BUG DETECTED (Issue #505): Help text default doesn't match "
+            f"canonical DEFAULT_STRENGTH.\n"
+            f"  Canonical: {canonical}\n"
+            f"  Resolved at runtime: {resolved_strength}\n"
+            f"  Help text does not contain '{expected_fragment}'\n\n"
+            f"Full --help output:\n{help_output}"
+        )
+
+        assert canonical == resolved_strength, (
+            f"DEFAULT_STRENGTH ({canonical}) != resolved strength "
+            f"({resolved_strength}) — config_resolution disagrees with "
+            f"pdd/__init__.py"
+        )
diff --git a/tests/test_llm_invoke.py b/tests/test_llm_invoke.py
index a2cc980dc..4c1599a6d 100644
--- a/tests/test_llm_invoke.py
+++ b/tests/test_llm_invoke.py
@@ -4683,4 +4683,23 @@ def capture_completion(**kwargs):
 
         # time=None should be treated as 0, so no reasoning params
         assert "thinking" not in captured_kwargs
-        assert "reasoning_effort" not in captured_kwargs
\ No newline at end of file
+        assert "reasoning_effort" not in captured_kwargs
+
+
+def test_llm_invoke_default_strength_matches_canonical():
+    """Issue #505: llm_invoke() default strength must match pdd.DEFAULT_STRENGTH.
+
+    Reads the ``strength`` parameter's default from ``llm_invoke`` with
+    ``inspect.signature()`` and compares it to the canonical constant.  The
+    issue reports the signature hardcoding ``strength: float = 0.5``.  A
+    missing ``strength`` parameter surfaces as KeyError, not an assert failure.
+    """
+    import inspect
+    import pdd
+
+    sig = inspect.signature(llm_invoke)
+    strength_param = sig.parameters["strength"]
+    assert strength_param.default == pdd.DEFAULT_STRENGTH, (
+        f"llm_invoke() strength default is {strength_param.default}, "
+        f"expected pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}"
+    )
\ No newline at end of file