Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pdd/templates/generic/generate_pddrc_YAML.prompt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ contexts:
example_output_path: "examples/" # Where example files go
default_language: "python" # Primary language
target_coverage: 90.0 # Test coverage target (%)
strength: 0.75 # LLM generation strength (0-1)
strength: 1.0 # LLM generation strength (0-1)
temperature: 0.0 # LLM temperature (0-1)
budget: 10.0 # Cost budget per operation ($)
max_attempts: 3 # Max retry attempts
Expand Down Expand Up @@ -105,7 +105,7 @@ INSTRUCTIONS:
* Utils/Shared: 85-90% (reusable code)
* CLI: 85% (user-facing interfaces)
- **auto_deps_csv_path**: "project_dependencies.csv" (ALWAYS include this)
- **strength**: 0.75 (standard - can be omitted, PDD will use default)
- **strength**: 1.0 (standard - can be omitted, PDD will use default)
- **temperature**: 0.0 (deterministic - can be omitted, PDD will use default)
- **budget**: 10.0 (standard - can be omitted, PDD will use default)
- **max_attempts**: 3 (standard - can be omitted, PDD will use default)
Expand Down Expand Up @@ -153,7 +153,7 @@ contexts:
example_output_path: "examples/"
default_language: "python"
target_coverage: 90.0
strength: 0.75 # Optional: LLM generation strength
strength: 1.0 # Optional: LLM generation strength
temperature: 0.0 # Optional: LLM temperature
budget: 10.0 # Optional: Cost budget per operation
max_attempts: 3 # Optional: Max retry attempts
Expand Down
47 changes: 47 additions & 0 deletions tests/core/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,53 @@ def test_process_commands_fatal_exception(mock_write_dump, mock_print):
process_commands(results=[({}, 0.1, "gpt-4")])
ctx.exit.assert_called_with(1)

def test_cli_help_shows_correct_default_strength(runner):
    """Issue #505: the ``--strength`` help text must quote the real default.

    According to the issue, the help string in pdd/core/cli.py hardcoded
    "Default: 0.75" while the canonical constant in pdd/__init__.py is
    DEFAULT_STRENGTH = 1.0. This test renders ``--help`` and verifies that
    the output advertises the canonical constant and no longer carries the
    stale value, so users are not misled about the model tier in use.
    """
    help_result = runner.invoke(cli_command, ["--help"])
    assert help_result.exit_code == 0

    # Positive check: the canonical DEFAULT_STRENGTH must be what is shown.
    wanted = f"Default: {DEFAULT_STRENGTH}"
    assert wanted in help_result.output, (
        f"CLI --help should say '{wanted}' but got:\n{help_result.output}"
    )

    # Negative check: the stale 0.75 default must be gone from the help text.
    stale = "Default: 0.75"
    assert stale not in help_result.output, (
        "CLI --help still contains the stale 'Default: 0.75' for --strength"
    )


def test_default_strength_consistent_across_modules(runner):
    """Issue #505: every module must agree on DEFAULT_STRENGTH.

    Compares the canonical ``pdd.DEFAULT_STRENGTH`` against the value exposed
    by ``pdd.server.executor`` and against the default quoted in the CLI
    ``--help`` output. Guards against the constant being updated in one
    place while another copy drifts.
    """
    import pdd
    import pdd.server.executor as executor_mod

    canonical = pdd.DEFAULT_STRENGTH
    executor_value = executor_mod.DEFAULT_STRENGTH

    # 1. The executor module must expose the same value as the package root.
    assert executor_value == canonical, (
        f"executor.DEFAULT_STRENGTH={executor_value} != "
        f"pdd.DEFAULT_STRENGTH={canonical}"
    )

    # 2. The rendered CLI help must quote that same canonical value.
    help_result = runner.invoke(cli_command, ["--help"])
    assert help_result.exit_code == 0

    wanted = f"Default: {canonical}"
    assert wanted in help_result.output, (
        f"CLI help should contain '{wanted}' but got:\n{help_result.output}"
    )


if __name__ == "__main__":
    # Allow running this test module directly; pytest's return code becomes
    # the process exit status.
    import pytest

    raise SystemExit(pytest.main([__file__]))
44 changes: 43 additions & 1 deletion tests/server/test_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,4 +254,46 @@ def test_get_pdd_command_fallback():
# We can't easily force an ImportError inside the function without complex mocking of sys.modules,
# but we can test the fallback for an unknown name.
cmd = get_pdd_command("definitely_not_a_real_command")
assert cmd is None
assert cmd is None


def test_executor_default_strength_matches_canonical():
    """Issue #505: executor.DEFAULT_STRENGTH must match pdd.DEFAULT_STRENGTH.

    Per the issue, the ImportError fallback in pdd/server/executor.py
    hardcoded DEFAULT_STRENGTH = 0.5 while the canonical constant in
    pdd/__init__.py is 1.0. Two checks are performed:

    1. Runtime: the value loaded on the executor module equals the canonical
       constant.
    2. Source level: every literal assigned to ``DEFAULT_STRENGTH`` inside an
       ``except`` handler of the executor source equals the canonical
       constant. This catches drift even when the import succeeds at test
       time and the fallback branch never runs.

    Both plain assignments (``DEFAULT_STRENGTH = 0.5``) and annotated
    assignments (``DEFAULT_STRENGTH: float = 0.5``) are inspected.

    Limitations: the source-level check passes vacuously when no such
    assignment exists, and fallbacks whose value is not a literal constant
    (e.g. a function call) are not evaluated.
    """
    import ast
    import inspect

    import pdd

    canonical = pdd.DEFAULT_STRENGTH

    # Runtime check: the loaded value must match canonical.
    assert executor_module.DEFAULT_STRENGTH == canonical, (
        f"executor.DEFAULT_STRENGTH={executor_module.DEFAULT_STRENGTH} != "
        f"pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}"
    )

    # Source-level check: walk every except handler in the executor source
    # and validate any hardcoded DEFAULT_STRENGTH fallback found inside it.
    tree = ast.parse(inspect.getsource(executor_module))
    handlers = [
        node for node in ast.walk(tree) if isinstance(node, ast.ExceptHandler)
    ]
    for handler in handlers:
        for stmt in ast.walk(handler):
            # Normalise plain and annotated assignments to a target list;
            # AnnAssign carries a single ``target`` instead of ``targets``.
            if isinstance(stmt, ast.Assign):
                targets = stmt.targets
            elif isinstance(stmt, ast.AnnAssign):
                targets = [stmt.target]
            else:
                continue

            assigns_default = any(
                isinstance(target, ast.Name) and target.id == "DEFAULT_STRENGTH"
                for target in targets
            )
            if not assigns_default:
                continue

            # Only literal fallbacks can be compared statically. A bare
            # annotation (value is None) or a computed value is skipped.
            value_node = stmt.value
            if isinstance(value_node, ast.Constant):
                assert value_node.value == canonical, (
                    f"Hardcoded fallback DEFAULT_STRENGTH={value_node.value} "
                    f"in executor.py ImportError handler does not match "
                    f"pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}"
                )
215 changes: 215 additions & 0 deletions tests/test_e2e_issue_505_default_strength.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
"""
E2E Test (Subprocess-based) for Issue #505: CLI help text shows wrong
DEFAULT_STRENGTH (0.75 vs actual 1.0).

This is a true E2E test that uses subprocess to invoke the actual CLI binary,
exercising the full code path that a user would take.

Bug: When running ``pdd --help``, the ``--strength`` option displays
"Default: 0.75 or .pddrc value" but the actual default used at runtime
(``pdd.DEFAULT_STRENGTH``) is ``1.0``. Users who rely on the help text
believe they are using a mid-tier model (0.75) but are actually charged
for the most powerful model (1.0).

E2E Test Strategy:
- Use subprocess to run ``python -m pdd.cli --help`` (like a real user)
- Parse the ``--strength`` help text from stdout
- Assert the documented default matches the canonical constant in
``pdd/__init__.py``
- Also run ``python -c "from pdd import DEFAULT_STRENGTH; print(DEFAULT_STRENGTH)"``
to read the canonical value dynamically — no hardcoded expected value

The test should:
- FAIL on the current buggy code (help says 0.75, canonical says 1.0)
- PASS once the bug is fixed (help says 1.0, matching canonical)

Issue: https://github.com/promptdriven/pdd/issues/505
"""

import os
import re
import subprocess
import sys
from pathlib import Path

import pytest


def get_project_root() -> Path:
    """Return the nearest ancestor directory that looks like the project root.

    Starting from the directory containing this file, each ancestor is
    examined in turn; the first one that contains both a ``pdd/`` directory
    and a ``pyproject.toml`` file is returned.

    The starting path is resolved to an absolute path first. Without that, a
    relative ``__file__`` makes the upward walk stop at ``Path(".")`` (whose
    parent is itself) before that directory is ever examined. The top-most
    directory is also examined rather than skipped.

    Returns:
        The absolute path of the project root.

    Raises:
        RuntimeError: If no ancestor directory matches.
    """
    start = Path(__file__).resolve().parent
    for candidate in (start, *start.parents):
        has_package = (candidate / "pdd").is_dir()
        if has_package and (candidate / "pyproject.toml").exists():
            return candidate
    raise RuntimeError("Could not find project root with pdd/ directory")


@pytest.mark.e2e
class TestIssue505E2ESubprocess:
    """
    E2E tests using subprocess to verify the --strength default in CLI help.

    These tests exercise the full CLI path that users take when running
    ``pdd --help`` to check available options and their defaults. Every
    value is obtained from a child Python process so that the checks go
    through the same import and start-up path as a real invocation.
    """

    @staticmethod
    def _base_env(project_root):
        """Return a copy of ``os.environ`` with PYTHONPATH set to the project root.

        PYTHONPATH is overwritten (not extended) so the child process imports
        the ``pdd`` package from ``project_root``.
        """
        env = os.environ.copy()
        env["PYTHONPATH"] = str(project_root)
        return env

    def _run_pdd_help(self, timeout: int = 30) -> str:
        """Run ``pdd --help`` via subprocess and return combined output.

        Args:
            timeout: Seconds to wait for the child process.

        Returns:
            The child's stdout followed by its stderr. The exit status is
            deliberately not checked here; callers assert on the text.
        """
        project_root = get_project_root()
        env = self._base_env(project_root)
        # Prevent auto-update checks from interfering
        env["PDD_AUTO_UPDATE"] = "false"

        completed = subprocess.run(
            [sys.executable, "-m", "pdd.cli", "--help"],
            capture_output=True,
            text=True,
            cwd=str(project_root),
            env=env,
            timeout=timeout,
        )
        return completed.stdout + completed.stderr

    def _get_canonical_default_strength(self, timeout: int = 10) -> str:
        """Read DEFAULT_STRENGTH from the ``pdd`` package via subprocess.

        Args:
            timeout: Seconds to wait for the child process.

        Returns:
            The printed value of ``pdd.DEFAULT_STRENGTH`` as a stripped
            string (e.g. ``"1.0"``), so no expected value is hardcoded.
        """
        project_root = get_project_root()
        env = self._base_env(project_root)

        completed = subprocess.run(
            [
                sys.executable, "-c",
                "from pdd import DEFAULT_STRENGTH; print(DEFAULT_STRENGTH)",
            ],
            capture_output=True,
            text=True,
            cwd=str(project_root),
            env=env,
            timeout=timeout,
        )
        assert completed.returncode == 0, (
            f"Failed to read DEFAULT_STRENGTH: {completed.stderr}"
        )
        return completed.stdout.strip()

    # ------------------------------------------------------------------
    # Test 1: The core user-facing bug
    # ------------------------------------------------------------------
    def test_pdd_help_strength_default_matches_canonical(self):
        """
        E2E: ``pdd --help`` must show the correct DEFAULT_STRENGTH value.

        User scenario:
        1. User runs ``pdd --help`` to see available options
        2. User reads the --strength option and its documented default
        3. User trusts the help text and does NOT explicitly set --strength

        Expected: Help text says "Default: 1.0" (the canonical value)
        Actual (bug): Help text says "Default: 0.75" (stale value)

        This test FAILS on buggy code, PASSES after fix.
        """
        canonical = self._get_canonical_default_strength()
        help_output = self._run_pdd_help()

        # The help text contains something like:
        #   --strength ... Default: 0.75 or .pddrc value.
        # so a substring match on "Default: <canonical>" is sufficient.
        expected_fragment = f"Default: {canonical}"
        assert expected_fragment in help_output, (
            f"BUG DETECTED (Issue #505): CLI --help does not show the correct "
            f"DEFAULT_STRENGTH.\n"
            f"  Expected to find: '{expected_fragment}'\n"
            f"  Canonical DEFAULT_STRENGTH: {canonical}\n\n"
            f"Users see incorrect default and may incur unexpected API costs.\n\n"
            f"Full --help output:\n{help_output}"
        )

    # ------------------------------------------------------------------
    # Test 2: Stale value must NOT appear
    # ------------------------------------------------------------------
    def test_pdd_help_does_not_show_stale_075(self):
        """
        E2E: ``pdd --help`` must NOT claim the strength default is 0.75.

        This guards against the specific stale value reported in the issue.

        This test FAILS on buggy code, PASSES after fix.
        """
        help_output = self._run_pdd_help()

        assert "Default: 0.75" not in help_output, (
            f"BUG DETECTED (Issue #505): CLI --help still contains the stale "
            f"'Default: 0.75' for --strength.\n"
            f"The actual DEFAULT_STRENGTH is 1.0.\n\n"
            f"Full --help output:\n{help_output}"
        )

    # ------------------------------------------------------------------
    # Test 3: Full round-trip — help text ↔ runtime default
    # ------------------------------------------------------------------
    def test_help_default_matches_runtime_config_resolution(self):
        """
        E2E: The default shown in ``--help`` must match what the config
        resolution layer actually uses when no ``--strength`` is provided.

        This exercises two separate code paths end-to-end:
        1. CLI help text rendering (``pdd/core/cli.py``)
        2. Config resolution (``pdd/core/config_resolution.py`` →
           ``pdd.DEFAULT_STRENGTH``)

        If these disagree, users are misled about which model tier they use.

        The resolution subprocess must exit successfully; otherwise the test
        fails immediately with the child's stderr instead of reporting a
        misleading mismatch against an empty string.

        This test FAILS on buggy code, PASSES after fix.
        """
        project_root = get_project_root()
        env = self._base_env(project_root)
        # Drop any PDD_STRENGTH value inherited from the caller's environment
        # (presumably consulted by config resolution — confirm). Note that
        # this does not neutralise a .pddrc file discoverable from cwd.
        env.pop("PDD_STRENGTH", None)
        env["PDD_AUTO_UPDATE"] = "false"

        # Step 1: Get the canonical DEFAULT_STRENGTH
        canonical = self._get_canonical_default_strength()

        # Step 2: Get what config_resolution actually resolves to
        completed = subprocess.run(
            [
                sys.executable, "-c",
                (
                    "import sys, os; "
                    "os.environ.pop('PDD_STRENGTH', None); "
                    "from pdd.core.config_resolution import resolve_strength; "
                    "print(resolve_strength(None, None))"
                ),
            ],
            capture_output=True,
            text=True,
            cwd=str(project_root),
            env=env,
            timeout=10,
        )
        # Surface import errors / crashes in the child with their stderr.
        assert completed.returncode == 0, (
            f"Failed to resolve strength via config_resolution: "
            f"{completed.stderr}"
        )
        resolved_strength = completed.stdout.strip()

        # Step 3: Get the help text
        help_output = self._run_pdd_help()

        # All three must agree
        expected_fragment = f"Default: {canonical}"
        assert expected_fragment in help_output, (
            f"BUG DETECTED (Issue #505): Help text default doesn't match "
            f"canonical DEFAULT_STRENGTH.\n"
            f"  Canonical: {canonical}\n"
            f"  Resolved at runtime: {resolved_strength}\n"
            f"  Help text does not contain '{expected_fragment}'\n\n"
            f"Full --help output:\n{help_output}"
        )

        assert canonical == resolved_strength, (
            f"DEFAULT_STRENGTH ({canonical}) != resolved strength "
            f"({resolved_strength}) — config_resolution disagrees with "
            f"pdd/__init__.py"
        )
21 changes: 20 additions & 1 deletion tests/test_llm_invoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -4683,4 +4683,23 @@ def capture_completion(**kwargs):

# time=None should be treated as 0, so no reasoning params
assert "thinking" not in captured_kwargs
assert "reasoning_effort" not in captured_kwargs
assert "reasoning_effort" not in captured_kwargs


def test_llm_invoke_default_strength_matches_canonical():
    """Issue #505: llm_invoke()'s ``strength`` default must be pdd.DEFAULT_STRENGTH.

    Per the issue, the signature in pdd/llm_invoke.py hardcoded
    ``strength: float = 0.5`` while the canonical constant in
    pdd/__init__.py is DEFAULT_STRENGTH = 1.0. The parameter default is read
    through ``inspect.signature()`` and compared with the source of truth.
    """
    import inspect
    import pdd

    declared_default = inspect.signature(llm_invoke).parameters["strength"].default
    assert declared_default == pdd.DEFAULT_STRENGTH, (
        f"llm_invoke() strength default is {declared_default}, "
        f"expected pdd.DEFAULT_STRENGTH={pdd.DEFAULT_STRENGTH}"
    )
Loading