diff --git a/tests/test_e2e_issue_533_duplicate_validation.py b/tests/test_e2e_issue_533_duplicate_validation.py
new file mode 100644
index 000000000..11d5dbdd2
--- /dev/null
+++ b/tests/test_e2e_issue_533_duplicate_validation.py
@@ -0,0 +1,414 @@
+"""
+E2E tests for Issue #533: Orchestrator should validate LLM duplicate detection output.
+
+These E2E tests differ from the unit tests in test_issue_533_duplicate_validation.py:
+- Unit tests mock load_prompt_template and test orchestrator logic in isolation
+- E2E tests use REAL prompt loading via load_prompt_template (not mocked)
+- E2E tests exercise the full pipeline: prompt file → preprocess → format → orchestrator
+
+Bug: The orchestrator at line 441 of agentic_bug_orchestrator.py blindly trusts
+the LLM's duplicate detection output without validating that the original issue
+is actually resolved. When the LLM fails to follow prompt instructions and outputs
+"Duplicate of #520" even though #520 is OPEN, the orchestrator incorrectly triggers
+a hard stop and closes the issue.
+
+Real-world scenario (issue #530/#520):
+- User files issue #530 about a bug
+- LLM outputs "Duplicate of #520" without verifying #520's status
+- Issue #520 is still OPEN (unresolved)
+- Orchestrator blindly trusts LLM and stops workflow, closing #530
+- User had to manually reopen #530
+
+Root cause:
+The hard stop at line 441 checks for "Duplicate of #" but doesn't validate:
+    if step_num == 1 and "Duplicate of #" in output:
+        msg = f"Stopped at Step 1: Issue is a duplicate. {output.strip()}"
+        return False, msg, total_cost, last_model_used, changed_files
+
+Fix:
+The orchestrator should validate the original issue's state using `gh issue view`
+before triggering the hard stop. If the original issue is still OPEN (unresolved),
+the orchestrator should log a warning and continue the workflow.
+
+This is a regression of issue #469, which fixed the prompts but didn't add
+orchestrator-level validation as defense-in-depth against LLM instruction-following failures.
+
+Test Strategy:
+- Test 1: LLM outputs duplicate of UNRESOLVED issue → orchestrator validates & continues
+- Test 2: LLM outputs duplicate of RESOLVED issue → hard stop works correctly (regression)
+- Test 3: Exact scenario from issue #533 using real issue numbers #530/#520
+"""
+
+import os
+import re
+import subprocess
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from pdd.agentic_bug_orchestrator import run_agentic_bug_orchestrator
+
+# Project root: the worktree (or repo root) containing prompts/
+_PROJECT_ROOT = Path(__file__).resolve().parent.parent
+
+
+@pytest.fixture(autouse=True)
+def set_pdd_path_to_project_root():
+    """Point PDD_PATH at this checkout while each test runs, so load_prompt_template
+    reads prompts/ from this worktree; the previous value is restored afterwards."""
+    previous = os.environ.get("PDD_PATH")
+    os.environ["PDD_PATH"] = str(_PROJECT_ROOT)
+    yield
+    if previous is None:
+        os.environ.pop("PDD_PATH", None)
+    else:
+        os.environ["PDD_PATH"] = previous
+
+
+@pytest.fixture
+def mock_git_repo(tmp_path):
+    """Create a throwaway git repository and return its path.
+
+    The repository is created at ``tmp_path / "test_repo"`` on branch ``main``
+    with a test identity and a single commit that adds README.md.
+    """
+    sandbox = tmp_path / "test_repo"
+    sandbox.mkdir()
+
+    def git(*argv, **options):
+        # Every git call runs inside the sandbox and must exit successfully.
+        subprocess.run(["git", *argv], cwd=sandbox, check=True, **options)
+
+    # Output is captured for init and commit only; the config and add
+    # commands run without capture_output.
+    git("init", "-b", "main", capture_output=True)
+    git("config", "user.email", "test@test.com")
+    git("config", "user.name", "Test User")
+
+    # One tracked file is enough to produce the initial commit.
+    readme = sandbox / "README.md"
+    readme.write_text("# Test Repository\n")
+    git("add", ".")
+    git("commit", "-m", "Initial commit", capture_output=True)
+
+    return sandbox
+
+
+class TestIssue533DuplicateValidationE2E:
+    """
+    E2E tests for Issue #533: Orchestrator should validate duplicate detection.
+
+    These tests exercise the real prompt loading, preprocessing, and formatting
+    pipeline — only run_agentic_task (LLM execution), _setup_worktree, console
+    and subprocess.run (which supplies the `gh issue view` result) are patched.
+    """
+
+    def test_llm_outputs_duplicate_of_unresolved_issue_workflow_continues(self, mock_git_repo):
+        """
+        E2E Test: When the LLM fails to follow prompt instructions and outputs
+        "Duplicate of #520" even though #520 is OPEN (unresolved), the orchestrator
+        should validate the original issue's state and continue the workflow.
+
+        This is the PRIMARY BUG SCENARIO from issue #533.
+
+        This exercises the full code path:
+        1. Real load_prompt_template (loads actual prompt file from disk)
+        2. Real preprocess() and format() (expands includes, substitutes vars)
+        3. Real orchestrator loop logic (step iteration, hard-stop checks)
+        4. Mocked LLM (returns output where it failed to follow prompt instructions)
+        5. Mocked worktree setup and subprocess.run (no real git or gh calls)
+
+        The mock LLM simulates what happened in the real bug: the LLM output
+        "Duplicate of #520" WITHOUT properly checking that #520 was resolved.
+        The orchestrator should catch this error and validate the issue state.
+
+        EXPECTED BEHAVIOR (after fix):
+        - Orchestrator detects "Duplicate of #" in LLM output
+        - Orchestrator calls `gh issue view 520` to validate
+        - Orchestrator sees #520 is OPEN (unresolved)
+        - Orchestrator logs warning about LLM failing to follow instructions
+        - Orchestrator continues workflow to Step 2
+
+        BUGGY BEHAVIOR (before fix):
+        - Orchestrator detects "Duplicate of #" in LLM output
+        - Orchestrator immediately triggers hard stop
+        - Workflow stops at Step 1, issue closed as duplicate
+        - User has to manually reopen the issue
+        """
+        mock_worktree = mock_git_repo / ".pdd" / "worktrees" / "fix-issue-533"
+        mock_worktree.mkdir(parents=True, exist_ok=True)
+
+        steps_executed = []
+
+        def mock_run_agentic_task(instruction, cwd, verbose, quiet, timeout, label, max_retries):
+            """Mock LLM that fails to follow prompt instructions."""
+            match = re.search(r"step(\d+(?:_\d+)?)", label)
+            if match:
+                steps_executed.append(label)
+
+            if match and match.group(1) == "1":
+                # Exact step match, so a label like "step10" is not mistaken for step 1.
+                # The LLM FAILS to follow the prompt: it outputs "Duplicate of #520"
+                # without checking that #520 is OPEN (the real bug from issue #533).
+                return (
+                    True,
+                    "## Step 1: Duplicate Check\n\n"
+                    "**Status:** Duplicate of #520\n\n"
+                    "### Search Performed\n"
+                    "- Searched for: pdd bug agents closes duplicated issues\n"
+                    "- Issues reviewed: 5\n\n"
+                    "### Findings\n"
+                    "Found issue #520 which reports the exact same problem. "
+                    "This is a duplicate.\n\n"
+                    "---",
+                    0.01,
+                    "mock-model",
+                )
+
+            if match and match.group(1) == "7":
+                return (True, "Generated unit test\nFILES_CREATED: test_fix.py", 0.01, "mock-model")
+
+            return (True, f"Mock output for {label}", 0.01, "mock-model")
+
+        with patch("pdd.agentic_bug_orchestrator.run_agentic_task", side_effect=mock_run_agentic_task), \
+             patch("pdd.agentic_bug_orchestrator.console"), \
+             patch("pdd.agentic_bug_orchestrator._setup_worktree", return_value=(mock_worktree, None)), \
+             patch("subprocess.run") as mock_subprocess:
+
+            # Mock gh issue view to return that #520 is OPEN (unresolved)
+            def subprocess_side_effect(*args, **kwargs):
+                cmd = args[0] if args else kwargs.get('args', [])
+                if isinstance(cmd, list) and 'gh' in cmd and 'issue' in cmd and 'view' in cmd:
+                    # Return mock JSON showing issue #520 is OPEN
+                    mock_result = subprocess.CompletedProcess(
+                        args=cmd,
+                        returncode=0,
+                        stdout='{"number": 520, "state": "OPEN", "title": "Bug with pdd"}\n',
+                        stderr=''
+                    )
+                    return mock_result
+                # Any other command (e.g. git) gets a generic success result
+                return subprocess.CompletedProcess(
+                    args=cmd, returncode=0, stdout='', stderr=''
+                )
+
+            mock_subprocess.side_effect = subprocess_side_effect
+
+            success, message, cost, model, files = run_agentic_bug_orchestrator(
+                issue_url="https://github.com/promptdriven/pdd/issues/530",
+                issue_content="PDD bug agents still closes duplicated issues that are not resolved",
+                repo_owner="promptdriven",
+                repo_name="pdd",
+                issue_number=530,
+                issue_author="jiaminc-cmu",
+                issue_title="pdd bug agents still closes duplicated issues that are not resolved",
+                cwd=mock_git_repo,
+                verbose=False,
+                quiet=True,
+                use_github_state=False,
+            )
+
+        # The workflow should continue past Step 1 despite LLM outputting "Duplicate of #"
+        # because the orchestrator validates that #520 is OPEN (unresolved)
+        assert success is True, (
+            f"BUG DETECTED (Issue #533): Workflow should continue when LLM outputs "
+            f"duplicate of UNRESOLVED issue. The orchestrator should validate that "
+            f"#520 is OPEN and not trigger hard stop. Instead got: success={success}, "
+            f"msg={message}"
+        )
+        assert "step1" in steps_executed, "Step 1 should have executed"
+        assert len(steps_executed) == 11, (
+            f"All 11 steps should execute when the original duplicate is unresolved. "
+            f"The orchestrator should validate issue state before stopping. "
+            f"Got {len(steps_executed)} steps: {steps_executed}"
+        )
+
+    def test_llm_outputs_duplicate_of_resolved_issue_workflow_stops(self, mock_git_repo):
+        """
+        E2E Regression Test: When the LLM correctly identifies a CLOSED (resolved)
+        duplicate and outputs "Duplicate of #520", the workflow should hard-stop
+        at Step 1.
+
+        This ensures the fix for #533 doesn't break the valid duplicate detection path.
+
+        EXPECTED BEHAVIOR:
+        - LLM outputs "Duplicate of #520"
+        - Orchestrator validates with `gh issue view 520`
+        - Issue #520 is CLOSED (resolved)
+        - Orchestrator triggers hard stop
+        - Workflow stops at Step 1 (correct behavior)
+        """
+        mock_worktree = mock_git_repo / ".pdd" / "worktrees" / "fix-issue-533"
+        mock_worktree.mkdir(parents=True, exist_ok=True)
+
+        steps_executed = []
+
+        def mock_run_agentic_task(instruction, cwd, verbose, quiet, timeout, label, max_retries):
+            """Mock LLM that correctly identifies a resolved duplicate."""
+            match = re.search(r"step(\d+(?:_\d+)?)", label)
+            if match:
+                steps_executed.append(label)
+
+            if match and match.group(1) == "1":
+                # LLM correctly identifies a resolved duplicate
+                return (
+                    True,
+                    "## Step 1: Duplicate Check\n\n"
+                    "**Status:** Duplicate of #520\n\n"
+                    "### Search Performed\n"
+                    "- Searched for: pdd bug agents closes duplicated issues\n"
+                    "- Issues reviewed: 5\n\n"
+                    "### Findings\n"
+                    "Issue #520 was resolved in PR #525. This is a duplicate.\n\n"
+                    "---",
+                    0.01,
+                    "mock-model",
+                )
+
+            return (True, f"Mock output for {label}", 0.01, "mock-model")
+
+        with patch("pdd.agentic_bug_orchestrator.run_agentic_task", side_effect=mock_run_agentic_task), \
+             patch("pdd.agentic_bug_orchestrator.console"), \
+             patch("pdd.agentic_bug_orchestrator._setup_worktree", return_value=(mock_worktree, None)), \
+             patch("subprocess.run") as mock_subprocess:
+
+            # Mock gh issue view to return that #520 is CLOSED (resolved)
+            def subprocess_side_effect(*args, **kwargs):
+                cmd = args[0] if args else kwargs.get('args', [])
+                if isinstance(cmd, list) and 'gh' in cmd and 'issue' in cmd and 'view' in cmd:
+                    # Return mock JSON showing issue #520 is CLOSED
+                    mock_result = subprocess.CompletedProcess(
+                        args=cmd,
+                        returncode=0,
+                        stdout='{"number": 520, "state": "CLOSED", "title": "Bug with pdd"}\n',
+                        stderr=''
+                    )
+                    return mock_result
+                # Any other command (e.g. git) gets a generic success result
+                return subprocess.CompletedProcess(
+                    args=cmd, returncode=0, stdout='', stderr=''
+                )
+
+            mock_subprocess.side_effect = subprocess_side_effect
+
+            success, message, cost, model, files = run_agentic_bug_orchestrator(
+                issue_url="https://github.com/promptdriven/pdd/issues/530",
+                issue_content="PDD bug agents still closes duplicated issues that are not resolved",
+                repo_owner="promptdriven",
+                repo_name="pdd",
+                issue_number=530,
+                issue_author="jiaminc-cmu",
+                issue_title="pdd bug agents still closes duplicated issues that are not resolved",
+                cwd=mock_git_repo,
+                verbose=False,
+                quiet=True,
+                use_github_state=False,
+            )
+
+        # Workflow should stop at Step 1 — this is CORRECT behavior for resolved duplicates
+        assert success is False, (
+            "Workflow should stop for resolved duplicates. This is the correct behavior."
+        )
+        assert "Stopped at Step 1" in message, (
+            f"Message should indicate Step 1 hard stop. Got: {message}"
+        )
+        assert "duplicate" in message.lower(), (
+            f"Message should mention duplicate. Got: {message}"
+        )
+        assert len(steps_executed) == 1, (
+            f"Only Step 1 should execute for a resolved duplicate. "
+            f"Got: {steps_executed}"
+        )
+
+    def test_exact_scenario_issue_533_with_real_issue_numbers(self, mock_git_repo):
+        """
+        E2E Test: Exact scenario from issue #533 using real issue numbers.
+
+        Real-world events:
+        - User filed issue #530 about a bug
+        - LLM Step 1 output: "Duplicate of #520"
+        - Issue #520 was OPEN (unresolved)
+        - Orchestrator stopped workflow and closed #530
+        - User had to manually reopen #530
+
+        This test verifies the orchestrator should validate that #520 is OPEN
+        and continue the workflow instead of blindly trusting the LLM.
+        """
+        mock_worktree = mock_git_repo / ".pdd" / "worktrees" / "fix-issue-530"
+        mock_worktree.mkdir(parents=True, exist_ok=True)
+
+        steps_executed = []
+
+        def mock_run_agentic_task(instruction, cwd, verbose, quiet, timeout, label, max_retries):
+            """Mock LLM that reproduces the exact output from issue #533."""
+            match = re.search(r"step(\d+(?:_\d+)?)", label)
+            if match:
+                steps_executed.append(label)
+
+            if match and match.group(1) == "1":
+                # Reproduce the exact LLM output that caused issue #533
+                return (
+                    True,
+                    "## Step 1: Duplicate Check\n\n"
+                    "**Status:** Duplicate of #520\n\n"
+                    "This issue appears to be a duplicate of #520.\n",
+                    0.01,
+                    "mock-model",
+                )
+
+            if match and match.group(1) == "7":
+                return (True, "Generated unit test\nFILES_CREATED: test_fix.py", 0.01, "mock-model")
+
+            return (True, f"Mock output for {label}", 0.01, "mock-model")
+
+        with patch("pdd.agentic_bug_orchestrator.run_agentic_task", side_effect=mock_run_agentic_task), \
+             patch("pdd.agentic_bug_orchestrator.console"), \
+             patch("pdd.agentic_bug_orchestrator._setup_worktree", return_value=(mock_worktree, None)), \
+             patch("subprocess.run") as mock_subprocess:
+
+            # Mock gh issue view to return that #520 is OPEN (matching real scenario)
+            def subprocess_side_effect(*args, **kwargs):
+                cmd = args[0] if args else kwargs.get('args', [])
+                if isinstance(cmd, list) and 'gh' in cmd and 'issue' in cmd and 'view' in cmd and '520' in str(cmd):
+                    # Return mock JSON showing issue #520 is OPEN (unresolved)
+                    mock_result = subprocess.CompletedProcess(
+                        args=cmd,
+                        returncode=0,
+                        stdout='{"number": 520, "state": "OPEN", "title": "pdd fails to use the latest version of claude 3.7 sonnet"}\n',
+                        stderr=''
+                    )
+                    return mock_result
+                # Any other command (e.g. git) gets a generic success result
+                return subprocess.CompletedProcess(
+                    args=cmd, returncode=0, stdout='', stderr=''
+                )
+
+            mock_subprocess.side_effect = subprocess_side_effect
+
+            success, message, cost, model, files = run_agentic_bug_orchestrator(
+                issue_url="https://github.com/promptdriven/pdd/issues/530",
+                issue_content="For example, this one:https://github.com/promptdriven/pdd/issues/520; I had to manually reopen it",
+                repo_owner="promptdriven",
+                repo_name="pdd",
+                issue_number=530,
+                issue_author="jiaminc-cmu",
+                issue_title="pdd bug agents still closes duplicated issues that are not resolved",
+                cwd=mock_git_repo,
+                verbose=False,
+                quiet=True,
+                use_github_state=False,
+            )
+
+        # This is the exact scenario from issue #533 - workflow should continue
+        assert success is True, (
+            f"BUG DETECTED (Issue #533 - Exact Scenario): This reproduces the exact "
+            f"bug reported in issue #533 where issue #530 was incorrectly closed as "
+            f"a duplicate of the still-OPEN issue #520. The orchestrator should "
+            f"validate that #520 is OPEN and continue the workflow. "
+            f"Instead got: success={success}, msg={message}"
+        )
+        assert len(steps_executed) == 11, (
+            f"All 11 steps should execute. Issue #530 should NOT be closed as a "
+            f"duplicate when #520 is still OPEN. Got {len(steps_executed)} steps: {steps_executed}"
+        )
diff --git a/tests/test_issue_533_duplicate_validation.py b/tests/test_issue_533_duplicate_validation.py
new file mode 100644
index 000000000..eeddfe0a5
--- /dev/null
+++ b/tests/test_issue_533_duplicate_validation.py
@@ -0,0 +1,479 @@
+"""
+Tests for Issue #533: Orchestrator should validate LLM duplicate detection output.
+
+Bug: The orchestrator at line 441 of agentic_bug_orchestrator.py blindly trusts
+the LLM's duplicate detection output without validating that the original issue
+is actually resolved. When the LLM fails to follow prompt instructions and outputs
+"Duplicate of #520" even though #520 is OPEN, the orchestrator incorrectly triggers
+a hard stop and closes the issue.
+
+Root cause:
+The hard stop at line 441 checks for the string "Duplicate of #" but doesn't
+validate the original issue's state:
+    if step_num == 1 and "Duplicate of #" in output:
+        msg = f"Stopped at Step 1: Issue is a duplicate. {output.strip()}"
+        return False, msg, total_cost, last_model_used, changed_files
+
+Fix:
+The orchestrator should validate the original issue's state using `gh issue view`
+before triggering the hard stop. If the original issue is still OPEN (unresolved),
+the orchestrator should log a warning and continue the workflow instead of stopping.
+
+These tests verify:
+1. When LLM outputs duplicate of UNRESOLVED issue → orchestrator validates & continues
+2. When LLM outputs duplicate of RESOLVED issue → hard stop works correctly
+3. Edge cases: invalid issue numbers, network errors, various output formats
+4. Fail-safe behavior: errors default to letting workflow continue
+
+This is a regression of issue #469, which fixed the prompts but didn't add
+orchestrator-level validation as defense-in-depth.
+"""
+
+import pytest
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+import subprocess
+
+from pdd.agentic_bug_orchestrator import run_agentic_bug_orchestrator
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def bug_mock_dependencies(tmp_path):
+    """Patch the orchestrator's collaborators; yield (run, load, console, worktree) mocks."""
+    worktree_dir = tmp_path.joinpath(".pdd", "worktrees", "fix-issue-533")
+    worktree_dir.mkdir(parents=True, exist_ok=True)
+
+    with patch("pdd.agentic_bug_orchestrator.run_agentic_task") as run_task, \
+         patch("pdd.agentic_bug_orchestrator.load_prompt_template") as load_template, \
+         patch("pdd.agentic_bug_orchestrator.console") as console_mock, \
+         patch("pdd.agentic_bug_orchestrator._setup_worktree") as setup_worktree:
+
+        setup_worktree.return_value = (worktree_dir, None)
+        load_template.return_value = "Prompt for {issue_number}"
+        run_task.return_value = (True, "Step output", 0.1, "gpt-4")
+
+        yield run_task, load_template, console_mock, setup_worktree
+
+
+@pytest.fixture
+def bug_default_args(tmp_path):
+    """Keyword arguments for run_agentic_bug_orchestrator, with cwd set to tmp_path."""
+    return dict(
+        issue_url="http://github.com/owner/repo/issues/533",
+        issue_content="Bug description",
+        repo_owner="owner",
+        repo_name="repo",
+        issue_number=533,
+        issue_author="user",
+        issue_title="Bug Title",
+        cwd=tmp_path,
+        verbose=False,
+        quiet=True,
+        use_github_state=False,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Test 1: LLM outputs duplicate of UNRESOLVED issue → workflow should continue
+# ---------------------------------------------------------------------------
+
+def test_bug_orchestrator_llm_outputs_duplicate_of_unresolved_issue(
+    bug_mock_dependencies, bug_default_args
+):
+    """
+    PRIMARY BUG SCENARIO: When the LLM fails to follow prompt instructions and
+    outputs "Duplicate of #520" even though #520 is OPEN (unresolved), the
+    orchestrator should validate the original issue's state and continue the
+    workflow instead of incorrectly stopping.
+
+    This test simulates the exact scenario from issue #533/#530/#520:
+    - Issue #530 reports a bug
+    - LLM outputs "Duplicate of #520" (failing to check resolution status)
+    - Issue #520 is still OPEN (unresolved)
+    - Expected: Orchestrator validates, logs warning, continues to Step 2
+    - Buggy behavior: Hard stop at Step 1, issue closed as duplicate
+
+    This test will FAIL on the current buggy code (before fix) because line 441
+    doesn't validate the original issue's state.
+    """
+    mock_run, _, _, _ = bug_mock_dependencies
+
+    # Simulate LLM failing to follow instructions: outputs duplicate without
+    # checking that the original issue is still OPEN
+    llm_output_duplicate_unresolved = (
+        "## Step 1: Duplicate Check\n\n"
+        "**Status:** Duplicate of #520\n\n"
+        "### Findings\n"
+        "This issue has the same symptoms as #520."
+    )
+
+    def side_effect(*args, **kwargs):
+        label = kwargs.get("label", "")
+        if label == "step1":
+            return (True, llm_output_duplicate_unresolved, 0.1, "gpt-4")
+        if label == "step7":
+            return (True, "Generated test\nFILES_CREATED: test_fix.py", 0.1, "gpt-4")
+        return (True, f"Output for {label}", 0.1, "gpt-4")
+
+    mock_run.side_effect = side_effect
+
+    # Patch subprocess.run; its return_value answers every call, not only `gh issue view`
+    with patch("subprocess.run") as mock_subprocess:
+        # NOTE(review): the E2E tests mock JSON stdout instead — confirm which format is parsed
+        mock_result = MagicMock()
+        mock_result.returncode = 0
+        mock_result.stdout = "state: OPEN\ntitle: Original Issue\n"
+        mock_subprocess.return_value = mock_result
+
+        success, msg, cost, model, files = run_agentic_bug_orchestrator(
+            **bug_default_args
+        )
+
+    # CRITICAL ASSERTION: Workflow should continue (not stop at Step 1)
+    # The buggy code will FAIL this assertion because it triggers hard stop
+    # without validating the original issue's state
+    assert success is True, (
+        f"Workflow should continue when LLM outputs duplicate of UNRESOLVED issue. "
+        f"The orchestrator should validate that #520 is OPEN and not trigger hard stop. "
+        f"Instead got: success={success}, msg={msg}"
+    )
+
+    # Verify workflow completed all steps (not stopped at Step 1)
+    assert mock_run.call_count == 11, (
+        f"All 11 steps should execute when original issue is unresolved. "
+        f"Got {mock_run.call_count} steps instead."
+    )
+
+    assert "Investigation complete" in msg, (
+        f"Expected completion message, got: {msg}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Test 2: LLM outputs duplicate of RESOLVED issue → hard stop (regression test)
+# ---------------------------------------------------------------------------
+
+def test_bug_orchestrator_llm_outputs_duplicate_of_resolved_issue(
+    bug_mock_dependencies, bug_default_args
+):
+    """
+    REGRESSION TEST: When the LLM outputs "Duplicate of #520" and #520 is
+    actually CLOSED (resolved), the orchestrator should trigger the hard stop.
+    This is the correct behavior and should not be broken by the fix.
+
+    This ensures we don't break valid duplicate detection when fixing the bug.
+    """
+    mock_run, _, _, _ = bug_mock_dependencies
+
+    # LLM correctly outputs duplicate of a resolved issue
+    llm_output_duplicate_resolved = (
+        "## Step 1: Duplicate Check\n\n"
+        "**Status:** Duplicate of #520\n\n"
+        "### Findings\n"
+        "Issue #520 was resolved in PR #525. This is a duplicate."
+    )
+    mock_run.return_value = (True, llm_output_duplicate_resolved, 0.05, "claude")
+
+    # subprocess.run is patched wholesale: every call reports state CLOSED
+    with patch("subprocess.run") as mock_subprocess:
+        mock_result = MagicMock()
+        mock_result.returncode = 0
+        mock_result.stdout = "state: CLOSED\ntitle: Original Issue\n"
+        mock_subprocess.return_value = mock_result
+
+        success, msg, cost, _, _ = run_agentic_bug_orchestrator(**bug_default_args)
+
+    # Hard stop should be triggered (correct duplicate detection)
+    assert success is False, (
+        f"Workflow should stop when original issue is CLOSED. Got success={success}"
+    )
+    assert "Stopped at Step 1" in msg
+    assert "duplicate" in msg.lower()
+    assert mock_run.call_count == 1
+    assert cost == 0.05  # the 0.05 returned by the single mocked step call
+
+
+# ---------------------------------------------------------------------------
+# Test 3: Various duplicate output formats
+# ---------------------------------------------------------------------------
+
+class TestDuplicateOutputFormats:
+    """
+    Test that the orchestrator correctly extracts issue numbers from various
+    LLM output formats and validates them.
+    """
+
+    @pytest.mark.parametrize(
+        "llm_output,expected_issue_num",
+        [
+            # Standard format
+            ("Duplicate of #520", "520"),
+            # With context
+            ("Duplicate of #520 (resolved in PR #525)", "520"),
+            # Multiple issue mentions (should extract first after "Duplicate of")
+            ("Related to #100, but Duplicate of #520", "520"),
+            # With markdown
+            ("**Status:** Duplicate of #520\n\nClosing.", "520"),
+        ],
+        ids=[
+            "standard_format",
+            "with_pr_reference",
+            "multiple_issues",
+            "with_markdown",
+        ],
+    )
+    def test_duplicate_extraction_formats(
+        self, bug_mock_dependencies, bug_default_args, llm_output, expected_issue_num
+    ):
+        """
+        Step 1 returns each duplicate format (Step 7 reports a created file, as in
+        the other full-run tests); the number must reach the `gh issue` command.
+        """
+        mock_run, _, _, _ = bug_mock_dependencies
+        step_outputs = {"step1": llm_output, "step7": "Generated test\nFILES_CREATED: test_fix.py"}
+        mock_run.side_effect = lambda *args, **kwargs: (
+            True, step_outputs.get(kwargs.get("label", ""), "Step output"), 0.05, "claude"
+        )
+
+        # Mock gh issue view to return OPEN state
+        with patch("subprocess.run") as mock_subprocess:
+            mock_result = MagicMock()
+            mock_result.returncode = 0
+            mock_result.stdout = "state: OPEN\ntitle: Issue\n"
+            mock_subprocess.return_value = mock_result
+
+            success, msg, _, _, _ = run_agentic_bug_orchestrator(**bug_default_args)
+
+            # Keep `gh issue` commands; calls without positional args are skipped, not an error
+            assert mock_subprocess.called, "orchestrator should call gh issue view to validate"
+            gh_calls = [
+                call for call in mock_subprocess.call_args_list
+                if call[0] and list(call[0][0][:2]) == ["gh", "issue"]
+            ]
+            assert gh_calls, "Should have called gh issue view"
+            # Inspect the command only: kwargs such as cwd may contain the digits too
+            assert any(expected_issue_num in str(part) for part in gh_calls[0][0][0]), (
+                f"Should check issue #{expected_issue_num}, calls: {gh_calls}"
+            )
+
+        # Since original is OPEN, workflow should continue
+        assert success is True, f"Should continue when original is OPEN, got: {msg}"
+
+
+# ---------------------------------------------------------------------------
+# Test 4: Invalid issue number handling
+# ---------------------------------------------------------------------------
+
+def test_bug_orchestrator_invalid_issue_number_failsafe(
+    bug_mock_dependencies, bug_default_args
+):
+    """
+    FAIL-SAFE TEST: When the LLM outputs a duplicate of a non-existent issue
+    (e.g., #99999), gh issue view will fail. The orchestrator should treat
+    this as "unresolved" (fail-safe) and let the workflow continue rather
+    than crashing or incorrectly stopping.
+    """
+    mock_run, _, _, _ = bug_mock_dependencies
+
+    llm_output_invalid = "Duplicate of #99999"
+
+    def side_effect(*args, **kwargs):
+        label = kwargs.get("label", "")
+        if label == "step1":
+            return (True, llm_output_invalid, 0.1, "gpt-4")
+        if label == "step7":
+            return (True, "Generated test\nFILES_CREATED: test_fix.py", 0.1, "gpt-4")
+        return (True, f"Output for {label}", 0.1, "gpt-4")
+
+    mock_run.side_effect = side_effect
+
+    # Every subprocess.run call (not only gh) returns exit code 1 with "issue not found"
+    with patch("subprocess.run") as mock_subprocess:
+        mock_result = MagicMock()
+        mock_result.returncode = 1  # Error
+        mock_result.stderr = "issue not found"  # stdout is never set, so it stays an auto-created MagicMock
+        mock_subprocess.return_value = mock_result
+
+        success, msg, _, _, _ = run_agentic_bug_orchestrator(**bug_default_args)
+
+    # Fail-safe: should continue workflow (treat as unresolved)
+    assert success is True, (
+        f"When gh issue view fails, orchestrator should fail-safe and continue. "
+        f"Got: success={success}, msg={msg}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Test 5: Network/CLI error handling
+# ---------------------------------------------------------------------------
+
+def test_bug_orchestrator_gh_cli_error_failsafe(
+    bug_mock_dependencies, bug_default_args
+):
+    """
+    Fail-safe check for a gh invocation that times out.
+
+    Step 1 reports "Duplicate of #520" and every mocked ``gh`` command raises
+    subprocess.TimeoutExpired. The orchestrator is expected to report success
+    rather than stopping at Step 1 or propagating the timeout.
+    """
+    run_step_mock, _, _, _ = bug_mock_dependencies
+
+    duplicate_claim = "Duplicate of #520"
+
+    def fake_step(*_args, **kwargs):
+        # Canned step results, selected by the ``label`` keyword.
+        label = kwargs.get("label", "")
+        text = f"Output for {label}"
+        if label == "step1":
+            text = duplicate_claim
+        elif label == "step7":
+            text = "Generated test\nFILES_CREATED: test_fix.py"
+        return (True, text, 0.1, "gpt-4")
+
+    def fake_subprocess_run(cmd, *_args, **_kwargs):
+        # Dispatch on the executable name, i.e. the first element of ``cmd``.
+        program = cmd[0]
+        if program == "gh":
+            # Simulated CLI failure: the gh call never completes.
+            raise subprocess.TimeoutExpired("gh", 5)
+        result = MagicMock(returncode=0)
+        if program == "git":
+            # git calls succeed and report the configured cwd on stdout;
+            # presumably what _get_git_root reads - confirm in orchestrator.
+            result.stdout = str(bug_default_args["cwd"])
+        return result
+
+    run_step_mock.side_effect = fake_step
+
+    # Route subprocess.run through the dispatcher for the whole run.
+    with patch("subprocess.run", side_effect=fake_subprocess_run):
+        success, msg, _, _, _ = run_agentic_bug_orchestrator(**bug_default_args)
+
+    # The timeout must be absorbed and the workflow must carry on.
+    assert success is True, (
+        f"When gh CLI fails/times out, orchestrator should fail-safe and continue. "
+        f"Got: success={success}, msg={msg}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Test 6: Regression test - exact scenario from issue #533
+# ---------------------------------------------------------------------------
+
+def test_bug_orchestrator_issue_533_exact_scenario(
+    bug_mock_dependencies, bug_default_args
+):
+    """
+    Regression check replaying the incident reported in issue #533.
+
+    Scenario being replayed:
+    - the run targets issue #530 (number and URL are overridden below)
+    - Step 1 reports "Duplicate of #520"
+    - every mocked subprocess.run call succeeds and prints "state: OPEN"
+
+    Expected outcome: the orchestrator reports success and its message
+    contains "Investigation complete", i.e. #530 is not stopped at Step 1.
+    """
+    run_step_mock, _, _, _ = bug_mock_dependencies
+
+    # Point the run at issue #530, the issue that was wrongly closed.
+    bug_default_args["issue_number"] = 530
+    bug_default_args["issue_url"] = "http://github.com/owner/repo/issues/530"
+
+    # Step 1 text claiming that #530 duplicates #520.
+    step1_report = (
+        "## Step 1: Duplicate Check\n\n"
+        "Duplicate of #520\n\n"
+        "This issue appears to be the same as #520."
+    )
+
+    def fake_step(*_args, **kwargs):
+        # Canned step results, selected by the ``label`` keyword.
+        label = kwargs.get("label", "")
+        text = f"Output for {label}"
+        if label == "step1":
+            text = step1_report
+        elif label == "step7":
+            text = "Generated test\nFILES_CREATED: test_fix.py"
+        return (True, text, 0.1, "gpt-4")
+
+    run_step_mock.side_effect = fake_step
+
+    # Every subprocess.run call gets the same canned result: exit code 0 and
+    # stdout describing an issue whose state is OPEN.
+    open_issue = MagicMock(returncode=0)
+    open_issue.stdout = "state: OPEN\ntitle: Original bug\ncreatedAt: 2026-02-14\n"
+    with patch("subprocess.run", return_value=open_issue):
+        success, msg, _, _, _ = run_agentic_bug_orchestrator(**bug_default_args)
+
+    # An OPEN original must not trigger the duplicate hard stop.
+    assert success is True, (
+        f"Issue #530 should NOT have been closed as duplicate of unresolved #520. "
+        f"This is the exact bug from issue #533. Got: success={success}, msg={msg}"
+    )
+    assert "Investigation complete" in msg
+
+
+# ---------------------------------------------------------------------------
+# Test 7: No false positives - conversational mentions shouldn't trigger validation
+# ---------------------------------------------------------------------------
+
+def test_bug_orchestrator_no_validation_without_duplicate_marker(
+    bug_mock_dependencies, bug_default_args
+):
+    """
+    Check that no issue lookup happens when Step 1 claims no duplicate.
+
+    The Step 1 text mentions related issue #520 in passing but never contains
+    the "Duplicate of #" marker. The orchestrator is expected to report
+    success, and subprocess.run must never receive a command whose first two
+    elements are "gh" and "issue".
+    """
+    run_step_mock, _, _, _ = bug_mock_dependencies
+
+    # Mentions #520 as related, yet explicitly reports no duplicates.
+    step1_report = (
+        "## Step 1: Duplicate Check\n\n"
+        "**Status:** No duplicates found\n\n"
+        "### Findings\n"
+        "Found related issue #520, but it has different symptoms. "
+        "Proceeding with investigation."
+    )
+
+    def fake_step(*_args, **kwargs):
+        # Canned step results, selected by the ``label`` keyword.
+        label = kwargs.get("label", "")
+        text = f"Output for {label}"
+        if label == "step1":
+            text = step1_report
+        elif label == "step7":
+            text = "Generated test\nFILES_CREATED: test_fix.py"
+        return (True, text, 0.1, "gpt-4")
+
+    run_step_mock.side_effect = fake_step
+
+    # Keep a handle on the patched subprocess.run so its calls can be
+    # inspected once the orchestrator has returned.
+    open_issue = MagicMock(returncode=0, stdout="state: OPEN\n")
+    with patch("subprocess.run", return_value=open_issue) as run_spy:
+        success, msg, _, _, _ = run_agentic_bug_orchestrator(**bug_default_args)
+
+    # With no duplicate claim the workflow simply proceeds.
+    assert success is True, f"Workflow should continue, got: {msg}"
+
+    # Collect recorded calls whose command (first positional argument)
+    # begins with "gh", "issue"; none are expected, because validation is
+    # only warranted when Step 1 actually claims a duplicate.
+    issue_lookups = [
+        recorded
+        for recorded in run_spy.call_args_list
+        if recorded[0][0][:2] == ["gh", "issue"]
+    ]
+    assert not issue_lookups, (
+        "Should not call gh issue view when no 'Duplicate of #' marker present"
+    )