|
| 1 | +--- |
| 2 | +title: Iterative Refinement |
| 3 | +description: Implement iterative refinement workflows where agents refine their work based on critique feedback until quality thresholds are met. |
| 4 | +--- |
| 5 | + |
| 6 | +## Overview |
| 7 | + |
| 8 | +Iterative refinement is a powerful pattern where multiple agents work together in a feedback loop: |
| 9 | +1. A **refactoring agent** performs the main task (e.g., code conversion) |
| 10 | +2. A **critique agent** evaluates the quality and provides detailed feedback |
| 11 | +3. If the quality score is below the threshold, the refactoring agent tries again using the critique feedback |
| 12 | + |
| 13 | +This pattern is useful for: |
| 14 | +- Code refactoring and modernization (e.g., COBOL to Java) |
| 15 | +- Document translation and localization |
| 16 | +- Content generation with quality requirements |
| 17 | +- Any task requiring iterative improvement |
| 18 | + |
| 19 | +## Quick Start |
| 20 | + |
| 21 | +<Note> |
| 22 | +This example is available on GitHub: [examples/01_standalone_sdk/31_iterative_refinement.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/01_standalone_sdk/31_iterative_refinement.py) |
| 23 | +</Note> |
| 24 | + |
| 25 | +```python icon="python" expandable examples/01_standalone_sdk/31_iterative_refinement.py |
| 26 | +""" |
| 27 | +Iterative Refinement Example: COBOL to Java Refactoring |
| 28 | +
|
| 29 | +This example demonstrates an iterative refinement workflow where: |
| 30 | +1. A refactoring agent converts COBOL files to Java files |
| 31 | +2. A critique agent evaluates the quality of each conversion and provides scores |
| 32 | +3. If the average score is below 90%, the process repeats with feedback |
| 33 | +
|
| 34 | +The workflow continues until the refactoring meets the quality threshold. |
| 35 | +""" |
| 36 | + |
| 37 | +import os |
| 38 | +import re |
| 39 | +import tempfile |
| 40 | +from pathlib import Path |
| 41 | + |
| 42 | +from pydantic import SecretStr |
| 43 | + |
| 44 | +from openhands.sdk import LLM, Conversation |
| 45 | +from openhands.tools.preset.default import get_default_agent |
| 46 | + |
| 47 | + |
| 48 | +QUALITY_THRESHOLD = 90.0 |
| 49 | +MAX_ITERATIONS = 5 |
| 50 | + |
| 51 | + |
| 52 | +def setup_workspace() -> tuple[Path, Path, Path]: |
| 53 | + """Create workspace directories for the refactoring workflow.""" |
| 54 | + workspace_dir = Path(tempfile.mkdtemp()) |
| 55 | + cobol_dir = workspace_dir / "cobol" |
| 56 | + java_dir = workspace_dir / "java" |
| 57 | + critique_dir = workspace_dir / "critiques" |
| 58 | + |
| 59 | + cobol_dir.mkdir(parents=True, exist_ok=True) |
| 60 | + java_dir.mkdir(parents=True, exist_ok=True) |
| 61 | + critique_dir.mkdir(parents=True, exist_ok=True) |
| 62 | + |
| 63 | + return workspace_dir, cobol_dir, java_dir |
| 64 | + |
| 65 | + |
| 66 | +def create_sample_cobol_files(cobol_dir: Path) -> list[str]: |
| 67 | + """Create sample COBOL files for demonstration.""" |
| 68 | + # Sample COBOL files based on AWS CardDemo structure |
| 69 | + sample_files = { |
| 70 | + "CBACT01C.cbl": "...", # Account Display Program |
| 71 | + "CBCUS01C.cbl": "...", # Customer Information Program |
| 72 | + "CBTRN01C.cbl": "...", # Transaction Processing Program |
| 73 | + } |
| 74 | + created_files = [] |
| 75 | + for filename, content in sample_files.items(): |
| 76 | + file_path = cobol_dir / filename |
| 77 | + file_path.write_text(content) |
| 78 | + created_files.append(filename) |
| 79 | + return created_files |
| 80 | + |
| 81 | + |
| 82 | +def parse_critique_score(critique_file: Path) -> float: |
| 83 | + """Parse the average score from the critique report.""" |
| 84 | + if not critique_file.exists(): |
| 85 | + return 0.0 |
| 86 | + content = critique_file.read_text() |
| 87 | + patterns = [ |
| 88 | + r"\*\*Average Score\*\*:\s*(\d+(?:\.\d+)?)", |
| 89 | + r"Average Score:\s*(\d+(?:\.\d+)?)", |
| 90 | + ] |
| 91 | + for pattern in patterns: |
| 92 | + match = re.search(pattern, content, re.IGNORECASE) |
| 93 | + if match: |
| 94 | + return float(match.group(1)) |
| 95 | + return 0.0 |
| 96 | + |
| 97 | + |
| 98 | +def run_iterative_refinement() -> None: |
| 99 | + """Run the iterative refinement workflow.""" |
| 100 | + api_key = os.getenv("LLM_API_KEY") |
| 101 | + model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929") |
| 102 | + |
| 103 | + llm = LLM( |
| 104 | + model=model, |
| 105 | + api_key=SecretStr(api_key), |
| 106 | + usage_id="iterative_refinement", |
| 107 | + ) |
| 108 | + |
| 109 | + workspace_dir, cobol_dir, java_dir = setup_workspace() |
| 110 | + critique_dir = workspace_dir / "critiques" |
| 111 | + cobol_files = create_sample_cobol_files(cobol_dir) |
| 112 | + |
| 113 | + critique_file = critique_dir / "critique_report.md" |
| 114 | + current_score = 0.0 |
| 115 | + iteration = 0 |
| 116 | + |
| 117 | + while current_score < QUALITY_THRESHOLD and iteration < MAX_ITERATIONS: |
| 118 | + iteration += 1 |
| 119 | + |
| 120 | + # Phase 1: Refactoring |
| 121 | + refactoring_agent = get_default_agent(llm=llm, cli_mode=True) |
| 122 | + refactoring_conversation = Conversation( |
| 123 | + agent=refactoring_agent, |
| 124 | + workspace=str(workspace_dir), |
| 125 | + ) |
| 126 | + refactoring_conversation.send_message(get_refactoring_prompt(...)) |
| 127 | + refactoring_conversation.run() |
| 128 | + |
| 129 | + # Phase 2: Critique |
| 130 | + critique_agent = get_default_agent(llm=llm, cli_mode=True) |
| 131 | + critique_conversation = Conversation( |
| 132 | + agent=critique_agent, |
| 133 | + workspace=str(workspace_dir), |
| 134 | + ) |
| 135 | + critique_conversation.send_message(get_critique_prompt(...)) |
| 136 | + critique_conversation.run() |
| 137 | + |
| 138 | + current_score = parse_critique_score(critique_file) |
| 139 | + |
| 140 | + print(f"Final score: {current_score:.1f}%") |
| 141 | + print(f"EXAMPLE_COST: {llm.metrics.accumulated_cost}") |
| 142 | + |
| 143 | + |
| 144 | +if __name__ == "__main__": |
| 145 | + run_iterative_refinement() |
| 146 | +``` |
| 147 | + |
| 148 | +```bash Running the Example |
| 149 | +export LLM_API_KEY="your-api-key" |
| 150 | +cd agent-sdk |
| 151 | +uv run python examples/01_standalone_sdk/31_iterative_refinement.py |
| 152 | +``` |
| 153 | + |
| 154 | +## How It Works |
| 155 | + |
| 156 | +### The Iteration Loop |
| 157 | + |
| 158 | +The core workflow runs in a loop until the quality threshold is met or the maximum number of iterations is reached: |
| 159 | + |
| 160 | +```python |
| 161 | +QUALITY_THRESHOLD = 90.0 |
| 162 | +MAX_ITERATIONS = 5 |
| 163 | + |
| 164 | +while current_score < QUALITY_THRESHOLD and iteration < MAX_ITERATIONS: |
| 165 | + # Phase 1: Refactoring agent converts COBOL to Java |
| 166 | + refactoring_agent = get_default_agent(llm=llm, cli_mode=True) |
| 167 | + refactoring_conversation = Conversation(agent=refactoring_agent, workspace=str(workspace_dir)) |
| 168 | + refactoring_conversation.send_message(refactoring_prompt) |
| 169 | + refactoring_conversation.run() |
| 170 | + |
| 171 | + # Phase 2: Critique agent evaluates the conversion |
| 172 | + critique_agent = get_default_agent(llm=llm, cli_mode=True) |
| 173 | + critique_conversation = Conversation(agent=critique_agent, workspace=str(workspace_dir)) |
| 174 | + critique_conversation.send_message(critique_prompt) |
| 175 | + critique_conversation.run() |
| 176 | + |
| 177 | + # Parse score and decide whether to continue |
| 178 | + current_score = parse_critique_score(critique_file) |
| 179 | + |
| 180 | + iteration += 1 |
| 181 | +``` |
| 182 | + |
| 183 | +### Critique Scoring |
| 184 | + |
| 185 | +The critique agent evaluates each file on four dimensions (0-25 points each, for a maximum of 100 per file): |
| 186 | +- **Correctness**: Does the Java code preserve the original business logic? |
| 187 | +- **Code Quality**: Is the code clean and following Java conventions? |
| 188 | +- **Completeness**: Are all COBOL features properly converted? |
| 189 | +- **Best Practices**: Does it use proper OOP, error handling, and documentation? |
| 190 | + |
| 191 | +### Feedback Loop |
| 192 | + |
| 193 | +When the score is below the threshold, the refactoring agent's next prompt includes the location of the critique file: |
| 194 | + |
| 195 | +```python |
| 196 | +if critique_file and critique_file.exists(): |
| 197 | + base_prompt += f""" |
| 198 | +IMPORTANT: A previous refactoring attempt was evaluated and needs improvement. |
| 199 | +Please review the critique at: {critique_file} |
| 200 | +Address all issues mentioned in the critique to improve the conversion quality. |
| 201 | +""" |
| 202 | +``` |
| 203 | + |
| 204 | +## Customization |
| 205 | + |
| 206 | +### Adjusting Thresholds |
| 207 | + |
| 208 | +```python |
| 209 | +QUALITY_THRESHOLD = 95.0 # Require higher quality |
| 210 | +MAX_ITERATIONS = 10 # Allow more iterations |
| 211 | +``` |
| 212 | + |
| 213 | +### Using Real COBOL Files |
| 214 | + |
| 215 | +The example uses sample files, but you can use real files from the [AWS CardDemo project](https://github.com/aws-samples/aws-mainframe-modernization-carddemo/tree/main/app/cbl). |
| 216 | + |
| 217 | +## Next Steps |
| 218 | + |
| 219 | +- [Agent Delegation](/sdk/guides/agent-delegation) - Parallel task execution with sub-agents |
| 220 | +- [Custom Tools](/sdk/guides/custom-tools) - Create specialized tools for your workflow |
0 commit comments