Skip to content

Commit 4954bed

Browse files
authored
Add documentation for iterative refinement example (#191)
* Add documentation for iterative refinement example This documents the COBOL to Java refactoring example with iterative refinement workflow. - Added sdk/guides/iterative-refinement.mdx with condensed code example - Added page to navigation in docs.json Related to OpenHands/software-agent-sdk#1414 Co-authored-by: openhands <openhands@all-hands.dev> * Fix review comments
1 parent 18b456d commit 4954bed

File tree

2 files changed

+221
-0
lines changed

2 files changed

+221
-0
lines changed

docs.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@
203203
"sdk/guides/convo-persistence",
204204
"sdk/guides/context-condenser",
205205
"sdk/guides/agent-delegation",
206+
"sdk/guides/iterative-refinement",
206207
"sdk/guides/security",
207208
"sdk/guides/metrics",
208209
"sdk/guides/observability",
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
---
2+
title: Iterative Refinement
3+
description: Implement iterative refinement workflows where agents refine their work based on critique feedback until quality thresholds are met.
4+
---
5+
6+
## Overview
7+
8+
Iterative refinement is a powerful pattern where multiple agents work together in a feedback loop:
9+
1. A **refactoring agent** performs the main task (e.g., code conversion)
10+
2. A **critique agent** evaluates the quality and provides detailed feedback
11+
3. If the quality is below the threshold, the refactoring agent tries again, using the critique as feedback
12+
13+
This pattern is useful for:
14+
- Code refactoring and modernization (e.g., COBOL to Java)
15+
- Document translation and localization
16+
- Content generation with quality requirements
17+
- Any task requiring iterative improvement
18+
19+
## Quick Start
20+
21+
<Note>
22+
This example is available on GitHub: [examples/01_standalone_sdk/31_iterative_refinement.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/01_standalone_sdk/31_iterative_refinement.py)
23+
</Note>
24+
25+
```python icon="python" expandable examples/01_standalone_sdk/31_iterative_refinement.py
26+
"""
27+
Iterative Refinement Example: COBOL to Java Refactoring
28+
29+
This example demonstrates an iterative refinement workflow where:
30+
1. A refactoring agent converts COBOL files to Java files
31+
2. A critique agent evaluates the quality of each conversion and provides scores
32+
3. If the average score is below 90%, the process repeats with feedback
33+
34+
The workflow continues until the refactoring meets the quality threshold.
35+
"""
36+
37+
import os
38+
import re
39+
import tempfile
40+
from pathlib import Path
41+
42+
from pydantic import SecretStr
43+
44+
from openhands.sdk import LLM, Conversation
45+
from openhands.tools.preset.default import get_default_agent
46+
47+
48+
QUALITY_THRESHOLD = 90.0
49+
MAX_ITERATIONS = 5
50+
51+
52+
def setup_workspace() -> tuple[Path, Path, Path]:
    """Build a fresh temporary workspace for the refactoring workflow.

    A new temporary directory is created and populated with three
    subdirectories: ``cobol``, ``java`` and ``critiques``.

    Returns:
        ``(workspace_dir, cobol_dir, java_dir)``. The ``critiques``
        directory is created as well but is not part of the return value;
        it lives at ``workspace_dir / "critiques"``.
    """
    root = Path(tempfile.mkdtemp())

    # Same creation order as the directory names are listed here.
    subdirs = {name: root / name for name in ("cobol", "java", "critiques")}
    for path in subdirs.values():
        path.mkdir(parents=True, exist_ok=True)

    return root, subdirs["cobol"], subdirs["java"]
64+
65+
66+
def create_sample_cobol_files(cobol_dir: Path) -> list[str]:
    """Write the sample COBOL source files into ``cobol_dir``.

    Args:
        cobol_dir: Existing directory that receives the sample files.

    Returns:
        The names of the files that were written, in creation order.
    """
    # Sample COBOL files based on AWS CardDemo structure.
    # NOTE: the "..." bodies appear to be placeholders in this condensed
    # example; they are written to disk verbatim.
    sample_files = {
        "CBACT01C.cbl": "...",  # Account Display Program
        "CBCUS01C.cbl": "...",  # Customer Information Program
        "CBTRN01C.cbl": "...",  # Transaction Processing Program
    }
    for filename, content in sample_files.items():
        # Explicit encoding so the output does not depend on the platform
        # default locale.
        (cobol_dir / filename).write_text(content, encoding="utf-8")
    # Dicts preserve insertion order, so this matches the write order.
    return list(sample_files)
80+
81+
82+
def parse_critique_score(critique_file: Path) -> float:
    """Parse the average score from the critique report.

    The report is searched, case-insensitively, for an "Average Score"
    label followed by a number. Patterns are tried in order and the first
    one that matches anywhere in the file wins.

    Args:
        critique_file: Path to the critique report.

    Returns:
        The parsed score as a float, or ``0.0`` when the file is missing
        or contains no recognizable score line.
    """
    if not critique_file.is_file():
        return 0.0

    content = critique_file.read_text(encoding="utf-8")
    number = r"(\d+(?:\.\d+)?)"
    patterns = [
        r"\*\*Average Score\*\*:\s*" + number,  # **Average Score**: 85
        r"Average Score:\s*" + number,  # Average Score: 85
        # Colon inside the bold markers, a common markdown variant that
        # the two patterns above do not match: **Average Score:** 85
        r"Average Score:\*\*\s*" + number,
    ]
    for pattern in patterns:
        match = re.search(pattern, content, re.IGNORECASE)
        if match:
            return float(match.group(1))
    return 0.0
96+
97+
98+
def run_iterative_refinement() -> None:
    """Run the iterative refinement workflow.

    Alternates a refactoring pass and a critique pass, each in a fresh
    conversation over the same workspace, until the score parsed from the
    critique report reaches ``QUALITY_THRESHOLD`` or ``MAX_ITERATIONS``
    rounds have run. Prints the final score and the accumulated LLM cost.

    Raises:
        RuntimeError: If the ``LLM_API_KEY`` environment variable is unset
            or empty.
    """
    api_key = os.getenv("LLM_API_KEY")
    if not api_key:
        # Fail fast with a clear message instead of wrapping None in a
        # SecretStr and failing later inside the LLM client.
        raise RuntimeError("LLM_API_KEY environment variable is not set.")
    model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")

    llm = LLM(
        model=model,
        api_key=SecretStr(api_key),
        usage_id="iterative_refinement",
    )

    workspace_dir, cobol_dir, java_dir = setup_workspace()
    # setup_workspace() creates this directory but does not return it.
    critique_dir = workspace_dir / "critiques"
    # Populates cobol_dir. NOTE(review): the returned names are presumably
    # among the prompt arguments elided as `...` below; confirm.
    cobol_files = create_sample_cobol_files(cobol_dir)

    critique_file = critique_dir / "critique_report.md"
    current_score = 0.0
    iteration = 0

    while current_score < QUALITY_THRESHOLD and iteration < MAX_ITERATIONS:
        iteration += 1

        # Phase 1: Refactoring
        refactoring_agent = get_default_agent(llm=llm, cli_mode=True)
        refactoring_conversation = Conversation(
            agent=refactoring_agent,
            workspace=str(workspace_dir),
        )
        refactoring_conversation.send_message(get_refactoring_prompt(...))
        refactoring_conversation.run()

        # Phase 2: Critique
        critique_agent = get_default_agent(llm=llm, cli_mode=True)
        critique_conversation = Conversation(
            agent=critique_agent,
            workspace=str(workspace_dir),
        )
        critique_conversation.send_message(get_critique_prompt(...))
        critique_conversation.run()

        # A missing or unparsable report yields 0.0, so the loop continues.
        current_score = parse_critique_score(critique_file)

    print(f"Final score: {current_score:.1f}%")
    print(f"EXAMPLE_COST: {llm.metrics.accumulated_cost}")
142+
143+
144+
if __name__ == "__main__":
145+
run_iterative_refinement()
146+
```
147+
148+
```bash Running the Example
149+
export LLM_API_KEY="your-api-key"
150+
cd agent-sdk
151+
uv run python examples/01_standalone_sdk/31_iterative_refinement.py
152+
```
153+
154+
## How It Works
155+
156+
### The Iteration Loop
157+
158+
The core workflow runs in a loop until the quality threshold is met or the maximum number of iterations is reached:
159+
160+
```python
161+
QUALITY_THRESHOLD = 90.0
162+
MAX_ITERATIONS = 5
163+
164+
while current_score < QUALITY_THRESHOLD and iteration < MAX_ITERATIONS:
165+
# Phase 1: Refactoring agent converts COBOL to Java
166+
refactoring_agent = get_default_agent(llm=llm, cli_mode=True)
167+
refactoring_conversation = Conversation(agent=refactoring_agent, workspace=str(workspace_dir))
168+
refactoring_conversation.send_message(refactoring_prompt)
169+
refactoring_conversation.run()
170+
171+
# Phase 2: Critique agent evaluates the conversion
172+
critique_agent = get_default_agent(llm=llm, cli_mode=True)
173+
critique_conversation = Conversation(agent=critique_agent, workspace=str(workspace_dir))
174+
critique_conversation.send_message(critique_prompt)
175+
critique_conversation.run()
176+
177+
# Parse score and decide whether to continue
178+
current_score = parse_critique_score(critique_file)
179+
180+
iteration += 1
181+
```
182+
183+
### Critique Scoring
184+
185+
The critique agent evaluates each file on four dimensions (0-25 pts each):
186+
- **Correctness**: Does the Java code preserve the original business logic?
187+
- **Code Quality**: Is the code clean and following Java conventions?
188+
- **Completeness**: Are all COBOL features properly converted?
189+
- **Best Practices**: Does it use proper OOP, error handling, and documentation?
190+
191+
### Feedback Loop
192+
193+
When the score is below the threshold, the refactoring agent is given the location of the critique file on its next attempt:
194+
195+
```python
196+
if critique_file and critique_file.exists():
197+
base_prompt += f"""
198+
IMPORTANT: A previous refactoring attempt was evaluated and needs improvement.
199+
Please review the critique at: {critique_file}
200+
Address all issues mentioned in the critique to improve the conversion quality.
201+
"""
202+
```
203+
204+
## Customization
205+
206+
### Adjusting Thresholds
207+
208+
```python
209+
QUALITY_THRESHOLD = 95.0 # Require higher quality
210+
MAX_ITERATIONS = 10 # Allow more iterations
211+
```
212+
213+
### Using Real COBOL Files
214+
215+
The example uses sample files, but you can use real files from the [AWS CardDemo project](https://github.com/aws-samples/aws-mainframe-modernization-carddemo/tree/main/app/cbl).
216+
217+
## Next Steps
218+
219+
- [Agent Delegation](/sdk/guides/agent-delegation) - Parallel task execution with sub-agents
220+
- [Custom Tools](/sdk/guides/custom-tools) - Create specialized tools for your workflow

0 commit comments

Comments
 (0)