Plano is an LLM planning framework that breaks complex tasks into structured, executable steps with dependency tracking — giving agents a formal plan representation they can follow, revise, and communicate to users.
Basic agent planning is implicit: the model reasons about what to do next at each step. Plano makes the plan explicit: before execution begins, the agent produces a structured plan document — a tree of named steps, each with a description, dependencies, status, and expected output. This plan is a first-class object that can be inspected, modified, displayed to users, and used to coordinate multi-agent execution.
The key insight: an explicit plan separates planning quality from execution quality. You can validate the plan before a single tool is called (is the approach sensible? are the steps achievable?), and you can revise the plan mid-execution when a step fails or reveals new information. Without an explicit plan, replanning requires re-running the entire reasoning chain from scratch.
Plano-style planning is most valuable for: long-horizon tasks (10+ steps), tasks where partial progress should be visible to users, and multi-agent systems where different agents execute different parts of the plan.
import re
from dataclasses import dataclass, field, fields
from enum import Enum
from typing import Literal
class StepStatus(Enum):
    """Lifecycle states for a single plan step."""
    PENDING = "pending"          # not yet started; may be waiting on dependencies
    IN_PROGRESS = "in_progress"  # currently executing
    COMPLETED = "completed"      # finished successfully; `result` holds the output
    FAILED = "failed"            # execution raised; `result` holds the error text
    SKIPPED = "skipped"          # deliberately not run; still counts toward plan completion
@dataclass
class PlanStep:
    """One node in a plan: what to do, who does it, and what happened."""
    id: str                      # "step_1", "step_2a", etc.
    title: str                   # Short human-readable name
    description: str             # What this step does
    depends_on: list[str] = field(default_factory=list)  # IDs of prerequisite steps
    status: StepStatus = StepStatus.PENDING              # updated in place during execution
    result: str = ""             # Output from execution (or error text on failure)
    agent: str = "default"       # Which agent executes this step
    estimated_tokens: int = 500  # For cost estimation (LLM guess; calibrate with real usage)
@dataclass
class Plan:
    """An explicit, inspectable plan: a goal plus its dependency-tracked steps."""
    goal: str
    steps: list[PlanStep]
    created_at: str = ""
    revision: int = 0  # bumped each time the plan is revised mid-execution

    def get_ready_steps(self) -> list[PlanStep]:
        """Steps whose dependencies are all completed."""
        done_ids = {step.id for step in self.steps if step.status == StepStatus.COMPLETED}
        ready = []
        for step in self.steps:
            if step.status != StepStatus.PENDING:
                continue
            # A step is ready once every prerequisite id is in the done set.
            if set(step.depends_on) <= done_ids:
                ready.append(step)
        return ready

    def is_complete(self) -> bool:
        """True once every step has reached a terminal success state."""
        terminal = (StepStatus.COMPLETED, StepStatus.SKIPPED)
        return all(step.status in terminal for step in self.steps)

    def summary(self) -> str:
        """One-line progress string, e.g. '3/5 complete, 1 failed'."""
        counts = {status: 0 for status in StepStatus}
        for step in self.steps:
            counts[step.status] += 1
        done = counts[StepStatus.COMPLETED]
        failed = counts[StepStatus.FAILED]
        return f"{done}/{len(self.steps)} complete, {failed} failed"
# Anthropic SDK client shared by the planner and the step executor below.
# Reads credentials from the environment (ANTHROPIC_API_KEY).
import anthropic, json
client = anthropic.Anthropic()
PLAN_PROMPT = '''You are a planning agent. Break the goal into concrete, actionable steps.
Return a JSON plan:
{
"goal": "...",
"steps": [
{
"id": "step_1",
"title": "Short title",
"description": "What this step does and produces",
"depends_on": [],
"agent": "researcher|writer|coder|default",
"estimated_tokens": 500
}
]
}
Rules:
- Each step should have one clear deliverable
- List only direct dependencies (not transitive)
- Steps with no dependencies can run in parallel
- Use 3-8 steps for most tasks'''
def generate_plan(goal: str) -> Plan:
    """Ask the model to decompose `goal` into a structured Plan.

    Raises ValueError if the response contains no parseable JSON plan.
    """
    response = client.messages.create(
        model="claude-sonnet-4-5",
        max_tokens=2048,
        messages=[{"role": "user", "content": f"{PLAN_PROMPT}\nGoal: {goal}"}],
    )
    text = response.content[0].text
    # The model may wrap the JSON in prose; extract the outermost object.
    match = re.search(r'\{.*\}', text, re.DOTALL)
    if not match:
        raise ValueError("No JSON plan found in response")
    data = json.loads(match.group())
    # Only pass through keys PlanStep actually declares, so extra keys the
    # model invents don't crash the constructor with a TypeError.
    allowed = {f.name for f in fields(PlanStep)}
    steps = [
        PlanStep(**{k: v for k, v in s.items() if k in allowed})
        for s in data["steps"]
    ]
    # Fall back to the caller's goal if the model omitted the "goal" key.
    return Plan(goal=data.get("goal", goal), steps=steps)
# Demo: generate a plan and show each step with its dependency gating.
plan = generate_plan(
    "Research the top 5 vector databases, compare their features, and write a decision guide."
)
for step in plan.steps:
    # Steps with no prerequisites are immediately runnable (in parallel).
    deps = f" (after: {step.depends_on})" if step.depends_on else " (can start now)"
    print(f" {step.id}: {step.title}{deps}")
import asyncio
async def execute_plan(plan: Plan) -> Plan:
    """Execute a plan respecting dependencies, parallelising where possible.

    Runs "waves" of ready steps concurrently; stops when the plan is
    complete or no step is ready (e.g. a failed step blocks its dependents).
    Returns the same Plan object with step statuses/results updated in place.
    """

    async def execute_step(step: PlanStep) -> str:
        """Run one step and record its outcome on the step itself."""
        step.status = StepStatus.IN_PROGRESS
        print(f"Starting: {step.title}")
        try:
            # Execute the step (replace with real agent call).
            # The Anthropic client is synchronous, so run it in a worker
            # thread — otherwise each call blocks the event loop and the
            # gather() below serialises instead of overlapping the wave.
            response = await asyncio.to_thread(
                client.messages.create,
                model="claude-haiku-4-5-20251001",
                max_tokens=512,
                messages=[{"role": "user", "content": step.description}],
            )
            result = response.content[0].text
            step.result = result
            step.status = StepStatus.COMPLETED
            print(f"Completed: {step.title}")
            return result
        except Exception as e:
            # Record the failure but keep executing independent branches.
            step.status = StepStatus.FAILED
            step.result = str(e)
            print(f"Failed: {step.title} — {e}")
            return ""

    # Topological execution: process waves of ready steps in parallel.
    # The iteration bound guarantees termination even on a malformed
    # (e.g. cyclic) dependency graph.
    max_iterations = len(plan.steps) + 1
    for _ in range(max_iterations):
        ready = plan.get_ready_steps()
        if not ready:
            break  # either done, or remaining steps are blocked by a failure
        # Execute all ready steps in parallel.
        await asyncio.gather(*[execute_step(s) for s in ready])
        if plan.is_complete():
            break
    return plan
# Run the plan end-to-end and report final progress.
plan = generate_plan("Write a Python web scraper with error handling and rate limiting.")
asyncio.run(execute_plan(plan))
# The f-string was split across physical lines (a syntax error); the intent
# is a leading blank line before the summary.
print(f"\nPlan complete: {plan.summary()}")
def revise_plan(plan: Plan, failed_step: PlanStep, error: str) -> Plan:
    """Ask the LLM to revise the plan given a failed step.

    Completed steps are kept verbatim; pending/failed steps are replaced by
    the model's revised steps. Increments `plan.revision` and mutates the
    plan in place. Raises ValueError if the response contains no JSON plan.
    """
    completed_results = "\n".join(
        f"- {s.title}: {s.result[:200]}"
        for s in plan.steps if s.status == StepStatus.COMPLETED
    )
    prompt = f'''A step failed in this plan. Revise the remaining steps.
Original goal: {plan.goal}
Failed step: {failed_step.title}
Error: {error}
Completed so far:
{completed_results}
Provide a revised plan (JSON format) for the REMAINING steps only.
Preserve the original step IDs where possible, add new ones as needed.'''
    response = client.messages.create(
        model="claude-sonnet-4-5", max_tokens=2048,
        messages=[{"role": "user", "content": prompt}]
    )
    match = re.search(r'\{.*\}', response.content[0].text, re.DOTALL)
    if not match:
        # Fail with a clear error instead of an AttributeError on
        # match.group() when the model returns no JSON.
        raise ValueError("No JSON plan found in revision response")
    revised_data = json.loads(match.group())
    new_steps = [PlanStep(**s) for s in revised_data.get("steps", [])]
    # Keep completed steps, replace pending/failed with revised steps.
    final_steps = [s for s in plan.steps if s.status == StepStatus.COMPLETED]
    final_steps.extend(new_steps)
    plan.steps = final_steps
    plan.revision += 1
    return plan
One of Plano's biggest value-adds is making agent progress visible. Instead of a spinner, users see a live plan with step statuses:
def render_plan_markdown(plan: Plan) -> str:
    """Render the plan as a markdown progress view, one emoji-tagged line
    per step, with truncated results for completed steps."""
    status_emoji = {
        StepStatus.PENDING: "⬜",
        StepStatus.IN_PROGRESS: "🔄",
        StepStatus.COMPLETED: "✅",
        StepStatus.FAILED: "❌",
        StepStatus.SKIPPED: "⏭️",
    }
    lines = [f"## Plan: {plan.goal}", f"*{plan.summary()}*", ""]
    for step in plan.steps:
        emoji = status_emoji[step.status]
        lines.append(f"{emoji} **{step.title}**")
        lines.append(f" {step.description}")
        # Only completed steps have a meaningful result worth previewing.
        if step.result and step.status == StepStatus.COMPLETED:
            lines.append(f" *Result: {step.result[:100]}...*")
        lines.append("")
    # The original join string was split across physical lines (a syntax
    # error); the intent is newline-joined markdown.
    return "\n".join(lines)
# Stream plan updates as steps complete
# NOTE(review): the loop variable is unused — each iteration re-renders the
# whole plan, presumably to simulate one streamed update per step; confirm
# the intended hook point for real execution.
for step in plan.steps:
    print(render_plan_markdown(plan))
    # In production: send via WebSocket or SSE to frontend
Users tolerate longer waits when they can see progress. A visible plan that ticks off completed steps feels fast even when the total time is the same. This is one of the highest-ROI UX improvements for agent-heavy applications.
Plans are predictions, not contracts. The LLM generates a plan based on its expectations of what each step will produce. When a step returns unexpected output (a search returns no results, an API is down), the plan may become invalid. Build in explicit revision triggers: after each step, evaluate whether the remaining plan is still appropriate given the actual results.
Dependency graphs can have cycles. If Step A depends on Step B and Step B depends on Step A, execution deadlocks. Validate the plan for cycles before execution using topological sort. If a cycle exists, ask the LLM to revise.
Estimated tokens are usually wrong. The LLM's token estimates are rough. For accurate cost forecasting, track actual tokens used per step and use historical data to calibrate future estimates. Never use LLM-estimated tokens for billing.
LLM-based planning systems require careful design to be reliable in production. The three patterns below cover the most common architectures, from simple single-shot plans to robust iterative systems.
| Pattern | Description | Best For | Failure Risk |
|---|---|---|---|
| Single-shot plan | Generate full plan upfront, execute sequentially | Predictable, short tasks | Early errors cascade; no recovery |
| Hierarchical plan | Decompose into sub-plans recursively | Complex multi-stage tasks | Over-planning; combinatorial depth |
| Replanning loop | Replan after each step based on results | Dynamic environments | High cost; risk of infinite loops |
# --- Single-shot pattern (minimal standalone variant) ---
# NOTE(review): this section redefines `client`, `PLAN_PROMPT`, and
# `generate_plan` from the sections above; it is a self-contained
# illustration of the table's first pattern, not a continuation.
import anthropic, json
client = anthropic.Anthropic()
# Template prompt: doubled braces render as literal JSON braces under .format().
PLAN_PROMPT = """Given the goal below, produce a JSON plan with steps.
Each step: {{"id": int, "action": str, "depends_on": [int], "status": "pending"}}
Goal: {goal}
Return only valid JSON."""
def generate_plan(goal: str) -> list[dict]:
    """Single-shot planner: one LLM call parsed into a list of step dicts.

    Raises ValueError if the response contains no JSON.
    """
    resp = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=1024,
        messages=[{"role": "user", "content": PLAN_PROMPT.format(goal=goal)}]
    )
    text = resp.content[0].text
    # Models often wrap JSON in prose or code fences despite "Return only
    # valid JSON"; extract the first JSON array/object rather than feeding
    # the raw text to json.loads.
    match = re.search(r'\[.*\]|\{.*\}', text, re.DOTALL)
    if not match:
        raise ValueError("No JSON plan found in response")
    data = json.loads(match.group())
    # Normalise: accept either a bare list of steps or {"steps": [...]},
    # so run_plan can always iterate a list.
    return data["steps"] if isinstance(data, dict) else data
def execute_step(step: dict, context: dict) -> str:
    """Stand-in executor; production code would dispatch to tools/agents."""
    # Placeholder: in production, dispatch to tools/agents
    action = step["action"]
    return "Completed: " + action
def run_plan(goal: str):
    """Generate a plan for `goal` and execute its steps in dependency order.

    Returns a dict mapping step id -> execution result. Steps whose
    dependencies never complete (missing ids or a cycle) are left unrun.
    """
    plan = generate_plan(goal)
    results = {}
    # Loop to a fixpoint instead of a single pass: the original pass
    # silently skipped any step listed before its dependency. Completion is
    # tested by key presence, not truthiness, so an empty-string result
    # doesn't block dependents.
    remaining = list(plan)
    while remaining:
        progressed = False
        blocked = []
        for step in remaining:
            if all(dep in results for dep in step["depends_on"]):
                results[step["id"]] = execute_step(step, results)
                step["status"] = "done"
                progressed = True
            else:
                blocked.append(step)
        remaining = blocked
        if not progressed:
            break  # cycle or unknown dependency id; stop rather than spin
    return results
For production replanning loops, always enforce a hard step limit (e.g. 20 steps) and a budget cap (e.g. $0.50 per plan execution). Log the full plan and all replan events so you can audit why the system diverged from the original plan. Human-in-the-loop checkpoints at natural plan boundaries (e.g. before irreversible actions like sending emails or writing files) reduce the blast radius of planning failures.