Anthropic's flagship model — leads coding benchmarks, handles 200K context natively, and supports computer use for browser/desktop automation. Best-in-class for code generation, analysis, and instruction following.
Claude 3.5 Sonnet consistently ranks at or near the top of coding benchmarks (SWE-bench, HumanEval) and is Anthropic's recommended model for: writing and debugging code, analysing long documents (contracts, codebases, research papers), structured data extraction, and agentic tasks where the model must plan and use tools over multiple turns.
Its 200K context window (roughly 150,000 words or 500 pages) is genuinely usable — unlike some models with large nominal contexts but degraded performance beyond 32K tokens, Claude 3.5 Sonnet maintains strong recall and reasoning across the full window. This enables workflows that were previously impossible: loading an entire codebase into context, processing full legal documents, or maintaining long conversation histories.
Claude 3.5 Sonnet is also the primary model for Anthropic's computer use capability — the ability to control a browser or desktop by interpreting screenshots and generating mouse/keyboard actions.
pip install anthropic
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from env

# Basic completion
message = client.messages.create(
    model="claude-sonnet-4-5",
    max_tokens=1024,
    system="You are an expert Python developer. Be concise.",
    messages=[
        {
            "role": "user",
            # Multiline content must be a single string — use explicit \n
            # (the original broke the literal across source lines).
            "content": (
                "Review this function for bugs:\n"
                "def divide(a, b):\n"
                "return a / b"
            ),
        }
    ],
)
print(message.content[0].text)
print(f"Input tokens: {message.usage.input_tokens}")
print(f"Output tokens: {message.usage.output_tokens}")
# Streaming: text_stream yields text deltas as the model produces them
with client.messages.stream(
    model="claude-sonnet-4-5",
    max_tokens=2048,
    messages=[{"role": "user", "content": "Write a Python async web scraper."}],
) as stream:
    for chunk in stream.text_stream:
        print(chunk, end="", flush=True)
# Multi-turn conversation: the API is stateless, so append each exchange
# and resend the full history on every turn.
messages = []
for question in ["What is RLHF?", "How does it differ from RLAIF?", "Give a concrete example."]:
    messages.append({"role": "user", "content": question})
    response = client.messages.create(
        model="claude-haiku-4-5-20251001",  # haiku for conversational turns
        max_tokens=512,
        messages=messages,
    )
    answer = response.content[0].text
    messages.append({"role": "assistant", "content": answer})
    print(f"Claude: {answer[:100]}...")
from pathlib import Path

# Load an entire codebase into context
def load_python_files(directory: str, max_files: int = 50) -> str:
    """Concatenate up to *max_files* Python files under *directory* into one
    markdown-formatted string: a ``## <path>`` header followed by a fenced
    ``python`` code block per file.

    Args:
        directory: Root directory searched recursively for ``*.py`` files.
        max_files: Cap on how many files are included (cost/context guard).

    Returns:
        The joined markdown string; empty string if no files were readable.
    """
    files = list(Path(directory).rglob("*.py"))[:max_files]
    parts = []
    for f in files:
        try:
            content = f.read_text()
        except (OSError, UnicodeDecodeError):
            # Best-effort: skip unreadable or non-UTF-8 files.
            continue
        parts.append(f"## {f}\n```python\n{content}\n```")
    return "\n".join(parts)
codebase = load_python_files("./src")
# ~4 chars/token is a rough heuristic for English text and code
print(f"Codebase size: {len(codebase):,} chars ({len(codebase)//4:,} tokens est.)")

# Ask architectural questions over the full codebase
message = client.messages.create(
    model="claude-sonnet-4-5",
    max_tokens=2048,
    system="You are a senior software architect. Answer questions about the codebase.",
    messages=[{
        "role": "user",
        # Single string with explicit \n separators — the original literal was
        # broken across source lines (syntax error).
        "content": (
            f"{codebase}\n"
            "---\n"
            "What are the main architectural patterns used here? Where are the biggest risks?"
        ),
    }],
)
print(message.content[0].text)

# Tips for 200K context:
# 1. Put the most important content near the END of the prompt (recency bias)
# 2. Use explicit section headers so Claude can navigate the context
# 3. Break analysis into targeted questions rather than "analyse everything"
# Tool definitions: each entry pairs a unique name with a model-facing
# description (this is what Claude reads when choosing a tool) and a JSON
# Schema describing the tool's expected input.
tools = [
    {
        "name": "read_file",
        "description": "Read a file from the filesystem",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "File path to read"},
            },
            "required": ["path"],
        },
    },
    {
        "name": "run_python",
        "description": "Execute Python code and return stdout",
        "input_schema": {
            "type": "object",
            "properties": {
                "code": {"type": "string", "description": "Python code to execute"},
            },
            "required": ["code"],
        },
    },
]
def agent_loop(task: str, max_turns: int = 10) -> str:
    """Run a simple agentic loop: let Claude call tools until it produces a
    final answer or *max_turns* round-trips are exhausted.

    Args:
        task: The user's task, sent as the opening message.
        max_turns: Hard cap on model round-trips (cost/runaway guard).

    Returns:
        The model's final text reply, "" if a turn ended with no text block,
        or "Max turns reached" if the cap was hit.
    """
    messages = [{"role": "user", "content": task}]
    for _ in range(max_turns):
        response = client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=4096,
            tools=tools,
            messages=messages,
        )
        # Echo the assistant turn back verbatim (including tool_use blocks)
        # so the API can match the tool_result ids we send next.
        messages.append({"role": "assistant", "content": response.content})
        if response.stop_reason == "end_turn":
            # Extract final text response; default avoids StopIteration when
            # the turn ended without a text block.
            return next((b.text for b in response.content if hasattr(b, "text")), "")
        # Process tool calls
        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                result = dispatch_tool(block.name, block.input)  # your dispatcher
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": str(result),
                })
        if not tool_results:
            # Stopped for another reason (e.g. max_tokens) without calling a
            # tool — an empty tool_result turn would be an API error, so bail
            # out with whatever text we have.
            return next((b.text for b in response.content if hasattr(b, "text")), "")
        messages.append({"role": "user", "content": tool_results})
    return "Max turns reached"
import json  # NOTE(review): json appears unused in this snippet — verify before removing

# Use tool use for reliable structured extraction: the input_schema doubles
# as the contract for the extracted record.
extraction_tool = {
    "name": "extract_data",
    "description": "Extract structured data from the text",
    "input_schema": {
        "type": "object",
        "properties": {
            "company_name": {"type": "string"},
            "revenue": {"type": "number", "description": "Annual revenue in USD"},
            "employee_count": {"type": "integer"},
            "founded_year": {"type": "integer"},
            "headquarters": {"type": "string"},
            "key_products": {"type": "array", "items": {"type": "string"}},
        },
        # Only the name is mandatory; every other field is best-effort.
        "required": ["company_name"],
    },
}
def extract_company_data(text: str) -> dict:
    """Extract structured company fields from free text via forced tool use.

    Forcing ``tool_choice`` makes the model answer with a schema-shaped tool
    call instead of prose, which is more reliable than asking for JSON.

    Args:
        text: Free-form text describing a company.

    Returns:
        The tool-call input dict (the schema requires only "company_name"),
        or {} if no matching tool call was produced.
    """
    response = client.messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=1024,
        tools=[extraction_tool],
        tool_choice={"type": "tool", "name": "extract_data"},  # force this tool
        messages=[{
            "role": "user",
            # Single string with an explicit \n — the original literal was
            # broken across source lines (syntax error).
            "content": f"Extract company data from:\n{text}",
        }],
    )
    for block in response.content:
        if block.type == "tool_use" and block.name == "extract_data":
            return block.input
    return {}
# Tool use forces structured output — more reliable than asking for JSON in prompt
result = extract_company_data("Anthropic, founded in 2021 by Dario Amodei and team...")
print(result)  # dict; includes at least "company_name" when extraction succeeds
# Computer use lets Claude control a browser/desktop by viewing screenshots
# Requires a sandboxed environment — never run on your main machine
import anthropic
import base64

client = anthropic.Anthropic()

# Beta tool definition: declares the virtual display geometry so the model
# can emit pixel coordinates for mouse/keyboard actions.
computer_tool = {
    "type": "computer_20241022",
    "name": "computer",
    "display_width_px": 1280,
    "display_height_px": 800,
    "display_number": 1,
}
def take_screenshot() -> str:
    """Capture the current screen and return it as a base64-encoded PNG."""
    # pyautogui used here; playwright or similar works the same way.
    import pyautogui
    from io import BytesIO

    buffer = BytesIO()
    pyautogui.screenshot().save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode()
def run_computer_task(task: str, max_steps: int = 20):
    """Drive a screenshot -> action -> screenshot loop until Claude finishes.

    Each step sends the conversation so far, echoes the assistant turn back,
    and answers every computer tool_use with a fresh screenshot as the
    tool_result. (The original loop resent identical messages every step,
    so the model could never see the effect of its actions.)

    NOTE(review): executing the requested action (click, type, scroll, ...)
    is environment-specific and left as a TODO below — run only in a sandbox.

    Args:
        task: Natural-language description of what to do on screen.
        max_steps: Hard cap on model round-trips.
    """
    screenshot = take_screenshot()
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": screenshot}},
            {"type": "text", "text": task},
        ],
    }]
    for step in range(max_steps):
        response = client.beta.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=4096,
            tools=[computer_tool],
            messages=messages,
            betas=["computer-use-2024-10-22"],
        )
        if response.stop_reason == "end_turn":
            break
        # Echo the assistant turn so tool_use ids can be matched next turn.
        messages.append({"role": "assistant", "content": response.content})
        tool_results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            # TODO: execute block.input in your sandbox (pyautogui/playwright)
            # BEFORE capturing the follow-up screenshot.
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": [{
                    "type": "image",
                    "source": {"type": "base64", "media_type": "image/png",
                               "data": take_screenshot()},
                }],
            })
        if not tool_results:
            # No tool call and no end_turn (e.g. max_tokens) — stop rather
            # than resend an unchanged conversation.
            break
        messages.append({"role": "user", "content": tool_results})
# Computer use is best for: automating web forms, data entry,
# testing UI flows, and browser-based research tasks.
Token counting differs between Anthropic and OpenAI. Claude uses a different tokeniser than GPT-4, so text that is 1000 tokens for GPT-4 might be 850 or 1100 tokens for Claude. Always use client.messages.count_tokens() for accurate estimates before sending large requests. The Anthropic API also charges for system prompt tokens on every request — cache frequent system prompts by marking them with "cache_control": {"type": "ephemeral"} (prompt caching) to reduce costs by up to 90% on repeated prompts.
The 200K context limit is on the full request, not just the user message. System prompt + all conversation history + current user message + tool results all count toward the 200K limit. In long agentic runs, track cumulative token usage and summarise early turns before you hit the limit.
Claude refuses differently than GPT-4. Claude is more likely to decline requests that feel like they're testing limits, even benign ones. Adding context about why the task is legitimate (e.g., "I'm a security researcher testing my own system") reduces refusals for borderline requests.
Claude 3.5 Sonnet sits at the top of the cost-performance curve for most production tasks. Understanding when to choose it versus Claude 3 Haiku or Claude 3 Opus helps you optimise your inference budget without sacrificing quality.
| Use Case | Recommended Model | Reason |
|---|---|---|
| High-volume classification, routing | Claude 3 Haiku | 10× cheaper; accuracy gap negligible for simple tasks |
| RAG synthesis, summarisation | Claude 3.5 Sonnet | Best quality/cost ratio; fast output tokens |
| Multi-step reasoning, coding | Claude 3.5 Sonnet | Outperforms Opus on most coding benchmarks |
| Very long-document analysis (>100K tokens) | Claude 3.5 Sonnet | 200K context with strong recall |
| Computer use / browser automation | Claude 3.5 Sonnet | Only supported model for computer use beta |
import anthropic

client = anthropic.Anthropic()

# Migrate from claude-3-opus-20240229 to claude-3-5-sonnet-20241022
# Key differences to handle:
# 1. claude-3-5-sonnet is faster (higher output tokens/sec)
# 2. Tool use format is identical — no changes needed
# 3. System prompt position is unchanged
def create_with_fallback(prompt: str, primary="claude-3-5-sonnet-20241022",
                         fallback="claude-3-opus-20240229") -> str:
    """Send *prompt* to *primary*; on an API status error, retry on *fallback*.

    A status error from the fallback itself is re-raised so callers see it.

    Returns:
        The first text block of the successful response.
    """
    for model in (primary, fallback):
        try:
            resp = client.messages.create(
                model=model,
                max_tokens=2048,
                messages=[{"role": "user", "content": prompt}],
            )
        except anthropic.APIStatusError as e:
            if model == fallback:
                raise
            print(f"Primary model failed ({e.status_code}), trying fallback...")
        else:
            return resp.content[0].text
    return ""  # unreachable: every iteration returns, retries, or raises