December 2025 Update: Now covers computer use agents, MCP tool integrations, and the latest agentic patterns from OpenAI and Anthropic.
The Agent Revolution
Agents are how AI goes from “chat” to “do”. Every company wants AI that can actually take actions: book meetings, write code, research competitors, manage workflows.
Market Reality: Agent-building is the highest-paying AI skill in 2025. Companies pay $250-400K+ for engineers who can build reliable agents that complete real tasks. Demand far exceeds supply.
2025 Agent Landscape
| Agent Type | Description | Example |
|---|---|---|
| Tool-Use Agents | Call APIs, search, calculate | Customer support bots |
| Code Agents | Write, execute, debug code | GitHub Copilot Workspace |
| Computer Use | Control browser/desktop | Anthropic Claude computer use |
| Multi-Agent | Teams of specialized agents | Research + Writing teams |
| MCP Agents | Connect to any data source | Database assistants |
The Agent Mental Model
┌─────────────────────────────────────┐
│ User Goal │
└──────────────┬──────────────────────┘
│
┌──────────────▼──────────────────────┐
│ Agent Controller │
│ ┌────────┐ ┌────────┐ ┌────────┐│
│ │Perceive│→ │ Plan │→ │ Act ││
│ └────────┘ └────────┘ └────────┘│
│ ↑ │ │
│ └────────────────────┘ │
│ Feedback Loop │
└──────────────┬──────────────────────┘
│
┌──────────────────────────┼──────────────────────────┐
│ │ │
▼ ▼ ▼
┌─────────┐ ┌─────────────┐ ┌──────────┐
│ Tools │ │ Memory │ │ LLM │
│(Actions)│ │ (Context) │ │(Reasoning│
└─────────┘ └─────────────┘ └──────────┘
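Before the full framework, here is a minimal, self-contained sketch of that Perceive → Plan → Act loop. The LLM is replaced by a stub function so the snippet runs without an API key; the names (Decision, stub_llm, agent_loop) are illustrative and not part of any library.
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, Optional

@dataclass
class Decision:
    tool: Optional[str] = None                 # which tool to call next, if any
    args: Dict[str, Any] = field(default_factory=dict)
    answer: Optional[str] = None               # final answer once the agent is done

def stub_llm(goal: str, observations: list) -> Decision:
    # Plan: a real agent would prompt an LLM here; this stub calls the
    # calculator once and then finishes
    if not observations:
        return Decision(tool="calculator", args={"expression": "127.50 * 0.15"})
    return Decision(answer=f"Tip is {observations[-1]:.2f}")

def agent_loop(goal: str, tools: Dict[str, Callable], max_steps: int = 5) -> str:
    observations = []                                    # memory / context
    for _ in range(max_steps):
        decision = stub_llm(goal, observations)          # Perceive + Plan
        if decision.answer is not None:                  # done
            return decision.answer
        result = tools[decision.tool](**decision.args)   # Act
        observations.append(result)                      # feedback loop
    return "Stopped: max_steps reached"

tools = {"calculator": lambda expression: eval(expression, {"__builtins__": {}})}
print(agent_loop("Compute a 15% tip on $127.50", tools))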
Production Agent Framework
Complete Implementation
from openai import OpenAI
from dataclasses import dataclass, field
from typing import List, Dict, Any, Callable, Optional
from enum import Enum
from abc import ABC, abstractmethod
import json
import time
import logging
from datetime import datetime
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("agent")
class AgentStatus(Enum):
IDLE = "idle"
THINKING = "thinking"
ACTING = "acting"
COMPLETED = "completed"
FAILED = "failed"
@dataclass
class ToolCall:
name: str
arguments: Dict[str, Any]
result: Optional[str] = None
error: Optional[str] = None
duration_ms: float = 0
@dataclass
class AgentStep:
thought: str
tool_calls: List[ToolCall]
observation: str
timestamp: datetime = field(default_factory=datetime.now)
@dataclass
class AgentResult:
success: bool
answer: str
steps: List[AgentStep]
total_tokens: int
total_time_ms: float
tool_calls_count: int
class Tool(ABC):
"""Base class for agent tools"""
@property
@abstractmethod
def name(self) -> str:
pass
@property
@abstractmethod
def description(self) -> str:
pass
@property
@abstractmethod
def parameters(self) -> dict:
pass
@abstractmethod
def execute(self, **kwargs) -> str:
pass
def to_openai_tool(self) -> dict:
return {
"type": "function",
"function": {
"name": self.name,
"description": self.description,
"parameters": self.parameters
}
}
class WebSearchTool(Tool):
"""Web search tool using SerpAPI or similar"""
@property
def name(self) -> str:
return "web_search"
@property
def description(self) -> str:
return "Search the web for current information. Use for facts, news, or research."
@property
def parameters(self) -> dict:
return {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The search query"
},
"num_results": {
"type": "integer",
"description": "Number of results to return",
"default": 5
}
},
"required": ["query"]
}
def execute(self, query: str, num_results: int = 5) -> str:
# Mock implementation - replace with actual API
return json.dumps({
"query": query,
"results": [
{"title": f"Result {i+1} for {query}", "snippet": "..."}
for i in range(num_results)
]
})
class CalculatorTool(Tool):
"""Safe mathematical calculations"""
@property
def name(self) -> str:
return "calculator"
@property
def description(self) -> str:
return "Perform mathematical calculations. Supports +, -, *, /, **, sqrt, sin, cos, etc."
@property
def parameters(self) -> dict:
return {
"type": "object",
"properties": {
"expression": {
"type": "string",
"description": "Mathematical expression to evaluate"
}
},
"required": ["expression"]
}
    def execute(self, expression: str) -> str:
        import math
        # Safe evaluation with a restricted set of functions and constants
        allowed = {
            'abs': abs, 'round': round, 'min': min, 'max': max,
            'sum': sum, 'pow': pow, 'sqrt': math.sqrt,
            'sin': math.sin, 'cos': math.cos, 'tan': math.tan,
            'log': math.log, 'log10': math.log10, 'pi': math.pi, 'e': math.e
        }
        try:
            # Treat '^' as exponentiation, then keep only characters that can
            # appear in a simple math expression (digits, operators, parens,
            # commas for function arguments, letters for allowed names)
            expr = expression.replace('^', '**')
            clean = ''.join(c for c in expr if c in '0123456789+-*/(),. ' or c.isalpha())
            result = eval(clean, {"__builtins__": {}}, allowed)
            return f"Result: {result}"
        except Exception as e:
            return f"Calculation error: {e}"
class CodeExecutionTool(Tool):
"""Execute Python code in sandboxed environment"""
@property
def name(self) -> str:
return "execute_code"
@property
def description(self) -> str:
return "Execute Python code and return output. Use for data processing, analysis, or automation."
@property
def parameters(self) -> dict:
return {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "Python code to execute"
}
},
"required": ["code"]
}
def execute(self, code: str) -> str:
import subprocess
import tempfile
import os
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
f.write(code)
temp_file = f.name
try:
result = subprocess.run(
['python', temp_file],
capture_output=True,
text=True,
timeout=30
)
output = result.stdout or result.stderr
return output[:2000] if output else "Code executed successfully (no output)"
except subprocess.TimeoutExpired:
return "Error: Execution timed out (30s limit)"
except Exception as e:
return f"Error: {e}"
finally:
os.unlink(temp_file)
class FileWriteTool(Tool):
"""Write content to files"""
    def __init__(self, allowed_extensions: Optional[List[str]] = None):
self.allowed_extensions = allowed_extensions or ['.txt', '.md', '.json', '.csv', '.py']
@property
def name(self) -> str:
return "write_file"
@property
def description(self) -> str:
return f"Write content to a file. Allowed extensions: {', '.join(self.allowed_extensions)}"
@property
def parameters(self) -> dict:
return {
"type": "object",
"properties": {
"path": {"type": "string", "description": "File path"},
"content": {"type": "string", "description": "Content to write"}
},
"required": ["path", "content"]
}
def execute(self, path: str, content: str) -> str:
import os
ext = os.path.splitext(path)[1].lower()
if ext not in self.allowed_extensions:
return f"Error: Extension {ext} not allowed"
try:
with open(path, 'w') as f:
f.write(content)
return f"Successfully wrote {len(content)} characters to {path}"
except Exception as e:
return f"Error writing file: {e}"
class ProductionAgent:
"""Production-grade ReAct agent"""
def __init__(
self,
tools: List[Tool],
model: str = "gpt-4o",
max_iterations: int = 10,
        system_prompt: Optional[str] = None
):
self.client = OpenAI()
self.tools = {tool.name: tool for tool in tools}
self.tool_schemas = [tool.to_openai_tool() for tool in tools]
self.model = model
self.max_iterations = max_iterations
self.system_prompt = system_prompt or self._default_system_prompt()
self.memory: List[Dict] = []
self.status = AgentStatus.IDLE
def _default_system_prompt(self) -> str:
return """You are an autonomous AI agent that can use tools to accomplish tasks.
IMPORTANT RULES:
1. Think step by step before acting
2. Use tools when you need information or to take actions
3. After each tool use, reflect on what you learned
4. If a tool fails, try an alternative approach
5. When you have enough information, provide a final answer
6. Be concise but thorough in your final response
Available tools and when to use them:
- web_search: For current information, facts, news
- calculator: For mathematical calculations
- execute_code: For data processing or complex logic
- write_file: To save outputs or create files"""
def add_to_memory(self, key: str, value: Any):
"""Add information to agent memory"""
self.memory.append({
"type": "memory",
"key": key,
"value": value,
"timestamp": datetime.now().isoformat()
})
def get_memory_context(self) -> str:
"""Get formatted memory context"""
if not self.memory:
return ""
items = [f"- {m['key']}: {m['value']}" for m in self.memory[-10:]] # Last 10
return "Agent Memory:\n" + "\n".join(items)
def run(self, task: str) -> AgentResult:
"""Execute agent on a task"""
start_time = time.time()
self.status = AgentStatus.THINKING
steps: List[AgentStep] = []
total_tokens = 0
tool_calls_count = 0
# Build initial messages
messages = [
{"role": "system", "content": self.system_prompt},
]
# Add memory context if any
memory_ctx = self.get_memory_context()
if memory_ctx:
messages.append({"role": "system", "content": memory_ctx})
messages.append({"role": "user", "content": task})
# Agent loop
for iteration in range(self.max_iterations):
logger.info(f"Iteration {iteration + 1}/{self.max_iterations}")
# Get LLM response
            request_kwargs = {"model": self.model, "messages": messages}
            if self.tools:
                # Only include tool parameters when tools are registered
                request_kwargs["tools"] = self.tool_schemas
                request_kwargs["tool_choice"] = "auto"
            response = self.client.chat.completions.create(**request_kwargs)
total_tokens += response.usage.total_tokens
message = response.choices[0].message
messages.append(message)
# Check if we're done (no tool calls)
if not message.tool_calls:
self.status = AgentStatus.COMPLETED
return AgentResult(
success=True,
answer=message.content or "Task completed",
steps=steps,
total_tokens=total_tokens,
total_time_ms=(time.time() - start_time) * 1000,
tool_calls_count=tool_calls_count
)
# Execute tool calls
self.status = AgentStatus.ACTING
step_tool_calls = []
for tool_call in message.tool_calls:
tool_name = tool_call.function.name
tool_args = json.loads(tool_call.function.arguments)
logger.info(f"Tool call: {tool_name}({tool_args})")
call_start = time.time()
tool_result = ToolCall(name=tool_name, arguments=tool_args)
try:
if tool_name in self.tools:
result = self.tools[tool_name].execute(**tool_args)
tool_result.result = result
else:
tool_result.error = f"Unknown tool: {tool_name}"
result = tool_result.error
except Exception as e:
tool_result.error = str(e)
result = f"Tool error: {e}"
tool_result.duration_ms = (time.time() - call_start) * 1000
step_tool_calls.append(tool_result)
tool_calls_count += 1
# Add tool result to messages
messages.append({
"role": "tool",
"tool_call_id": tool_call.id,
"content": result
})
# Record step
steps.append(AgentStep(
thought=message.content or "",
tool_calls=step_tool_calls,
observation="\n".join([tc.result or tc.error for tc in step_tool_calls])
))
self.status = AgentStatus.THINKING
# Max iterations reached
self.status = AgentStatus.FAILED
return AgentResult(
success=False,
answer="Max iterations reached without completing the task",
steps=steps,
total_tokens=total_tokens,
total_time_ms=(time.time() - start_time) * 1000,
tool_calls_count=tool_calls_count
)
# Usage Example
tools = [
WebSearchTool(),
CalculatorTool(),
CodeExecutionTool(),
FileWriteTool()
]
agent = ProductionAgent(tools=tools, max_iterations=10)
# Add context to memory
agent.add_to_memory("user_name", "Alex")
agent.add_to_memory("preferred_format", "detailed explanations with examples")
result = agent.run("What is 15% tip on a $127.50 restaurant bill, and save the calculation to tip.txt")
print(f"Success: {result.success}")
print(f"Answer: {result.answer}")
print(f"Steps: {len(result.steps)}")
print(f"Tool calls: {result.tool_calls_count}")
print(f"Total time: {result.total_time_ms:.1f}ms")
Advanced Agent Patterns
1. Planning Agent
class PlanningAgent:
"""Agent that creates and executes plans"""
def __init__(self, executor: ProductionAgent):
self.client = OpenAI()
self.executor = executor
async def run(self, task: str) -> AgentResult:
# Step 1: Create plan
plan = await self._create_plan(task)
logger.info(f"Plan created with {len(plan)} steps")
# Step 2: Execute plan steps
results = []
for i, step in enumerate(plan):
logger.info(f"Executing step {i+1}: {step}")
# Execute step with context from previous results
context = f"Previous results: {json.dumps(results[-3:])}" if results else ""
step_result = self.executor.run(f"{step}\n\n{context}")
results.append({
"step": step,
"success": step_result.success,
"answer": step_result.answer
})
# Abort if critical step fails
if not step_result.success and self._is_critical_step(step):
break
# Step 3: Synthesize final answer
return await self._synthesize(task, results)
async def _create_plan(self, task: str) -> List[str]:
response = self.client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "system",
"content": """Create a step-by-step plan to accomplish this task.
Each step should be a single, actionable task.
Return as JSON: {"steps": ["step 1", "step 2", ...]}"""
},
{"role": "user", "content": task}
],
response_format={"type": "json_object"}
)
result = json.loads(response.choices[0].message.content)
return result.get("steps", [task])
def _is_critical_step(self, step: str) -> bool:
"""Determine if step failure should abort execution"""
critical_keywords = ["required", "must", "critical", "essential"]
return any(kw in step.lower() for kw in critical_keywords)
async def _synthesize(self, task: str, results: List[Dict]) -> AgentResult:
response = self.client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "system",
"content": "Synthesize these step results into a final answer."
},
{
"role": "user",
"content": f"Task: {task}\n\nStep results:\n{json.dumps(results, indent=2)}"
}
]
)
return AgentResult(
success=all(r["success"] for r in results),
answer=response.choices[0].message.content,
steps=[],
total_tokens=0,
total_time_ms=0,
tool_calls_count=len(results)
)
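A usage sketch, assuming the ProductionAgent and tools defined earlier. The task string is illustrative, and run is async, so it needs an event loop:
import asyncio

# Illustrative wiring: a planner driving the ReAct executor defined above
executor = ProductionAgent(tools=[WebSearchTool(), CalculatorTool(), FileWriteTool()])
planner = PlanningAgent(executor=executor)

plan_result = asyncio.run(
    planner.run("Research average NYC rent, compute 30% of a $120K salary per month, "
                "and save a short affordability summary to rent.md")
)
print(plan_result.answer)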
2. Self-Correcting Agent (Reflexion)
class ReflexionAgent:
"""Agent that learns from failures and self-corrects"""
def __init__(self, base_agent: ProductionAgent, max_retries: int = 3):
self.client = OpenAI()
self.base_agent = base_agent
self.max_retries = max_retries
self.reflections: List[str] = []
def run(self, task: str) -> AgentResult:
for attempt in range(self.max_retries):
# Add reflections from previous attempts
enhanced_task = task
if self.reflections:
enhanced_task = f"""{task}
IMPORTANT - Learn from previous attempts:
{chr(10).join(f'- {r}' for r in self.reflections)}"""
# Try to complete task
result = self.base_agent.run(enhanced_task)
# Evaluate success
is_successful, reflection = self._evaluate_result(task, result)
if is_successful:
return result
# Learn from failure
self.reflections.append(reflection)
logger.info(f"Attempt {attempt + 1} failed. Reflection: {reflection}")
return result
def _evaluate_result(self, task: str, result: AgentResult) -> tuple[bool, str]:
"""Evaluate if result is satisfactory"""
response = self.client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "system",
"content": """Evaluate if this result successfully completes the task.
Return JSON:
{
"success": true/false,
"reflection": "What went wrong and how to improve (if failed)"
}"""
},
{
"role": "user",
"content": f"Task: {task}\n\nResult: {result.answer}"
}
],
response_format={"type": "json_object"}
)
eval_result = json.loads(response.choices[0].message.content)
return eval_result["success"], eval_result.get("reflection", "")
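A usage sketch; the task is illustrative, and reflections accumulate across retries:
# Illustrative wiring: wrap the ReAct agent in a self-correcting retry loop
base = ProductionAgent(tools=[WebSearchTool(), CalculatorTool()])
reflexion = ReflexionAgent(base_agent=base, max_retries=3)

result = reflexion.run("Find the current US federal funds rate and compute the monthly interest on $10,000")
print(result.answer)
print(f"Reflections recorded: {len(reflexion.reflections)}")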
3. Multi-Agent Collaboration
class MultiAgentOrchestrator:
"""Coordinate multiple specialized agents"""
def __init__(self):
self.client = OpenAI()
# Create specialized agents
self.researcher = ProductionAgent(
tools=[WebSearchTool()],
system_prompt="You are a research agent. Find accurate, current information."
)
self.analyst = ProductionAgent(
tools=[CalculatorTool(), CodeExecutionTool()],
system_prompt="You are an analysis agent. Process data and provide insights."
)
self.writer = ProductionAgent(
tools=[FileWriteTool()],
system_prompt="You are a writing agent. Create clear, well-structured content."
)
async def run(self, task: str) -> str:
# Step 1: Route to appropriate agent(s)
plan = await self._route_task(task)
results = {}
# Step 2: Execute with each agent
for step in plan:
agent_name = step["agent"]
agent_task = step["task"]
dependencies = step.get("dependencies", [])
# Build context from dependencies
context = "\n".join([
f"{dep}: {results[dep]}"
for dep in dependencies if dep in results
])
full_task = f"{agent_task}\n\nContext:\n{context}" if context else agent_task
# Get appropriate agent
agent = getattr(self, agent_name, None)
if agent:
result = agent.run(full_task)
results[step["id"]] = result.answer
# Step 3: Synthesize final output
return await self._synthesize(task, results)
async def _route_task(self, task: str) -> List[Dict]:
"""Determine which agents should handle which parts"""
response = self.client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "system",
"content": """Route this task to specialized agents.
Available agents: researcher, analyst, writer
Return JSON:
{
"steps": [
{"id": "step1", "agent": "researcher", "task": "...", "dependencies": []},
{"id": "step2", "agent": "analyst", "task": "...", "dependencies": ["step1"]}
]
}"""
},
{"role": "user", "content": task}
],
response_format={"type": "json_object"}
)
result = json.loads(response.choices[0].message.content)
return result.get("steps", [])
async def _synthesize(self, task: str, results: Dict) -> str:
response = self.client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": "Combine these agent outputs into a final response."},
{"role": "user", "content": f"Task: {task}\n\nResults: {json.dumps(results)}"}
]
)
return response.choices[0].message.content
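A usage sketch; run is async, and because the routing plan is produced by the model, the actual steps will vary from run to run:
import asyncio

orchestrator = MultiAgentOrchestrator()
report = asyncio.run(
    orchestrator.run("Research the top 3 open-source vector databases, compare their "
                     "pricing models, and write a summary to comparison.md")
)
print(report)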
Safety and Guardrails
class SafeAgent(ProductionAgent):
"""Agent with safety constraints"""
BLOCKED_ACTIONS = [
"delete", "remove", "drop", "truncate",
"rm -rf", "format", "sudo"
]
REQUIRE_APPROVAL = [
"send_email", "make_payment", "delete_file",
"post_to_social", "execute_code"
]
def __init__(self, *args, require_human_approval: bool = True, **kwargs):
super().__init__(*args, **kwargs)
self.require_human_approval = require_human_approval
self.approved_actions: List[str] = []
def _validate_tool_call(self, tool_name: str, arguments: Dict) -> tuple[bool, str]:
"""Validate if tool call should be allowed"""
# Check for blocked actions
args_str = json.dumps(arguments).lower()
for blocked in self.BLOCKED_ACTIONS:
if blocked in args_str:
return False, f"Blocked action detected: {blocked}"
# Check if approval required
if tool_name in self.REQUIRE_APPROVAL:
action_key = f"{tool_name}:{json.dumps(arguments, sort_keys=True)}"
if action_key not in self.approved_actions:
if self.require_human_approval:
approved = self._request_approval(tool_name, arguments)
if approved:
self.approved_actions.append(action_key)
else:
return False, "Action not approved by user"
return True, ""
def _request_approval(self, tool_name: str, arguments: Dict) -> bool:
"""Request human approval for sensitive action"""
print(f"\n[APPROVAL REQUIRED]")
print(f"Tool: {tool_name}")
print(f"Arguments: {json.dumps(arguments, indent=2)}")
response = input("Approve? (yes/no): ")
return response.lower() in ["yes", "y"]
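Note that ProductionAgent.run as written never calls _validate_tool_call, so the check has to be wired into the tool loop yourself. A sketch of both the hook point and a direct check; the tool set and arguments are illustrative:
# Illustrative setup
safe_agent = SafeAgent(
    tools=[CalculatorTool(), CodeExecutionTool(), FileWriteTool()],
    require_human_approval=True
)

# Inside run()'s tool loop the check belongs right before execution, e.g.:
#   allowed, reason = self._validate_tool_call(tool_name, tool_args)
#   if not allowed:
#       result = f"Blocked: {reason}"
#   else:
#       result = self.tools[tool_name].execute(**tool_args)

# Direct check: a write_file call whose arguments contain a blocked pattern
allowed, reason = safe_agent._validate_tool_call(
    "write_file", {"path": "cleanup.sh", "content": "rm -rf /tmp/cache"}
)
print(allowed, reason)  # False "Blocked action detected: rm -rf"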
Computer Use Agents (2025)
Anthropic’s Computer Use capability allows agents to control a browser or desktop. This is the frontier of agentic AI.
import anthropic
import base64
client = anthropic.Anthropic()
def computer_use_agent(task: str):
"""Agent that can control a computer to complete tasks"""
messages = [
{
"role": "user",
"content": task
}
]
# Computer use requires specific tools
tools = [
{
"type": "computer_20241022",
"name": "computer",
"display_width_px": 1024,
"display_height_px": 768,
"display_number": 1,
},
{
"type": "text_editor_20241022",
"name": "str_replace_editor"
},
{
"type": "bash_20241022",
"name": "bash"
}
]
while True:
response = client.beta.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=4096,
tools=tools,
messages=messages,
betas=["computer-use-2024-10-22"]
)
# Check if task is complete
if response.stop_reason == "end_turn":
final_text = next(
(block.text for block in response.content if hasattr(block, "text")),
"Task completed"
)
return final_text
        # Record the assistant turn once, then collect a result for every tool use
        messages.append({"role": "assistant", "content": response.content})
        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                # Execute the computer action
                result = execute_computer_action(block)
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": result
                })
        messages.append({"role": "user", "content": tool_results})
def execute_computer_action(tool_block):
"""Execute a computer use action and return screenshot"""
action = tool_block.input.get("action")
# In production, use a headless browser or VM
# This is a simplified example
if action == "screenshot":
# Take screenshot and return base64
return {"type": "image", "source": {"type": "base64", "data": "..."}}
elif action == "click":
x, y = tool_block.input.get("coordinate", [0, 0])
# Click at coordinates
return f"Clicked at ({x}, {y})"
elif action == "type":
text = tool_block.input.get("text", "")
# Type text
return f"Typed: {text}"
return "Action completed"
# Example usage
# result = computer_use_agent("Go to github.com and star the langchain repository")
Safety Critical: Computer use agents can take real actions on real systems. Always run in sandboxed environments (VMs, containers) and implement strict guardrails.
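One concrete guardrail is to wrap execute_computer_action with allow/deny checks before any action reaches the real system. A sketch; the allowlist and blocked patterns below are illustrative, not exhaustive:
ALLOWED_COMPUTER_ACTIONS = {"screenshot", "click", "type", "key", "mouse_move"}   # illustrative allowlist
BLOCKED_BASH_PATTERNS = ["rm -rf", "sudo", "mkfs", "shutdown", "curl | sh"]       # illustrative blocklist

def guarded_computer_action(tool_block):
    """Allow/deny check in front of execute_computer_action."""
    if tool_block.name == "bash":
        command = tool_block.input.get("command", "")
        if any(pattern in command for pattern in BLOCKED_BASH_PATTERNS):
            return "Refused: command matches a blocked pattern"
    elif tool_block.name == "computer":
        action = tool_block.input.get("action")
        if action not in ALLOWED_COMPUTER_ACTIONS:
            return f"Refused: action '{action}' is not in the allowlist"
    return execute_computer_action(tool_block)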
Key Takeaways
ReAct Is Your Foundation
The Reasoning + Acting loop is the core pattern. Master it before adding complexity.
Tools Are Everything
Agents are only as useful as their tools. Invest in robust, well-tested tools.
Safety First
Always implement guardrails. Validate inputs, limit actions, require approval for sensitive operations.
Observe and Debug
Log every decision, tool call, and result. You can’t fix what you can’t see. A minimal logging sketch follows after these takeaways.
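A minimal sketch, assuming the AgentResult and AgentStep dataclasses from earlier: emit one structured JSON log line per step so runs can be replayed and compared.
import json
import logging
from dataclasses import asdict

step_logger = logging.getLogger("agent.steps")

def log_agent_run(result: AgentResult):
    """Emit one JSON log line per agent step."""
    for i, step in enumerate(result.steps):
        record = asdict(step)                    # thought, tool_calls, observation, timestamp
        record["timestamp"] = step.timestamp.isoformat()
        record["step_index"] = i
        step_logger.info(json.dumps(record, default=str))

# After result = agent.run(...):
# log_agent_run(result)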
What’s Next
LangGraph
Build complex agent workflows with state machines and conditional routing