December 2025 Update: Covers the latest tool calling patterns from OpenAI, Anthropic, and open-source models including parallel tool calls, structured outputs, and MCP integrations.
## Why Tool Calling Matters
LLMs can reason but can’t act. Tool calling bridges this gap, enabling LLMs to:

- Query databases and APIs
- Execute code
- Search the web
- Control external systems
- Access real-time information
```
Without Tools                        With Tools
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
User: "What's the weather?"          User: "What's the weather?"
                                           │
LLM: "I don't have access            LLM: [Calls weather_api()]
     to real-time weather"                 │
                                     API: {"temp": 72, "sky": "sunny"}
                                           │
                                     LLM: "It's 72°F and sunny!"
```
## OpenAI Tool Calling

### Basic Function Definition

```python
from openai import OpenAI
import json

client = OpenAI()

# Define tools
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name, e.g., 'San Francisco, CA'"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit"
                    }
                },
                "required": ["location"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "search_database",
            "description": "Search the product database",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Search query"
                    },
                    "category": {
                        "type": "string",
                        "enum": ["electronics", "clothing", "books", "all"],
                        "description": "Product category filter"
                    },
                    "max_results": {
                        "type": "integer",
                        "description": "Maximum results to return",
                        "default": 10
                    }
                },
                "required": ["query"]
            }
        }
    }
]
```
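OpenAI's structured outputs also apply to function calling: setting `"strict": true` on a function definition guarantees that the generated arguments match the schema exactly. Strict mode requires every property to appear in `required` and `"additionalProperties": false`. A minimal strict variant of the weather tool:

```python
# Strict-mode variant of get_weather: the model's generated arguments
# are guaranteed to conform to this schema exactly.
strict_weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather for a location",
        "strict": True,
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string"},
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
            },
            # Strict mode: all properties required, no extra keys allowed
            "required": ["location", "unit"],
            "additionalProperties": False
        }
    }
}
```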
### Tool Calling Loop

```python
def process_with_tools(user_message: str) -> str:
    """Complete tool calling loop"""
    messages = [{"role": "user", "content": user_message}]

    while True:
        # Get model response
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=tools,
            tool_choice="auto"  # Let model decide
        )
        message = response.choices[0].message
        messages.append(message)

        # Check if we're done (no tool calls)
        if not message.tool_calls:
            return message.content

        # Process each tool call
        for tool_call in message.tool_calls:
            function_name = tool_call.function.name
            arguments = json.loads(tool_call.function.arguments)

            # Execute the function
            result = execute_function(function_name, arguments)

            # Add result to messages
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result)
            })

def execute_function(name: str, args: dict) -> dict:
    """Execute a function by name"""
    functions = {
        "get_weather": get_weather,
        "search_database": search_database
    }
    if name in functions:
        return functions[name](**args)
    return {"error": f"Unknown function: {name}"}

def get_weather(location: str, unit: str = "fahrenheit") -> dict:
    # Mock implementation - replace with an actual weather API
    return {
        "location": location,
        "temperature": 72 if unit == "fahrenheit" else 22,
        "unit": unit,
        "conditions": "sunny"
    }

def search_database(query: str, category: str = "all", max_results: int = 10) -> dict:
    # Mock implementation
    return {
        "query": query,
        "results": [
            {"id": 1, "name": f"Product matching '{query}'", "price": 29.99}
        ],
        "total": 1
    }
```
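With the mock implementations above, the loop can be exercised end to end (the exact model wording will vary):

```python
# The model should call get_weather(location="Tokyo", unit="celsius"),
# receive the mock result, and then answer in natural language.
print(process_with_tools("What's the weather in Tokyo, in celsius?"))
```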
### Parallel Tool Calls

Modern LLMs can call multiple tools simultaneously:

```python
import asyncio

async def async_execute_function(name: str, args: dict) -> dict:
    # Async wrapper around execute_function; swap in real async I/O
    # (e.g., an async HTTP client) for production tools
    return execute_function(name, args)

def process_parallel_tools(user_message: str) -> str:
    """Handle parallel tool execution"""
    messages = [{"role": "user", "content": user_message}]

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        tools=tools,
        parallel_tool_calls=True  # Enable parallel calls
    )
    message = response.choices[0].message

    if message.tool_calls:
        messages.append(message)

        # Execute all tool calls concurrently
        async def execute_all():
            tasks = []
            for tool_call in message.tool_calls:
                task = asyncio.create_task(
                    async_execute_function(
                        tool_call.function.name,
                        json.loads(tool_call.function.arguments)
                    )
                )
                tasks.append((tool_call.id, task))

            results = []
            for tool_id, task in tasks:
                result = await task
                results.append({
                    "role": "tool",
                    "tool_call_id": tool_id,
                    "content": json.dumps(result)
                })
            return results

        tool_results = asyncio.run(execute_all())
        messages.extend(tool_results)

        # Get final response
        final_response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages
        )
        return final_response.choices[0].message.content

    return message.content
```
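If your tool implementations are synchronous (as the mocks above are), a thread pool achieves the same fan-out without rewriting them as coroutines. A minimal sketch reusing `execute_function` from earlier:

```python
from concurrent.futures import ThreadPoolExecutor

def execute_tool_calls_threaded(tool_calls) -> list[dict]:
    """Run independent tool calls in parallel threads, returning
    tool-result messages in the original call order."""
    def run_one(tool_call):
        args = json.loads(tool_call.function.arguments)
        result = execute_function(tool_call.function.name, args)
        return {
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result)
        }

    # pool.map preserves input order, so results line up with their tool_call IDs
    with ThreadPoolExecutor(max_workers=8) as pool:
        return list(pool.map(run_one, tool_calls))
```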
### Structured Outputs

Force the model to output valid JSON that matches a schema:

```python
from pydantic import BaseModel
from typing import List

class ProductRecommendation(BaseModel):
    product_id: str
    name: str
    price: float
    reason: str
    confidence: float

class RecommendationResponse(BaseModel):
    recommendations: List[ProductRecommendation]
    search_query_used: str
    total_matches: int

def get_structured_recommendations(query: str) -> RecommendationResponse:
    """Get recommendations with a guaranteed schema"""
    response = client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "You are a product recommendation assistant."
            },
            {
                "role": "user",
                "content": f"Recommend products for: {query}"
            }
        ],
        response_format=RecommendationResponse
    )
    return response.choices[0].message.parsed
```
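The parsed result is a plain Pydantic object, so downstream code works with validated, typed fields rather than raw JSON:

```python
result = get_structured_recommendations("wireless headphones under $100")
print(result.search_query_used, result.total_matches)
for rec in result.recommendations:
    # Every field was validated against the schema above
    print(f"{rec.name}: ${rec.price:.2f} ({rec.confidence:.0%}) - {rec.reason}")
```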
## Anthropic Tool Use

Claude has excellent tool use capabilities:

```python
import anthropic
import json

client = anthropic.Anthropic()

tools = [
    {
        "name": "get_stock_price",
        "description": "Get the current stock price for a ticker symbol",
        "input_schema": {
            "type": "object",
            "properties": {
                "ticker": {
                    "type": "string",
                    "description": "Stock ticker symbol (e.g., AAPL)"
                }
            },
            "required": ["ticker"]
        }
    },
    {
        "name": "calculate_portfolio_value",
        "description": "Calculate total portfolio value",
        "input_schema": {
            "type": "object",
            "properties": {
                "holdings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "ticker": {"type": "string"},
                            "shares": {"type": "number"}
                        }
                    },
                    "description": "List of stock holdings"
                }
            },
            "required": ["holdings"]
        }
    }
]

def chat_with_tools(user_message: str) -> str:
    """Claude tool calling loop"""
    messages = [{"role": "user", "content": user_message}]

    while True:
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=4096,
            tools=tools,
            messages=messages
        )

        # Check stop reason
        if response.stop_reason == "end_turn":
            # Extract text response
            for block in response.content:
                if hasattr(block, "text"):
                    return block.text
            return ""

        # Process tool uses
        messages.append({"role": "assistant", "content": response.content})
        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                result = execute_tool(block.name, block.input)
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": json.dumps(result)
                })
        if tool_results:
            messages.append({"role": "user", "content": tool_results})

def execute_tool(name: str, inputs: dict) -> dict:
    """Execute a tool by name"""
    if name == "get_stock_price":
        return {"ticker": inputs["ticker"], "price": 150.00, "currency": "USD"}
    elif name == "calculate_portfolio_value":
        total = sum(h.get("shares", 0) * 150 for h in inputs.get("holdings", []))
        return {"total_value": total, "currency": "USD"}
    return {"error": "Unknown tool"}
```
## Building Robust Tool Systems

### Tool Registry Pattern

```python
from typing import Callable, Any
from dataclasses import dataclass
from functools import wraps
import inspect

@dataclass
class Tool:
    name: str
    description: str
    function: Callable
    parameters: dict

class ToolRegistry:
    """Central registry for all tools"""

    def __init__(self):
        self.tools: dict[str, Tool] = {}

    def register(self, description: str):
        """Decorator to register a function as a tool"""
        def decorator(func: Callable):
            # Extract parameters from the function signature
            sig = inspect.signature(func)
            params = self._extract_parameters(sig)

            tool = Tool(
                name=func.__name__,
                description=description,
                function=func,
                parameters=params
            )
            self.tools[func.__name__] = tool

            @wraps(func)
            def wrapper(*args, **kwargs):
                return func(*args, **kwargs)
            return wrapper
        return decorator

    def _extract_parameters(self, sig: inspect.Signature) -> dict:
        """Convert a function signature to JSON Schema"""
        properties = {}
        required = []
        for name, param in sig.parameters.items():
            prop = {"type": "string"}  # Default

            # Get type annotation
            if param.annotation != inspect.Parameter.empty:
                prop["type"] = self._python_to_json_type(param.annotation)

            # Get description from docstring (simplified)
            properties[name] = prop

            if param.default == inspect.Parameter.empty:
                required.append(name)

        return {
            "type": "object",
            "properties": properties,
            "required": required
        }

    def _python_to_json_type(self, python_type) -> str:
        type_map = {
            str: "string",
            int: "integer",
            float: "number",
            bool: "boolean",
            list: "array",
            dict: "object"
        }
        return type_map.get(python_type, "string")

    def get_openai_tools(self) -> list:
        """Convert to the OpenAI tools format"""
        return [
            {
                "type": "function",
                "function": {
                    "name": tool.name,
                    "description": tool.description,
                    "parameters": tool.parameters
                }
            }
            for tool in self.tools.values()
        ]

    def execute(self, name: str, arguments: dict) -> Any:
        """Execute a registered tool"""
        if name not in self.tools:
            raise ValueError(f"Unknown tool: {name}")
        tool = self.tools[name]
        return tool.function(**arguments)

# Usage
registry = ToolRegistry()

@registry.register("Get the current weather for a location")
def get_weather(location: str, unit: str = "fahrenheit") -> dict:
    return {"temp": 72, "conditions": "sunny"}

@registry.register("Search for products in the catalog")
def search_products(query: str, limit: int = 10) -> list:
    return [{"id": 1, "name": f"Product for {query}"}]

# Get tools for the API call
tools = registry.get_openai_tools()

# Execute a tool
result = registry.execute("get_weather", {"location": "NYC"})
```
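The registry plugs straight into the tool calling loop from earlier; a minimal sketch wiring the two together (assumes an OpenAI client and the `json` import from the first section):

```python
from openai import OpenAI

oai_client = OpenAI()

def run_with_registry(user_message: str) -> str:
    """Same shape as process_with_tools, but dispatch goes through the registry."""
    messages = [{"role": "user", "content": user_message}]
    while True:
        response = oai_client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=registry.get_openai_tools(),
        )
        message = response.choices[0].message
        messages.append(message)
        if not message.tool_calls:
            return message.content
        for tool_call in message.tool_calls:
            result = registry.execute(
                tool_call.function.name,
                json.loads(tool_call.function.arguments),
            )
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })
```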
### Error Handling and Retries

```python
import json

from openai import OpenAI
from tenacity import retry, stop_after_attempt, wait_exponential

class RobustToolExecutor:
    """Execute tools with error handling and retries"""

    def __init__(self, registry: ToolRegistry):
        self.registry = registry
        self.client = OpenAI()

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10)
    )
    def _execute_with_retry(self, name: str, arguments: dict):
        # Let exceptions propagate so tenacity can retry transient failures;
        # catching them here would prevent any retry from ever firing
        return self.registry.execute(name, arguments)

    def execute_tool(self, name: str, arguments: dict) -> dict:
        """Execute with retries, converting the final failure into an error payload"""
        try:
            result = self._execute_with_retry(name, arguments)
            return {"success": True, "result": result}
        except Exception as e:
            return {"success": False, "error": str(e)}

    def process_message(self, user_message: str) -> str:
        """Full processing loop with error handling"""
        messages = [{"role": "user", "content": user_message}]
        max_tool_iterations = 5

        for _ in range(max_tool_iterations):
            response = self.client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=self.registry.get_openai_tools()
            )
            message = response.choices[0].message
            messages.append(message)

            if not message.tool_calls:
                return message.content

            for tool_call in message.tool_calls:
                try:
                    args = json.loads(tool_call.function.arguments)
                except json.JSONDecodeError:
                    result = {"error": "Invalid JSON in arguments"}
                else:
                    result = self.execute_tool(
                        tool_call.function.name,
                        args
                    )
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": json.dumps(result)
                })

        return "Maximum tool iterations reached"
```
## Tool Calling Best Practices

### 1. Clear Descriptions

```python
# ❌ Bad: Vague description
{
    "name": "search",
    "description": "Search for stuff"
}

# ✅ Good: Specific and actionable
{
    "name": "search_knowledge_base",
    "description": "Search the company knowledge base for internal documents, policies, and procedures. Use when the user asks about company-specific information. Returns up to 10 relevant documents with titles and snippets."
}
```
### 2. Constrained Parameters

```python
# ❌ Bad: Unconstrained
{
    "name": "set_temperature",
    "parameters": {
        "type": "object",
        "properties": {
            "temp": {"type": "number"}
        }
    }
}

# ✅ Good: Constrained and documented
{
    "name": "set_temperature",
    "description": "Set thermostat temperature",
    "parameters": {
        "type": "object",
        "properties": {
            "temperature": {
                "type": "number",
                "minimum": 60,
                "maximum": 85,
                "description": "Target temperature in Fahrenheit (60-85)"
            },
            "mode": {
                "type": "string",
                "enum": ["heat", "cool", "auto"],
                "description": "Heating/cooling mode"
            }
        },
        "required": ["temperature", "mode"]
    }
}
```
### 3. Tool Selection Guidance

```python
SYSTEM_PROMPT = """You have access to the following tools:

1. **search_web**: Use for current events, news, or information not in your training data
2. **search_knowledge_base**: Use for company-specific information, policies, procedures
3. **calculate**: Use for any mathematical calculations, even simple ones
4. **get_weather**: Use for current weather conditions

RULES:
- Always use a tool when the user asks for real-time or external information
- Use calculate() for any numbers to avoid errors
- If unsure which tool to use, try search_knowledge_base first for internal queries

Do NOT make up information that should come from a tool."""
```
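The guidance only takes effect if it is sent with every request. A minimal sketch (assuming the `client` and `tools` from the OpenAI examples above):

```python
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": "What's 23.5% of 1847?"}
    ],
    tools=tools,  # must include the tools the prompt describes
)
# Per the rules above, the model should call calculate()
# instead of doing the arithmetic itself.
```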
## Common Patterns

### Confirmation Before Action

```python
def process_with_confirmation(user_message: str, dangerous_tools: list[str]):
    """Require confirmation for dangerous actions.

    Returns a confirmation request (dict) when dangerous tools are called,
    otherwise the assistant's final reply (str).
    """
    messages = [{"role": "user", "content": user_message}]
    pending_confirmations = []

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        tools=tools
    )
    message = response.choices[0].message

    if message.tool_calls:
        for tool_call in message.tool_calls:
            if tool_call.function.name in dangerous_tools:
                pending_confirmations.append({
                    "tool": tool_call.function.name,
                    "args": json.loads(tool_call.function.arguments),
                    "id": tool_call.id
                })

    if pending_confirmations:
        return {
            "requires_confirmation": True,
            "actions": pending_confirmations,
            "message": "Please confirm the following actions:"
        }

    # No dangerous tools: run the normal loop
    # (execute_and_respond is a helper like process_with_tools above)
    return execute_and_respond(messages, message)
```
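The sketch above stops at the confirmation request; resuming once the user responds is the other half. A hedged sketch, reusing helper names from the earlier examples:

```python
def resume_after_confirmation(messages: list, message, approved_ids: set) -> str:
    """Execute only the tool calls the user approved, then continue."""
    messages.append(message)
    for tool_call in message.tool_calls:
        if tool_call.id in approved_ids:
            args = json.loads(tool_call.function.arguments)
            result = execute_function(tool_call.function.name, args)
        else:
            # Tell the model the action was declined rather than silently dropping it
            result = {"error": "Action rejected by the user"}
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result)
        })
    final = client.chat.completions.create(model="gpt-4o", messages=messages)
    return final.choices[0].message.content
```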
### Tool Result Caching

```python
import time
import json
from hashlib import sha256

class CachedToolExecutor:
    def __init__(self, cache_ttl: int = 300):
        self.cache = {}
        self.cache_ttl = cache_ttl

    def _cache_key(self, name: str, args: dict) -> str:
        """Generate a cache key from the tool name and arguments"""
        args_str = json.dumps(args, sort_keys=True)
        return sha256(f"{name}:{args_str}".encode()).hexdigest()

    def _execute_direct(self, name: str, args: dict) -> dict:
        # Delegate to your tool registry or executor here
        return registry.execute(name, args)

    def execute(self, name: str, args: dict) -> dict:
        """Execute with caching for idempotent tools"""
        # Skip the cache for non-idempotent tools
        if name in ["send_email", "create_order", "delete_file"]:
            return self._execute_direct(name, args)

        key = self._cache_key(name, args)
        if key in self.cache:
            cached = self.cache[key]
            if time.time() - cached["time"] < self.cache_ttl:
                return cached["result"]

        result = self._execute_direct(name, args)
        self.cache[key] = {"result": result, "time": time.time()}
        return result
```
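Usage, assuming the registry from earlier (the second call within the TTL never touches the tool):

```python
executor = CachedToolExecutor(cache_ttl=300)
first = executor.execute("get_weather", {"location": "NYC"})   # executes the tool
second = executor.execute("get_weather", {"location": "NYC"})  # served from the cache
assert first == second
```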
## Key Takeaways

- **Define tools clearly.** Good descriptions and constrained parameters lead to reliable tool selection.
- **Handle errors gracefully.** Tools fail; always wrap execution with error handling and return informative errors.
- **Use parallel calls.** Enable parallel tool calls for independent operations to reduce latency.
- **Guard dangerous actions.** Require confirmation for destructive or irreversible tool calls.
## What’s Next

**AI Observability & Monitoring**: Learn how to monitor, debug, and optimize your LLM applications in production.