December 2025 Update: Now covers the new Responses API, Predicted Outputs, structured outputs with response_format, and GPT-4.5 capabilities.

Why This Module Matters
The OpenAI API is the most widely used LLM interface. Nearly every AI startup, enterprise AI feature, and AI-powered tool uses it or something similar. Master it and you can build almost anything.

Career Impact: Companies pay $200-350K for engineers who can build reliable, production-grade AI features. This module teaches exactly that.
What’s New in 2025
| Feature | Description | Use Case |
|---|---|---|
| Responses API | Simpler, more powerful completions | New projects |
| Predicted Outputs | Speed up edits with known structure | Code refactoring |
| GPT-4.5 | Most capable model | Complex reasoning |
| o1 Reasoning | Chain-of-thought built-in | Math, coding |
| Structured Outputs | Guaranteed JSON schema | API responses |
Your Development Environment
```python
# Install dependencies:
#   pip install openai python-dotenv pydantic

# .env file (NEVER commit this):
#   OPENAI_API_KEY=sk-...

from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()
client = OpenAI()  # Automatically reads OPENAI_API_KEY

# Verify the connection
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hello!"}],
    max_tokens=10,
)
print(response.choices[0].message.content)
```
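If automatic key discovery doesn't fit your setup, the client also accepts configuration explicitly; a minimal sketch (the environment variable name here is just an example, not a convention):

```python
import os

from openai import OpenAI

# Explicit configuration instead of relying on OPENAI_API_KEY auto-discovery
client = OpenAI(
    api_key=os.environ["MY_OPENAI_KEY"],  # example variable name
    timeout=30.0,     # per-request timeout in seconds
    max_retries=2,    # SDK-level automatic retries on transient errors
)
```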
Security: Never hardcode API keys. Never commit .env files. Use environment variables or a secret manager in production.

Chat Completions: The Foundation
The Complete Request Object
```python
from openai import OpenAI

client = OpenAI()

response = client.chat.completions.create(
    # Required
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain Python decorators"},
    ],
    # Optional but important
    temperature=0.7,      # 0-2, controls randomness
    max_tokens=1000,      # Limit output length
    top_p=1.0,            # Nucleus sampling (usually leave at 1)
    frequency_penalty=0,  # -2 to 2, reduces repetition
    presence_penalty=0,   # -2 to 2, encourages new topics
    stop=None,            # Stop sequences
    n=1,                  # Number of completions
    # Advanced
    seed=42,              # For reproducible outputs
    user="user_123",      # For abuse monitoring
    logprobs=False,       # Return token probabilities
)

# Access the response
print(response.choices[0].message.content)
print(response.usage)  # Token counts
```
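One field worth checking in production is finish_reason: a value of "length" means the reply was cut off by max_tokens. A small sketch using the response above:

```python
# Check why generation stopped before trusting the output
choice = response.choices[0]
if choice.finish_reason == "length":
    print("Warning: output was truncated by max_tokens")
elif choice.finish_reason == "stop":
    print("Model finished naturally")

print(f"Tokens: {response.usage.prompt_tokens} in, {response.usage.completion_tokens} out")
```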
Production-Ready Chat Function
```python
from dataclasses import dataclass
from typing import List, Optional

from openai import OpenAI


@dataclass
class ChatMessage:
    role: str
    content: str


@dataclass
class ChatResponse:
    content: str
    model: str
    input_tokens: int
    output_tokens: int
    finish_reason: str
    cost_estimate: float


class ChatClient:
    """Production-ready OpenAI chat client"""

    # USD per 1M tokens
    PRICING = {
        "gpt-4o": {"input": 2.50, "output": 10.00},
        "gpt-4o-mini": {"input": 0.15, "output": 0.60},
        "gpt-4-turbo": {"input": 10.00, "output": 30.00},
    }

    def __init__(self, default_model: str = "gpt-4o-mini"):
        self.client = OpenAI()
        self.default_model = default_model

    def _calculate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        # Unknown models fall back to gpt-4o rates
        pricing = self.PRICING.get(model, self.PRICING["gpt-4o"])
        return (input_tokens / 1_000_000 * pricing["input"] +
                output_tokens / 1_000_000 * pricing["output"])

    def chat(
        self,
        messages: List[ChatMessage],
        model: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        json_response: bool = False,
    ) -> ChatResponse:
        """Send a chat completion and return it with full metadata"""
        model = model or self.default_model
        kwargs = {
            "model": model,
            "messages": [{"role": m.role, "content": m.content} for m in messages],
            "temperature": temperature,
        }
        if max_tokens:
            kwargs["max_tokens"] = max_tokens
        if json_response:
            kwargs["response_format"] = {"type": "json_object"}

        response = self.client.chat.completions.create(**kwargs)
        choice = response.choices[0]
        usage = response.usage
        return ChatResponse(
            content=choice.message.content,
            model=response.model,
            input_tokens=usage.prompt_tokens,
            output_tokens=usage.completion_tokens,
            finish_reason=choice.finish_reason,
            cost_estimate=self._calculate_cost(model, usage.prompt_tokens, usage.completion_tokens),
        )


# Usage
chat = ChatClient()
response = chat.chat([
    ChatMessage("system", "You are a coding tutor."),
    ChatMessage("user", "Explain list comprehensions in Python"),
])
print(response.content)
print(f"Cost: ${response.cost_estimate:.6f}")
print(f"Tokens: {response.input_tokens} in, {response.output_tokens} out")
```
Streaming: Real-Time Responses
Why Streaming Matters
Without streaming, users wait 5-30 seconds staring at nothing. With streaming, they see tokens appear as soon as they are generated, which is a far better experience.

```python
from typing import Callable


def stream_chat(
    prompt: str,
    on_token: Callable[[str], None] = lambda t: print(t, end="", flush=True),
) -> str:
    """Stream a response, invoking a callback for each token"""
    stream = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    full_response = []
    for chunk in stream:
        if chunk.choices[0].delta.content:
            token = chunk.choices[0].delta.content
            full_response.append(token)
            on_token(token)
    print()  # Newline at end
    return "".join(full_response)


# Basic usage
response = stream_chat("Explain machine learning in one paragraph")
```
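By default, streamed responses don't report token usage. Recent API versions can attach usage to a final, otherwise-empty chunk via stream_options; verify against your installed SDK version:

```python
stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hello!"}],
    stream=True,
    stream_options={"include_usage": True},  # usage arrives on the last chunk
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
    if chunk.usage:  # populated only on the final chunk, which has no choices
        print(f"\nTotal tokens: {chunk.usage.total_tokens}")
```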
Production Streaming with FastAPI
```python
import json

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from openai import OpenAI

app = FastAPI()
client = OpenAI()


@app.post("/chat/stream")
async def chat_stream(request: dict):
    """Server-Sent Events streaming endpoint"""
    def generate():
        stream = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=request["messages"],
            stream=True,
        )
        for chunk in stream:
            if chunk.choices[0].delta.content:
                data = {"content": chunk.choices[0].delta.content}
                yield f"data: {json.dumps(data)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
    )

# Frontend consumption: note that EventSource only supports GET requests,
# so for a POST endpoint read the body stream with fetch instead:
#
#   const res = await fetch('/chat/stream', {
#     method: 'POST',
#     headers: {'Content-Type': 'application/json'},
#     body: JSON.stringify({messages}),
#   });
#   const reader = res.body.getReader();
#   const decoder = new TextDecoder();
#   while (true) {
#     const {done, value} = await reader.read();
#     if (done) break;
#     // Each chunk holds one or more "data: {...}" lines to parse
#     appendToChat(decoder.decode(value));
#   }
```
Function Calling: LLMs That Take Action
The Pattern
- You define functions the model can “call”
- Model decides which function to call based on user input
- You execute the function and return results
- Model uses results to form final response
Complete Function Calling System
```python
import json
from typing import Any, Callable

from openai import OpenAI

client = OpenAI()


class FunctionRegistry:
    """Register and execute functions that LLMs can call"""

    def __init__(self):
        self.functions: dict[str, Callable] = {}
        self.schemas: list[dict] = []

    def register(self, name: str, description: str, parameters: dict):
        """Decorator to register a function"""
        def decorator(func: Callable):
            self.functions[name] = func
            self.schemas.append({
                "type": "function",
                "function": {
                    "name": name,
                    "description": description,
                    "parameters": parameters,
                },
            })
            return func
        return decorator

    def execute(self, name: str, arguments: dict) -> Any:
        """Execute a registered function"""
        if name not in self.functions:
            raise ValueError(f"Unknown function: {name}")
        return self.functions[name](**arguments)


# Create registry and register functions
registry = FunctionRegistry()


@registry.register(
    name="get_weather",
    description="Get current weather for a city",
    parameters={
        "type": "object",
        "properties": {
            "city": {"type": "string", "description": "City name"},
            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "default": "celsius"},
        },
        "required": ["city"],
    },
)
def get_weather(city: str, unit: str = "celsius") -> dict:
    # Mock implementation - replace with a real weather API
    return {"city": city, "temp": 22, "unit": unit, "condition": "sunny"}


@registry.register(
    name="search_products",
    description="Search product catalog",
    parameters={
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Search terms"},
            "max_price": {"type": "number", "description": "Maximum price"},
            "category": {"type": "string", "enum": ["electronics", "clothing", "books"]},
        },
        "required": ["query"],
    },
)
def search_products(query: str, max_price: float | None = None, category: str | None = None) -> list:
    # Mock implementation
    return [
        {"id": "1", "name": f"{query} Pro", "price": 99.99},
        {"id": "2", "name": f"{query} Basic", "price": 49.99},
    ]


@registry.register(
    name="send_email",
    description="Send an email to a recipient",
    parameters={
        "type": "object",
        "properties": {
            "to": {"type": "string", "description": "Recipient email"},
            "subject": {"type": "string"},
            "body": {"type": "string"},
        },
        "required": ["to", "subject", "body"],
    },
)
def send_email(to: str, subject: str, body: str) -> dict:
    # Mock - replace with a real email service
    return {"status": "sent", "to": to}


def chat_with_functions(user_message: str) -> str:
    """Complete function calling flow"""
    messages = [{"role": "user", "content": user_message}]

    # First call - the model may request function calls
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        tools=registry.schemas,
        tool_choice="auto",
    )
    message = response.choices[0].message

    # If no function calls, return directly
    if not message.tool_calls:
        return message.content

    # Execute all requested functions
    messages.append(message)
    for tool_call in message.tool_calls:
        function_name = tool_call.function.name
        arguments = json.loads(tool_call.function.arguments)
        print(f"Executing: {function_name}({arguments})")
        result = registry.execute(function_name, arguments)
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result),
        })

    # Final call with the function results
    final_response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
    )
    return final_response.choices[0].message.content


# Usage examples
print(chat_with_functions("What's the weather in Paris?"))
print(chat_with_functions("Find me some laptop options under $100"))
print(chat_with_functions("Send an email to [email protected] about the meeting tomorrow"))
```
Parallel Function Calls
GPT-4-class models can request several tool calls in a single response. One way to execute them concurrently (a sketch using a thread pool; any concurrency primitive works):

```python
# User: "What's the weather in Paris and London?"
# The model may respond with TWO tool_calls in one message.
import concurrent.futures
import json

with concurrent.futures.ThreadPoolExecutor() as pool:
    futures = [
        pool.submit(registry.execute, tc.function.name, json.loads(tc.function.arguments))
        for tc in message.tool_calls
    ]
    results = [f.result() for f in futures]
```
Structured Outputs: Guaranteed JSON
The Problem Structured Outputs Solve
```python
# Without structured outputs - unreliable
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Extract the name and email from: John at [email protected]"}],
)
# Might return: "Name: John, Email: [email protected]"
# Or: {"name": "John", "email": "[email protected]"}
# Or: "The name is John and email is [email protected]"
# Parsing nightmare!
```
With Structured Outputs - Guaranteed
```python
from enum import Enum
from typing import List, Optional

from pydantic import BaseModel, Field


class Priority(str, Enum):
    low = "low"
    medium = "medium"
    high = "high"
    critical = "critical"


class Task(BaseModel):
    title: str = Field(description="Short task title")
    description: str = Field(description="Detailed description")
    priority: Priority
    due_date: Optional[str] = Field(description="Due date in YYYY-MM-DD format")
    tags: List[str] = Field(default_factory=list)


class TaskExtraction(BaseModel):
    tasks: List[Task]
    summary: str


# Use with OpenAI
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "Extract tasks from the user's message."},
        {"role": "user", "content": """
Need to finish the quarterly report by Friday, it's high priority.
Also should review the new hire's code when I get a chance.
Don't forget to book the team dinner for next month.
"""},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "task_extraction",
            "strict": True,
            "schema": TaskExtraction.model_json_schema(),
        },
    },
)

# Guaranteed to parse
tasks = TaskExtraction.model_validate_json(response.choices[0].message.content)
for task in tasks.tasks:
    print(f"[{task.priority.value.upper()}] {task.title}")
    if task.due_date:
        print(f"  Due: {task.due_date}")
```
Complex Nested Extraction
```python
from typing import List, Optional

from pydantic import BaseModel, Field


class Address(BaseModel):
    street: str
    city: str
    state: Optional[str]
    country: str
    postal_code: Optional[str]


class Company(BaseModel):
    name: str
    industry: str
    employee_count: Optional[str]


class Person(BaseModel):
    full_name: str
    email: Optional[str]
    phone: Optional[str]
    job_title: Optional[str]
    company: Optional[Company]
    address: Optional[Address]
    skills: List[str]


class ExtractionResult(BaseModel):
    people: List[Person]
    confidence: float = Field(ge=0, le=1, description="Confidence score 0-1")


# Extract from unstructured text like resumes, emails, business cards
text = """
Hi, I'm Sarah Chen, a Senior Software Engineer at TechCorp (they have about 500 employees
in the fintech space). You can reach me at [email protected] or 555-0123.
I specialize in Python, distributed systems, and machine learning.
Our office is at 123 Innovation Drive, San Francisco, CA 94102.
"""

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "Extract structured information from text."},
        {"role": "user", "content": text},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "extraction",
            "strict": True,
            "schema": ExtractionResult.model_json_schema(),
        },
    },
)

result = ExtractionResult.model_validate_json(response.choices[0].message.content)
print(result.people[0].full_name)     # "Sarah Chen"
print(result.people[0].company.name)  # "TechCorp"
```
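A structured-output request can still come back as an explicit refusal rather than JSON (for disallowed content). Recent SDK versions expose this on the message; a defensive sketch:

```python
# Check for a refusal before attempting to parse
message = response.choices[0].message
if getattr(message, "refusal", None):
    print(f"Model refused: {message.refusal}")
else:
    result = ExtractionResult.model_validate_json(message.content)
```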
Vision: Processing Images
Image Analysis
```python
import base64


def encode_image(image_path: str) -> str:
    """Encode an image file to base64"""
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")


def analyze_image(image_path: str, prompt: str = "Describe this image in detail") -> str:
    """Analyze an image with GPT-4o vision"""
    base64_image = encode_image(image_path)
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "high",  # or "low" for faster/cheaper
                        },
                    },
                ],
            }
        ],
        max_tokens=1000,
    )
    return response.choices[0].message.content


# Usage
description = analyze_image("receipt.jpg", "Extract all items and prices from this receipt")
```
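Base64 encoding is only needed for local files; publicly hosted images can be passed by URL:

```python
# Hosted images: pass the URL directly, no base64 encoding required
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/photo.jpg"}},
        ],
    }],
)
print(response.choices[0].message.content)
```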
Multiple Images
```python
def compare_images(image_paths: list[str], prompt: str) -> str:
    """Compare multiple images in one request"""
    content = [{"type": "text", "text": prompt}]
    for path in image_paths:
        content.append({
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encode_image(path)}"},
        })
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": content}],
    )
    return response.choices[0].message.content


# Compare before/after, find differences, etc.
result = compare_images(
    ["before.jpg", "after.jpg"],
    "What changed between these two images?",
)
```
Production Error Handling
```python
import logging
import time
from functools import wraps

from openai import OpenAI, APIError, APIConnectionError, AuthenticationError, RateLimitError

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def retry_with_backoff(max_retries: int = 3, base_delay: float = 1.0):
    """Decorator for robust API calls with retry logic"""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_exception = None
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except AuthenticationError:
                    logger.error("Invalid API key")
                    raise  # Don't retry auth errors
                except RateLimitError as e:
                    last_exception = e
                    wait = base_delay * (2 ** attempt)
                    logger.warning(f"Rate limited. Retry {attempt + 1}/{max_retries} in {wait}s")
                    time.sleep(wait)
                except APIConnectionError as e:
                    last_exception = e
                    wait = base_delay * (2 ** attempt)
                    logger.warning(f"Connection error. Retry {attempt + 1}/{max_retries} in {wait}s")
                    time.sleep(wait)
                except APIError as e:
                    last_exception = e
                    status = getattr(e, "status_code", None)  # not every APIError carries a status
                    if status and status >= 500:  # Server errors are worth retrying
                        wait = base_delay * (2 ** attempt)
                        logger.warning(f"Server error {status}. Retry {attempt + 1}/{max_retries}")
                        time.sleep(wait)
                    else:
                        raise  # Don't retry client errors (4xx)
            raise last_exception
        return wrapper
    return decorator


@retry_with_backoff(max_retries=3)
def reliable_chat(messages: list, model: str = "gpt-4o-mini") -> str:
    client = OpenAI()
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        timeout=30,  # 30-second timeout
    )
    return response.choices[0].message.content
```
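Note that the SDK already retries transient failures on its own; a custom decorator mainly adds logging and policy control. The client-level knobs (openai-python 1.x constructor arguments):

```python
# Built-in resilience: the SDK retries connection errors, 429s, and 5xx responses
client = OpenAI(
    max_retries=5,  # default is 2
    timeout=30.0,   # applied to every request made with this client
)
```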
Cost Optimization Strategies
Model Selection Matrix
| Task | Recommended Model | Cost per 1M tokens (input / output, USD) | Why |
|---|---|---|---|
| Simple Q&A | gpt-4o-mini | 0.15 / 0.60 | Fast, cheap, sufficient |
| Code generation | gpt-4o | 2.50 / 10.00 | Better accuracy |
| Complex reasoning | gpt-4o | 2.50 / 10.00 | Necessary for quality |
| Summarization | gpt-4o-mini | 0.15 / 0.60 | Simple task |
| Data extraction | gpt-4o-mini + structured outputs | 0.15 / 0.60 | Structured output helps |
| Creative writing | gpt-4o | 2.50 / 10.00 | Better quality |
Smart Model Router
```python
from enum import Enum


class TaskComplexity(Enum):
    SIMPLE = "simple"      # FAQ, summarization, translation
    MODERATE = "moderate"  # Code explanation, analysis
    COMPLEX = "complex"    # Code generation, reasoning, creative


def select_model(complexity: TaskComplexity) -> str:
    """Select an appropriate model based on task complexity"""
    return {
        TaskComplexity.SIMPLE: "gpt-4o-mini",
        TaskComplexity.MODERATE: "gpt-4o-mini",  # Try cheap first
        TaskComplexity.COMPLEX: "gpt-4o",
    }[complexity]


def smart_chat(prompt: str, complexity: TaskComplexity) -> tuple[str, float]:
    """Chat with cost-aware model selection"""
    model = select_model(complexity)
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    # Calculate cost (USD per 1M tokens: input rate, output rate)
    usage = response.usage
    pricing = {"gpt-4o": (2.50, 10.00), "gpt-4o-mini": (0.15, 0.60)}
    input_rate, output_rate = pricing[model]
    cost = (usage.prompt_tokens / 1_000_000 * input_rate +
            usage.completion_tokens / 1_000_000 * output_rate)
    return response.choices[0].message.content, cost
```
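A common refinement is to try the cheap model first and escalate only when the answer looks insufficient. A sketch with a deliberately naive quality check (the length heuristic is a placeholder, not a recommendation):

```python
def chat_with_escalation(prompt: str, min_length: int = 50) -> str:
    """Try gpt-4o-mini first; escalate to gpt-4o if the answer looks too thin."""
    answer, cost = smart_chat(prompt, TaskComplexity.SIMPLE)
    if len(answer) < min_length:  # placeholder heuristic - use a real quality check
        answer, cost = smart_chat(prompt, TaskComplexity.COMPLEX)
    return answer
```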
Mini-Project: AI Customer Support Bot
```python
import json
from datetime import datetime
from enum import Enum
from typing import List, Optional

from openai import OpenAI
from pydantic import BaseModel

client = OpenAI()

# Knowledge base (in production, this would be a vector database)
KNOWLEDGE_BASE = {
    "refund_policy": "Refunds are available within 30 days of purchase for unused items.",
    "shipping": "Standard shipping takes 5-7 business days. Express is 2-3 days.",
    "contact": "Email [email protected] or call 1-800-555-0123",
    "hours": "Customer support is available Mon-Fri 9am-6pm EST",
}


class TicketPriority(str, Enum):
    low = "low"
    medium = "medium"
    high = "high"
    urgent = "urgent"


class SupportResponse(BaseModel):
    answer: str
    confidence: float
    needs_human: bool
    priority: TicketPriority
    suggested_actions: List[str]


# Function definitions for the bot
tools = [
    {
        "type": "function",
        "function": {
            "name": "lookup_knowledge",
            "description": "Look up information in the knowledge base",
            "parameters": {
                "type": "object",
                "properties": {
                    "topic": {"type": "string", "enum": list(KNOWLEDGE_BASE.keys())},
                },
                "required": ["topic"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "create_ticket",
            "description": "Create a support ticket for human follow-up",
            "parameters": {
                "type": "object",
                "properties": {
                    "summary": {"type": "string"},
                    "priority": {"type": "string", "enum": ["low", "medium", "high", "urgent"]},
                    "customer_email": {"type": "string"},
                },
                "required": ["summary", "priority"],
            },
        },
    },
]


def lookup_knowledge(topic: str) -> str:
    return KNOWLEDGE_BASE.get(topic, "Information not found")


def create_ticket(summary: str, priority: str, customer_email: Optional[str] = None) -> dict:
    ticket_id = f"TKT-{datetime.now().strftime('%Y%m%d%H%M%S')}"
    return {"ticket_id": ticket_id, "status": "created", "priority": priority}


def handle_support_request(customer_message: str) -> SupportResponse:
    """Handle a customer support request"""
    messages = [
        {
            "role": "system",
            "content": """You are a helpful customer support agent.
Use the lookup_knowledge function to find answers.
Create tickets for complex issues that need human attention.
Always be polite and helpful.""",
        },
        {"role": "user", "content": customer_message},
    ]

    # First call - the model may request functions
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        tools=tools,
        tool_choice="auto",
    )
    message = response.choices[0].message

    # Process function calls, if any
    if message.tool_calls:
        messages.append(message)
        for tool_call in message.tool_calls:
            func_name = tool_call.function.name
            args = json.loads(tool_call.function.arguments)
            if func_name == "lookup_knowledge":
                result = lookup_knowledge(**args)
            elif func_name == "create_ticket":
                result = create_ticket(**args)
            else:
                result = {"error": f"Unknown function: {func_name}"}
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })
        # Get the final response
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
        )
        message = response.choices[0].message

    # Get a structured analysis of the interaction
    analysis_response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Analyze this support interaction."},
            {"role": "user", "content": f"Customer: {customer_message}\nAgent: {message.content}"},
        ],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "support_response",
                "schema": SupportResponse.model_json_schema(),
            },
        },
    )
    return SupportResponse.model_validate_json(
        analysis_response.choices[0].message.content
    )


# Test the bot
result = handle_support_request("I want a refund for my order from last week")
print(f"Answer: {result.answer}")
print(f"Needs human: {result.needs_human}")
print(f"Priority: {result.priority}")
```
Key Takeaways
Streaming Is Essential
Always stream for user-facing apps. Nobody wants to wait 10 seconds for a response to appear.
Functions Enable Actions
Function calling turns LLMs from chatbots into agents that can search, book, send, and execute.
Structured Outputs Save Time
Use Pydantic models + json_schema for guaranteed parseable responses. No more regex parsing.
Cost Awareness Matters
gpt-4o-mini is roughly 17x cheaper than gpt-4o. Use it for simple tasks; save gpt-4o for complex reasoning.
Bonus: Responses API (2025)
The new Responses API simplifies the chat completions interface and adds powerful features:

```python
from openai import OpenAI
from pydantic import BaseModel

client = OpenAI()

# Basic Responses API usage
response = client.responses.create(
    model="gpt-4o",
    input="Explain quantum computing simply",
    instructions="You are a helpful physics teacher",
)
print(response.output_text)


# With structured output
class Explanation(BaseModel):
    concept: str
    simple_explanation: str
    analogy: str
    difficulty_level: str


response = client.responses.create(
    model="gpt-4o",
    input="Explain neural networks",
    text={
        "format": {
            "type": "json_schema",
            "name": "explanation",
            "schema": Explanation.model_json_schema(),
        }
    },
)
result = Explanation.model_validate_json(response.output_text)
print(f"Analogy: {result.analogy}")
```
Predicted Outputs (Speed Boost)
When you know most of the output in advance (as in code refactoring), Predicted Outputs can speed up generation substantially:

```python
original_code = '''
def calculate_total(items):
    total = 0
    for item in items:
        total += item.price
    return total
'''

# We predict the output will be similar to the input
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "Refactor this function to use sum()"},
        {"role": "user", "content": original_code},
    ],
    prediction={
        "type": "content",
        "content": original_code,  # The model uses this as a starting point
    },
)

# Faster, because tokens matching the prediction are accepted instead of regenerated
print(response.choices[0].message.content)
```
When to use Predicted Outputs: code editing, document revisions, template filling, or any task where the output is structurally similar to something you already have.
What’s Next
Vector Databases
Store embeddings at scale with pgvector and Pinecone for semantic search