December 2025 Update: Complete guide to DSPy - the framework that treats LLM calls as optimizable modules instead of brittle prompts.

What is DSPy?

DSPy (Declarative Self-improving Python) is a framework from Stanford NLP that replaces:
  • Prompting → Programming
  • String manipulation → Typed signatures
  • Manual tuning → Automatic optimization
Traditional Prompting              DSPy Approach
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
"You are an expert..."             class MyModule(dspy.Module):
"Step 1: First..."                     def forward(self, question):
"Example: ..."                             return self.predictor(question=question)
Manual prompt engineering           Automatic optimization
Fragile to model changes           Model-agnostic

Installation and Setup

pip install dspy  # the older "dspy-ai" package name still works, but "dspy" is the current name
import dspy
from dspy import Example

# Configure LM
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm)

# Or use local models
# lm = dspy.LM("ollama/llama3.2")
# lm = dspy.LM("together/meta-llama/Llama-3-70b-chat-hf")

Core Concepts

Signatures: Define Input/Output

Signatures define what your LLM module does:
import dspy

# Simple signature (inline)
qa = dspy.Predict("question -> answer")
result = qa(question="What is the capital of France?")
print(result.answer)  # Paris

# Class-based signature for more control
class QuestionAnswer(dspy.Signature):
    """Answer questions with concise, accurate responses."""
    
    question: str = dspy.InputField(desc="The question to answer")
    answer: str = dspy.OutputField(desc="A concise answer")

qa = dspy.Predict(QuestionAnswer)
result = qa(question="What is machine learning?")
print(result.answer)

Multi-field Signatures

class SentimentAnalysis(dspy.Signature):
    """Analyze the sentiment of text."""
    
    text: str = dspy.InputField()
    sentiment: str = dspy.OutputField(desc="positive, negative, or neutral")
    confidence: float = dspy.OutputField(desc="Confidence score 0-1")
    reasoning: str = dspy.OutputField(desc="Brief explanation")

analyzer = dspy.Predict(SentimentAnalysis)
result = analyzer(text="I absolutely loved this product!")
print(f"Sentiment: {result.sentiment} ({result.confidence})")
print(f"Reason: {result.reasoning}")

Modules: Building Blocks

ChainOfThought: Step-by-Step Reasoning

class MathProblem(dspy.Signature):
    """Solve math word problems step by step."""
    
    problem: str = dspy.InputField()
    answer: float = dspy.OutputField()

# Use ChainOfThought for better reasoning
solver = dspy.ChainOfThought(MathProblem)

result = solver(problem="""
    A store has 45 apples. They sell 12 in the morning and 
    receive a shipment of 30 more. How many apples do they have now?
""")

print(f"Answer: {result.answer}")
print(f"Reasoning: {result.reasoning}")  # Shows step-by-step work

ProgramOfThought: Code-Based Reasoning

class Calculation(dspy.Signature):
    """Solve problems by writing Python code."""
    
    problem: str = dspy.InputField()
    answer: str = dspy.OutputField()

solver = dspy.ProgramOfThought(Calculation)
result = solver(problem="Calculate compound interest on $1000 at 5% for 10 years")
print(result.answer)

ReAct: Reason and Act

class SearchAndAnswer(dspy.Signature):
    """Answer questions using search when needed."""
    
    question: str = dspy.InputField()
    answer: str = dspy.OutputField()

# Define tools
def search(query: str) -> str:
    """Search the web for information."""
    # Implement actual search
    return f"Search results for: {query}"

def calculate(expression: str) -> str:
    """Evaluate a mathematical expression."""
    # Note: eval() is unsafe on untrusted input; use a proper math parser in production
    return str(eval(expression))

# Create ReAct agent
react = dspy.ReAct(
    SearchAndAnswer,
    tools=[search, calculate],
    max_iters=5
)

result = react(question="What is the population of Tokyo and what's 10% of that?")
print(result.answer)

Building Complex Pipelines

Custom Modules

class RAGModule(dspy.Module):
    """RAG pipeline as a DSPy module."""
    
    def __init__(self, num_docs: int = 3):
        super().__init__()
        self.num_docs = num_docs
        self.retriever = dspy.Retrieve(k=num_docs)  # requires a retrieval model configured via dspy.configure(rm=...)
        self.generate = dspy.ChainOfThought("context, question -> answer")
    
    def forward(self, question: str) -> dspy.Prediction:
        # Retrieve relevant documents
        context = self.retriever(question).passages
        
        # Generate answer using context
        result = self.generate(
            context=context,
            question=question
        )
        
        return result

# Usage
rag = RAGModule(num_docs=5)
answer = rag(question="What are the benefits of RAG?")
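
The dspy.Retrieve step above only works once a retrieval model (RM) has been configured; without one, the call fails. A minimal sketch using the public ColBERTv2 Wikipedia index from the DSPy docs (the endpoint URL is a demo server and may change):

# Configure a retrieval backend alongside the LM before calling RAGModule
colbert = dspy.ColBERTv2(url="http://20.102.90.50:2017/wiki17_abstracts")
dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"), rm=colbert)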

Multi-Stage Pipelines

class ResearchPipeline(dspy.Module):
    """Multi-stage research and synthesis pipeline."""
    
    def __init__(self):
        super().__init__()
        
        # Stage 1: Query decomposition
        self.decompose = dspy.ChainOfThought(
            "question -> sub_questions: list[str]"
        )
        
        # Stage 2: Research each sub-question
        self.research = dspy.ChainOfThought(
            "sub_question -> findings"
        )
        
        # Stage 3: Synthesize findings
        self.synthesize = dspy.ChainOfThought(
            "question, all_findings: list[str] -> comprehensive_answer"
        )
    
    def forward(self, question: str) -> dspy.Prediction:
        # Decompose into sub-questions
        decomposed = self.decompose(question=question)
        sub_questions = decomposed.sub_questions
        
        # Research each
        all_findings = []
        for sq in sub_questions[:5]:  # Limit
            result = self.research(sub_question=sq)
            all_findings.append(result.findings)
        
        # Synthesize
        final = self.synthesize(
            question=question,
            all_findings=all_findings
        )
        
        return final
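
Calling the pipeline works like any other module; the output field name comes from the synthesize signature:

pipeline = ResearchPipeline()
result = pipeline(question="How does retrieval-augmented generation reduce hallucinations?")
print(result.comprehensive_answer)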

Optimization: The Power of DSPy

Automatic Prompt Optimization

DSPy can automatically optimize your prompts using training examples:
# Define training examples
trainset = [
    Example(question="What is 2+2?", answer="4").with_inputs("question"),
    Example(question="What is the capital of Japan?", answer="Tokyo").with_inputs("question"),
    Example(question="Who wrote Hamlet?", answer="William Shakespeare").with_inputs("question"),
]

# Create module
qa = dspy.Predict("question -> answer")

# Optimize with MIPROv2
from dspy.teleprompt import MIPROv2

optimizer = MIPROv2(
    metric=lambda example, pred, trace=None: pred.answer.lower() == example.answer.lower(),
    num_candidates=10,
    init_temperature=1.0
)

optimized_qa = optimizer.compile(qa, trainset=trainset)

# The optimized module has better prompts
result = optimized_qa(question="What is the speed of light?")

BootstrapFewShot: Learn from Examples

from dspy.teleprompt import BootstrapFewShot

# Define metric
def accuracy_metric(example, pred, trace=None):
    return example.answer.lower() in pred.answer.lower()

# Bootstrap optimizer
bootstrap = BootstrapFewShot(
    metric=accuracy_metric,
    max_bootstrapped_demos=4,
    max_labeled_demos=16
)

# Compile
optimized = bootstrap.compile(qa, trainset=trainset)
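
To see what the optimizer actually produced, run the compiled module and then inspect the most recent LM call; the bootstrapped demonstrations appear in the prompt:

result = optimized(question="Who painted the Mona Lisa?")
dspy.inspect_history(n=1)  # prints the last prompt and completion sent to the LM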

Evaluation

from dspy.evaluate import Evaluate

# Create test set
testset = [
    Example(question="What is 3+3?", answer="6").with_inputs("question"),
    Example(question="Capital of Germany?", answer="Berlin").with_inputs("question"),
]

# Evaluate
evaluator = Evaluate(
    devset=testset,
    metric=accuracy_metric,
    display_progress=True
)

score = evaluator(optimized_qa)
print(f"Accuracy: {score}%")

Advanced Patterns

Assertions and Constraints

import dspy
from dspy.primitives.assertions import assert_transform_module, backtrack_handler

class FactChecker(dspy.Module):
    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought("claim -> verdict, evidence")
    
    def forward(self, claim: str):
        result = self.generate(claim=claim)
        
        # Assert constraints
        dspy.Assert(
            result.verdict in ["true", "false", "unverifiable"],
            f"Verdict must be true/false/unverifiable, got: {result.verdict}"
        )
        
        dspy.Assert(
            len(result.evidence) > 20,
            "Evidence must be detailed (>20 chars)"
        )
        
        return result

# Wrap with assertion handling
checker = assert_transform_module(
    FactChecker(),
    backtrack_handler
)
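
Calling the wrapped module looks like calling the original; when an assertion fails, the handler backtracks and retries the generation with the failure message as feedback:

result = checker(claim="The Great Wall of China is visible from the Moon.")
print(result.verdict, result.evidence)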

Typed Predictors

from pydantic import BaseModel
from typing import List

class ExtractedEntities(BaseModel):
    people: List[str]
    organizations: List[str]
    locations: List[str]

class EntityExtraction(dspy.Signature):
    """Extract named entities from text."""
    
    text: str = dspy.InputField()
    entities: ExtractedEntities = dspy.OutputField()

extractor = dspy.TypedPredictor(EntityExtraction)  # in recent DSPy releases, plain dspy.Predict also handles typed outputs
result = extractor(text="Apple CEO Tim Cook visited Paris to meet with Emmanuel Macron.")

print(result.entities.people)  # ['Tim Cook', 'Emmanuel Macron']
print(result.entities.organizations)  # ['Apple']
print(result.entities.locations)  # ['Paris']

Caching and Efficiency

import dspy

# Caching is enabled by default: repeated identical calls are served from a local cache
lm = dspy.LM("openai/gpt-4o-mini", cache=True)  # pass cache=False to disable
dspy.configure(lm=lm)

# Or avoid real LM calls entirely during tests with canned responses
from dspy.utils import DummyLM

dspy.configure(lm=DummyLM([
    {"answer": "Paris"},
    {"answer": "Tokyo"}
]))
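
With DummyLM configured, modules return the canned outputs in order, which makes unit tests cheap and deterministic (a small sketch):

qa = dspy.Predict("question -> answer")
print(qa(question="What is the capital of France?").answer)  # "Paris", no API call made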

DSPy vs LangChain

Aspect              DSPy                    LangChain
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Philosophy          Programming LLMs        Chaining prompts
Optimization        Automatic               Manual
Type Safety         Built-in                Limited
Learning Curve      Steeper                 Gentler
Best For            Production systems      Prototyping

When to Use DSPy

Use DSPy when:
  • Building production systems that need optimization
  • You have training data to improve prompts
  • Type safety and reliability matter
  • You want model-agnostic code
Consider alternatives when:
  • Rapid prototyping is the priority
  • You don’t have training examples
  • The task is a simple one-off

Complete Example: Optimized QA System

import dspy
from dspy import Example
from dspy.teleprompt import BootstrapFewShot
from dspy.evaluate import Evaluate

# 1. Configure
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm)

# 2. Define signature
class QASignature(dspy.Signature):
    """Answer questions accurately and concisely."""
    
    context: str = dspy.InputField(desc="Background information")
    question: str = dspy.InputField(desc="Question to answer")
    answer: str = dspy.OutputField(desc="Concise, accurate answer")

# 3. Build module
class QASystem(dspy.Module):
    def __init__(self):
        super().__init__()
        self.qa = dspy.ChainOfThought(QASignature)
    
    def forward(self, context: str, question: str):
        return self.qa(context=context, question=question)

# 4. Create training data
trainset = [
    Example(
        context="Python is a programming language created by Guido van Rossum.",
        question="Who created Python?",
        answer="Guido van Rossum"
    ).with_inputs("context", "question"),
    Example(
        context="The Eiffel Tower is 330 meters tall and located in Paris.",
        question="How tall is the Eiffel Tower?",
        answer="330 meters"
    ).with_inputs("context", "question"),
    # Add more examples...
]

# 5. Define metric
def answer_match(example, pred, trace=None):
    return example.answer.lower() in pred.answer.lower()

# 6. Optimize
optimizer = BootstrapFewShot(
    metric=answer_match,
    max_bootstrapped_demos=4
)

optimized_qa = optimizer.compile(QASystem(), trainset=trainset)

# 7. Evaluate
testset = [...]  # Your test examples
evaluator = Evaluate(devset=testset, metric=answer_match)
score = evaluator(optimized_qa)
print(f"Test accuracy: {score}%")

# 8. Use in production
result = optimized_qa(
    context="Machine learning is a subset of AI.",
    question="What is machine learning a subset of?"
)
print(result.answer)

# 9. Save optimized module
optimized_qa.save("optimized_qa.json")

# 10. Load later
loaded_qa = QASystem()
loaded_qa.load("optimized_qa.json")

Key Takeaways

Signatures Over Prompts

Define what your LLM does with typed signatures, not string prompts

Automatic Optimization

DSPy optimizes prompts automatically using your training examples

Composable Modules

Build complex pipelines from simple, reusable modules

Model Agnostic

Switch models without rewriting prompts
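
For example, swapping the model behind an existing module is a configuration change, not a prompt rewrite (a sketch assuming an Anthropic key is available; model names follow LiteLLM conventions):

# Same module and signature, different model: only the configuration changes
with dspy.context(lm=dspy.LM("anthropic/claude-3-5-sonnet-20241022")):
    result = optimized_qa(
        context="Machine learning is a subset of AI.",
        question="What is machine learning a subset of?"
    )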

What’s Next

Capstone Project

Apply everything you’ve learned in a comprehensive AI engineering project