LLM outputs can be unpredictable. This chapter covers techniques to validate, parse, and enforce structured output from language models.

## Instructor for Validated Outputs

Instructor provides type-safe structured outputs using Pydantic: it patches an OpenAI client so that chat completions accept a `response_model` parameter and return a validated model instance instead of raw text.

### Basic Usage

```python
import instructor
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import Optional


# Patch OpenAI client
client = instructor.from_openai(OpenAI())


class UserInfo(BaseModel):
    """Structured user information."""
    name: str = Field(description="Full name of the user")
    age: int = Field(ge=0, le=150, description="Age in years")
    email: Optional[str] = Field(None, description="Email address")
    occupation: str = Field(description="Current occupation")


def extract_user_info(text: str) -> UserInfo:
    """Extract structured user info from text."""
    return client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=UserInfo,
        messages=[
            {
                "role": "user",
                "content": f"Extract user information from: {text}"
            }
        ]
    )


# Usage
text = """
Hi, I'm Sarah Johnson. I'm 28 years old and work as a software 
engineer at a tech startup. You can reach me at [email protected]
"""

user = extract_user_info(text)
print(f"Name: {user.name}")
print(f"Age: {user.age}")
print(f"Email: {user.email}")
print(f"Occupation: {user.occupation}")

### Complex Nested Structures

```python
import instructor
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import Optional
from enum import Enum


client = instructor.from_openai(OpenAI())


class Priority(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"


class SubTask(BaseModel):
    """A subtask within a task."""
    title: str
    estimated_hours: float = Field(ge=0)
    completed: bool = False


class Task(BaseModel):
    """A structured task with subtasks."""
    title: str
    description: str
    priority: Priority
    assignee: Optional[str] = None
    subtasks: list[SubTask] = Field(default_factory=list)
    tags: list[str] = Field(default_factory=list)


class ProjectPlan(BaseModel):
    """Complete project plan."""
    project_name: str
    objective: str
    tasks: list[Task]
    total_estimated_hours: float = Field(ge=0)


def create_project_plan(description: str) -> ProjectPlan:
    """Generate a structured project plan."""
    return client.chat.completions.create(
        model="gpt-4o",
        response_model=ProjectPlan,
        messages=[
            {
                "role": "system",
                "content": "You are a project planning assistant. Create detailed, actionable project plans."
            },
            {
                "role": "user",
                "content": f"Create a project plan for: {description}"
            }
        ]
    )


# Usage
plan = create_project_plan(
    "Build a REST API for a todo application with user authentication"
)

print(f"Project: {plan.project_name}")
print(f"Objective: {plan.objective}")
print(f"Total Hours: {plan.total_estimated_hours}")

for task in plan.tasks:
    print(f"\nTask: {task.title} [{task.priority.value}]")
    for subtask in task.subtasks:
        print(f"  - {subtask.title} ({subtask.estimated_hours}h)")

### Retry Logic with Validation

```python
import instructor
from openai import OpenAI
from pydantic import BaseModel, Field, field_validator
from tenacity import retry, stop_after_attempt


client = instructor.from_openai(OpenAI())


class CodeReview(BaseModel):
    """Structured code review result."""
    summary: str = Field(min_length=10, max_length=500)
    issues: list[str] = Field(min_length=1)
    score: int = Field(ge=1, le=10)
    suggested_improvements: list[str]
    
    @field_validator("issues")
    @classmethod
    def validate_issues(cls, v):
        if not v:
            raise ValueError("At least one issue must be identified")
        return v
    
    @field_validator("score")
    @classmethod
    def validate_score(cls, v):
        if v < 1 or v > 10:
            raise ValueError("Score must be between 1 and 10")
        return v


@retry(stop=stop_after_attempt(3))
def review_code(code: str) -> CodeReview:
    """Review code with automatic retries on validation failure."""
    return client.chat.completions.create(
        model="gpt-4o",
        response_model=CodeReview,
        # instructor re-asks the model with the validation errors up to
        # max_retries times; the outer tenacity retry restarts the whole
        # call if those attempts are exhausted.
        max_retries=3,
        messages=[
            {
                "role": "system",
                "content": "You are a senior code reviewer. Provide detailed, constructive feedback."
            },
            {
                "role": "user",
                "content": f"Review this code:\n\n```python\n{code}\n```"
            }
        ]
    )


# Usage
code = """
def get_user(id):
    query = f"SELECT * FROM users WHERE id = {id}"
    return db.execute(query)
"""

review = review_code(code)
print(f"Score: {review.score}/10")
print(f"Summary: {review.summary}")
print(f"Issues: {review.issues}")

## Custom Validation Strategies

### Regex-Based Extraction

```python
import re
from dataclasses import dataclass


@dataclass
class ExtractedData:
    """Data extracted using regex patterns."""
    emails: list[str]
    phone_numbers: list[str]
    urls: list[str]
    dates: list[str]


class RegexExtractor:
    """Extract structured data using regex patterns."""
    
    PATTERNS = {
        "email": r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
        "phone": r'\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b',
        "url": r'https?://(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&//=]*)',
        "date": r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}\b',
    }
    
    def extract(self, text: str) -> ExtractedData:
        """Extract all structured data from text."""
        return ExtractedData(
            emails=re.findall(self.PATTERNS["email"], text),
            phone_numbers=re.findall(self.PATTERNS["phone"], text),
            urls=re.findall(self.PATTERNS["url"], text),
            dates=re.findall(self.PATTERNS["date"], text, re.IGNORECASE)
        )
    
    def extract_pattern(self, text: str, pattern_name: str) -> list[str]:
        """Extract specific pattern from text."""
        pattern = self.PATTERNS.get(pattern_name)
        if not pattern:
            raise ValueError(f"Unknown pattern: {pattern_name}")
        return re.findall(pattern, text)


# Usage
extractor = RegexExtractor()

llm_output = """
Contact us at support@example.com or sales@example.com.
Call 555-123-4567 or +1 (800) 555-0199.
Visit https://www.example.com for more info.
Meeting scheduled for 12/25/2024.
"""

data = extractor.extract(llm_output)
print(f"Emails: {data.emails}")
print(f"Phones: {data.phone_numbers}")
print(f"URLs: {data.urls}")
print(f"Dates: {data.dates}")

### JSON Extraction and Validation

````python
import json
import re
from typing import Optional, Type, TypeVar
from pydantic import BaseModel, ValidationError


T = TypeVar("T", bound=BaseModel)


class JSONExtractor:
    """Extract and validate JSON from LLM outputs."""
    
    @staticmethod
    def extract_json(text: str) -> Optional[dict]:
        """Extract JSON from text, handling various formats."""
        # Try direct parse first
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass
        
        # Try to find JSON in code blocks
        code_block = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
        if code_block:
            try:
                return json.loads(code_block.group(1).strip())
            except json.JSONDecodeError:
                pass
        
        # Try to find raw JSON object/array
        json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', text)
        if json_match:
            try:
                return json.loads(json_match.group(1))
            except json.JSONDecodeError:
                pass
        
        return None
    
    @classmethod
    def extract_and_validate(
        cls,
        text: str,
        model: Type[T]
    ) -> tuple[Optional[T], list[str]]:
        """Extract JSON and validate against Pydantic model."""
        errors = []
        
        data = cls.extract_json(text)
        if data is None:
            errors.append("Could not extract JSON from response")
            return None, errors
        
        try:
            validated = model.model_validate(data)
            return validated, []
        except ValidationError as e:
            for error in e.errors():
                field = ".".join(str(loc) for loc in error["loc"])
                errors.append(f"{field}: {error['msg']}")
            return None, errors


# Usage
class ProductInfo(BaseModel):
    name: str
    price: float
    in_stock: bool
    categories: list[str]


llm_output = """
Here's the product information:

```json
{
    "name": "Wireless Headphones",
    "price": 99.99,
    "in_stock": true,
    "categories": ["electronics", "audio"]
}
""" product, errors = JSONExtractor.extract_and_validate(llm_output, ProductInfo) if product: print(f”Product: - $”) else: print(f”Errors: “)

### Multi-Step Validation Pipeline

```python
from dataclasses import dataclass
from typing import Any, Callable


@dataclass
class ValidationResult:
    """Result of a validation step."""
    valid: bool
    data: Any
    errors: list[str]


class ValidationPipeline:
    """Multi-step validation pipeline for LLM outputs."""
    
    def __init__(self):
        self.steps: list[tuple[str, Callable]] = []
    
    def add_step(
        self,
        name: str,
        validator: Callable[[Any], ValidationResult]
    ) -> "ValidationPipeline":
        """Add a validation step."""
        self.steps.append((name, validator))
        return self
    
    def validate(self, data: Any) -> ValidationResult:
        """Run all validation steps."""
        current_data = data
        all_errors = []
        
        for name, validator in self.steps:
            result = validator(current_data)
            
            if not result.valid:
                all_errors.extend([f"[{name}] {e}" for e in result.errors])
                return ValidationResult(
                    valid=False,
                    data=current_data,
                    errors=all_errors
                )
            
            current_data = result.data
        
        return ValidationResult(
            valid=True,
            data=current_data,
            errors=[]
        )


# Validation functions
def validate_not_empty(data: str) -> ValidationResult:
    if not data or not data.strip():
        return ValidationResult(False, data, ["Response is empty"])
    return ValidationResult(True, data.strip(), [])


def validate_json_format(data: str) -> ValidationResult:
    # Reuses JSONExtractor from the JSON Extraction section above.
    extracted = JSONExtractor.extract_json(data)
    if extracted is None:
        return ValidationResult(False, data, ["Invalid JSON format"])
    return ValidationResult(True, extracted, [])


def validate_required_fields(required: list[str]):
    def validator(data: dict) -> ValidationResult:
        missing = [f for f in required if f not in data]
        if missing:
            return ValidationResult(
                False, data, [f"Missing required field: {f}" for f in missing]
            )
        return ValidationResult(True, data, [])
    return validator


def validate_field_types(type_map: dict):
    def validator(data: dict) -> ValidationResult:
        errors = []
        for field, expected_type in type_map.items():
            if field in data and not isinstance(data[field], expected_type):
                errors.append(
                    f"Field '{field}' should be {expected_type.__name__}"
                )
        if errors:
            return ValidationResult(False, data, errors)
        return ValidationResult(True, data, [])
    return validator


# Usage
pipeline = ValidationPipeline()
pipeline.add_step("not_empty", validate_not_empty)
pipeline.add_step("json_format", validate_json_format)
pipeline.add_step("required_fields", validate_required_fields(["name", "value"]))
pipeline.add_step("field_types", validate_field_types({"name": str, "value": (int, float)}))

# Test with LLM output
llm_output = '{"name": "temperature", "value": 72.5}'

result = pipeline.validate(llm_output)
if result.valid:
    print(f"Valid data: {result.data}")
else:
    print(f"Validation errors: {result.errors}")

## LLM-Based Validation

Use an LLM to validate another LLM's output. This is useful for checks that are hard to express in code, such as factual accuracy, logical consistency, format compliance, and safety.

```python
from openai import OpenAI
import json


class LLMValidator:
    """Use LLM to validate outputs."""
    
    def __init__(self, model: str = "gpt-4o-mini"):
        self.client = OpenAI()
        self.model = model
    
    def validate_factual(
        self,
        claim: str,
        context: str = ""
    ) -> dict:
        """Check if a claim is factually accurate."""
        prompt = f"""Evaluate the factual accuracy of this claim:

Claim: {claim}

{f"Context: {context}" if context else ""}

Respond with JSON:
{{
    "is_accurate": true/false,
    "confidence": 0.0-1.0,
    "reasoning": "explanation",
    "corrections": ["list of corrections if inaccurate"]
}}"""
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        
        return json.loads(response.choices[0].message.content)
    
    def validate_consistency(
        self,
        statements: list[str]
    ) -> dict:
        """Check if statements are consistent with each other."""
        prompt = f"""Check these statements for logical consistency:

Statements:
{chr(10).join(f"{i+1}. {s}" for i, s in enumerate(statements))}

Respond with JSON:
{{
    "is_consistent": true/false,
    "contradictions": [
        {{"statement_1": index, "statement_2": index, "explanation": "..."}}
    ],
    "overall_assessment": "summary"
}}"""
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        
        return json.loads(response.choices[0].message.content)
    
    def validate_format(
        self,
        output: str,
        expected_format: str
    ) -> dict:
        """Validate output matches expected format."""
        prompt = f"""Check if this output matches the expected format:

Output:
{output}

Expected format:
{expected_format}

Respond with JSON:
{{
    "matches_format": true/false,
    "issues": ["list of format issues"],
    "suggested_fix": "corrected version if needed"
}}"""
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        
        return json.loads(response.choices[0].message.content)
    
    def validate_safety(
        self,
        content: str
    ) -> dict:
        """Check content for safety issues."""
        prompt = f"""Analyze this content for safety issues:

Content:
{content}

Check for:
1. Harmful or dangerous instructions
2. Personal information exposure
3. Inappropriate content
4. Potential misuse

Respond with JSON:
{{
    "is_safe": true/false,
    "issues": [
        {{"type": "category", "severity": "low/medium/high", "description": "..."}}
    ],
    "recommendation": "action to take"
}}"""
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        
        return json.loads(response.choices[0].message.content)


# Usage
validator = LLMValidator()

# Check factual accuracy
result = validator.validate_factual(
    "Python was created by Guido van Rossum in 1989."
)
print(f"Accurate: {result['is_accurate']}")
print(f"Confidence: {result['confidence']}")

# Check consistency
statements = [
    "The meeting is at 3 PM.",
    "Everyone should arrive by 2:30 PM.",
    "The meeting was rescheduled to 4 PM."
]
result = validator.validate_consistency(statements)
print(f"Consistent: {result['is_consistent']}")

## Fallback Parsing Strategies

```python
import json
import re
from dataclasses import dataclass
from typing import Any, Callable, Optional


@dataclass
class ParseResult:
    """Result of parsing attempt."""
    success: bool
    data: Any
    method: str
    error: Optional[str] = None


class FallbackParser:
    """Try multiple parsing strategies with fallbacks."""
    
    def __init__(self):
        self.parsers: list[tuple[str, Callable]] = []
    
    def add_parser(
        self,
        name: str,
        parser: Callable[[str], Any]
    ) -> "FallbackParser":
        """Add a parser to the fallback chain."""
        self.parsers.append((name, parser))
        return self
    
    def parse(self, text: str) -> ParseResult:
        """Try parsers in order until one succeeds."""
        for name, parser in self.parsers:
            try:
                result = parser(text)
                return ParseResult(
                    success=True,
                    data=result,
                    method=name
                )
            except Exception:
                # This parser failed; fall through to the next one.
                continue
        
        return ParseResult(
            success=False,
            data=None,
            method="none",
            error="All parsers failed"
        )


# Parser functions
def parse_direct_json(text: str) -> dict:
    return json.loads(text)


def parse_code_block_json(text: str) -> dict:
    match = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
    if match:
        return json.loads(match.group(1))
    raise ValueError("No code block found")


def parse_embedded_json(text: str) -> dict:
    match = re.search(r'(\{[\s\S]*\})', text)
    if match:
        return json.loads(match.group(1))
    raise ValueError("No JSON object found")


def parse_key_value(text: str) -> dict:
    """Parse key: value format."""
    result = {}
    for line in text.split("\n"):
        match = re.match(r'^\s*["\']?(\w+)["\']?\s*[:=]\s*(.+)$', line)
        if match:
            key = match.group(1)
            value = match.group(2).strip().strip('"\'')
            # Try to convert to appropriate type
            # Coerce numbers and booleans via JSON; otherwise keep the string
            try:
                value = json.loads(value)
            except ValueError:
                pass
            result[key] = value
    if not result:
        raise ValueError("No key-value pairs found")
    return result


def parse_with_llm(client, model: str = "gpt-4o-mini"):
    """Create an LLM-based parser as last resort."""
    def parser(text: str) -> dict:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": "Extract structured data as JSON from the given text."
                },
                {"role": "user", "content": text}
            ],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)
    return parser


# Usage
from openai import OpenAI

client = OpenAI()

parser = FallbackParser()
parser.add_parser("direct_json", parse_direct_json)
parser.add_parser("code_block", parse_code_block_json)
parser.add_parser("embedded", parse_embedded_json)
parser.add_parser("key_value", parse_key_value)
parser.add_parser("llm", parse_with_llm(client))

# Test with various formats
outputs = [
    '{"name": "test", "value": 42}',
    'Here is the data:\n```json\n{"name": "test"}\n```',
    'name: test\nvalue: 42',
    'The result is name=test and value=42',
]

for output in outputs:
    result = parser.parse(output)
    print(f"Method: {result.method}, Data: {result.data}")
Validation Best Practices
- Always validate LLM outputs before using them
- Use Pydantic for type-safe structured extraction
- Implement fallback strategies for robustness
- Consider LLM-based validation for complex checks
- Log validation failures for debugging and improvement (see the sketch below)
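
For the last point, a minimal logging wrapper might look like this (the wrapper and logger name are illustrative):

```python
import logging

logger = logging.getLogger("llm_validation")


def validate_and_log(pipeline: ValidationPipeline, raw_output: str):
    """Run the pipeline and record failures for later analysis."""
    result = pipeline.validate(raw_output)
    if not result.valid:
        # Keep the raw output alongside the errors so failures can be
        # replayed when prompts or schemas change.
        logger.warning(
            "validation failed: errors=%s raw=%r", result.errors, raw_output
        )
    return result
```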

## Practice Exercise

Build a validation system that:
1. Extracts structured data from free-form LLM responses
2. Validates against Pydantic schemas with custom validators
3. Implements multiple fallback parsing strategies
4. Uses LLM-based validation for complex checks
5. Provides detailed error messages for failures

Focus on:

- Handling edge cases and malformed outputs
- Performance optimization for high-volume validation
- Comprehensive logging of validation failures
- Automatic retry and correction strategies