Skip to main content
Text classification is a fundamental NLP task with broad applications. This chapter covers implementing classification systems using LLMs, from zero-shot to production-grade solutions.

Zero-Shot Classification

Basic Zero-Shot

from openai import OpenAI
import json


class ZeroShotClassifier:
    """Classify text with a chat model using only the label names.

    No training examples are involved: each call builds a prompt from the
    candidate labels, asks the chat-completions API for a JSON object and
    returns the parsed reply unchanged. The reply is not validated, so the
    keys described below are what the prompt requests, not a guarantee.
    """

    def __init__(self, model: str = "gpt-4o-mini"):
        """Create an ``OpenAI`` client and remember the model name to query."""
        self.client = OpenAI()
        self.model = model

    def _complete_json(self, prompt: str) -> dict:
        """Send ``prompt`` as one user message and parse the JSON reply."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    def classify(
        self,
        text: str,
        labels: list[str],
        label_descriptions: dict[str, str] = None
    ) -> dict:
        """Classify ``text`` into exactly one of ``labels``.

        Args:
            text: The text to classify.
            labels: Candidate label names; must not be empty.
            label_descriptions: Optional mapping of label name to a short
                description. Only the entries given are listed in the prompt.

        Returns:
            The parsed JSON reply. The prompt requests the keys ``label``,
            ``confidence`` and ``reasoning``.

        Raises:
            ValueError: If ``labels`` is empty.
        """
        # Fail before spending an API call on a prompt with no categories.
        if not labels:
            raise ValueError("labels must contain at least one label")

        descriptions = ""
        if label_descriptions:
            described = "\n".join(
                f"- {label}: {desc}"
                for label, desc in label_descriptions.items()
            )
            descriptions = f"\nLabel descriptions:\n{described}"

        prompt = f"""Classify the following text into exactly one of these categories: {', '.join(labels)}
{descriptions}

Text: {text}

Return JSON:
{{
    "label": "chosen_label",
    "confidence": 0.0-1.0,
    "reasoning": "brief explanation"
}}"""

        return self._complete_json(prompt)

    def classify_multi_label(
        self,
        text: str,
        labels: list[str],
        max_labels: int = None
    ) -> dict:
        """Classify ``text`` into every label from ``labels`` that applies.

        Args:
            text: The text to classify.
            labels: Candidate label names; must not be empty.
            max_labels: Upper bound on the number of labels the model is asked
                to select. ``None``, zero or a negative value means no limit.
                The bound is only stated in the prompt; the reply is not
                truncated.

        Returns:
            The parsed JSON reply. The prompt requests the keys ``labels``,
            ``confidences`` and ``reasoning``.

        Raises:
            ValueError: If ``labels`` is empty.
        """
        if not labels:
            raise ValueError("labels must contain at least one label")

        # Only a positive bound is meaningful; a plain truthiness test would
        # turn -1 into the instruction "Select up to -1 labels.".
        if max_labels is not None and max_labels > 0:
            limit = f"Select up to {max_labels} labels."
        else:
            limit = "Select all applicable labels."

        prompt = f"""Classify the following text into applicable categories.
{limit}

Available labels: {', '.join(labels)}

Text: {text}

Return JSON:
{{
    "labels": ["label1", "label2"],
    "confidences": {{"label1": 0.9, "label2": 0.7}},
    "reasoning": "brief explanation"
}}"""

        return self._complete_json(prompt)


# Usage
# Demo: runs real requests through ZeroShotClassifier at import time.
classifier = ZeroShotClassifier()

# Single-label classification
text = "The new iPhone 15 features a titanium design and improved camera system."

# Descriptions are optional per label: only two of the five labels get one
# here, and only those two are listed in the prompt.
result = classifier.classify(
    text,
    labels=["technology", "sports", "politics", "entertainment", "business"],
    label_descriptions={
        "technology": "Products, software, hardware, tech companies",
        "business": "Markets, economics, corporate news"
    }
)

# `result` is the model's parsed JSON reply; these keys are the ones the
# prompt asks for, but the reply itself is not validated.
print(f"Label: {result['label']} (confidence: {result['confidence']:.0%})")
print(f"Reasoning: {result['reasoning']}")

# Multi-label classification (rebinds `text` and `result` from above)
text = "Apple announced record iPhone sales, beating analyst expectations."

# max_labels only adds a "Select up to 3 labels." instruction to the prompt.
result = classifier.classify_multi_label(
    text,
    labels=["technology", "business", "finance", "product_launch", "earnings"],
    max_labels=3
)

print(f"Labels: {result['labels']}")

Few-Shot Classification

from openai import OpenAI
import json
from dataclasses import dataclass


@dataclass
class Example:
    """One sample text paired with the label it demonstrates."""
    # The example text placed in the few-shot prompt.
    text: str
    # The category this text is an instance of.
    label: str


class FewShotClassifier:
    """Classify text with a prompt that embeds labeled examples.

    Examples are registered per label with :meth:`add_examples`; the set of
    registered labels is also the set of categories offered to the model.
    Replies are requested as JSON objects and are not validated.
    """

    def __init__(self, model: str = "gpt-4o-mini"):
        """Create an ``OpenAI`` client and start with no registered examples."""
        self.client = OpenAI()
        self.model = model
        self.examples: dict[str, list[Example]] = {}

    def add_examples(self, label: str, examples: list[str]):
        """Register example texts for ``label``, creating the label if new."""
        self.examples.setdefault(label, []).extend(
            Example(text=text, label=label) for text in examples
        )

    def _build_examples_prompt(self, k_per_class: int = 2) -> str:
        """Render the first ``k_per_class`` examples of every label as text.

        A negative ``k_per_class`` is treated as zero; passed straight to a
        slice it would mean "all but the last N" instead of "none".
        """
        limit = max(k_per_class, 0)
        lines = ["Examples:"]

        for exs in self.examples.values():
            for ex in exs[:limit]:
                lines.append(f"Text: {ex.text}")
                lines.append(f"Label: {ex.label}")
                lines.append("")

        return "\n".join(lines)

    def _require_examples(self) -> list[str]:
        """Return the registered labels, refusing to run without any."""
        if not self.examples:
            raise ValueError("no examples registered; call add_examples() first")
        return list(self.examples)

    def _complete_json(self, prompt: str) -> dict:
        """Send ``prompt`` as one user message and parse the JSON reply."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    def classify(
        self,
        text: str,
        k_per_class: int = 2
    ) -> dict:
        """Classify ``text`` into one of the registered labels.

        Args:
            text: The text to classify.
            k_per_class: Maximum number of examples per label to include.

        Returns:
            The parsed JSON reply. The prompt requests the keys ``label``
            and ``confidence``.

        Raises:
            ValueError: If no examples have been registered.
        """
        labels = self._require_examples()
        examples_prompt = self._build_examples_prompt(k_per_class)

        prompt = f"""Classify the text into one of these categories: {', '.join(labels)}

{examples_prompt}

Now classify this text:
Text: {text}

Return JSON:
{{
    "label": "category",
    "confidence": 0.0-1.0
}}"""

        return self._complete_json(prompt)

    def classify_batch(
        self,
        texts: list[str],
        k_per_class: int = 2
    ) -> list[dict]:
        """Classify several texts with a single request.

        Args:
            texts: Texts to classify; they are numbered from 1 in the prompt.
            k_per_class: Maximum number of examples per label to include.

        Returns:
            The ``classifications`` list from the reply (each entry is asked
            to carry ``index``, ``label`` and ``confidence``), or an empty
            list when ``texts`` is empty or the reply has no such list.

        Raises:
            ValueError: If ``texts`` is non-empty and no examples have been
                registered.
        """
        # Nothing to classify: skip the API call entirely.
        if not texts:
            return []

        labels = self._require_examples()
        examples_prompt = self._build_examples_prompt(k_per_class)

        texts_formatted = "\n".join(
            f"{i+1}. {text}" for i, text in enumerate(texts)
        )

        prompt = f"""Classify each text into one of these categories: {', '.join(labels)}

{examples_prompt}

Texts to classify:
{texts_formatted}

Return JSON:
{{
    "classifications": [
        {{"index": 1, "label": "category", "confidence": 0.9}}
    ]
}}"""

        data = self._complete_json(prompt)
        # `or []` also covers a reply of {"classifications": null}.
        return data.get("classifications") or []


# Usage
# Demo: runs real requests through FewShotClassifier at import time.
classifier = FewShotClassifier()

# Add training examples. The label names registered here are also the
# categories offered to the model. Three examples are registered per label,
# but only the first k_per_class of each are placed in a prompt.
classifier.add_examples("positive", [
    "This product is amazing! Best purchase ever.",
    "Exceeded all my expectations. Highly recommend!",
    "Love it! Great quality and fast shipping."
])

classifier.add_examples("negative", [
    "Terrible quality. Broke after one use.",
    "Waste of money. Don't buy this.",
    "Very disappointed. Nothing like the pictures."
])

classifier.add_examples("neutral", [
    "It's okay. Does what it's supposed to do.",
    "Average product. Nothing special.",
    "Decent for the price. Not great, not bad."
])

# Classify new text
result = classifier.classify(
    "Pretty good product but shipping took forever.",
    k_per_class=2
)

# `result` is the model's parsed JSON reply; the prompt asks for these keys
# but the reply is not validated.
print(f"Label: {result['label']} ({result['confidence']:.0%})")

# Batch classification: all texts go into one prompt, numbered from 1.
texts = [
    "Absolutely love this!",
    "Meh, it's fine I guess",
    "Returning immediately"
]

results = classifier.classify_batch(texts)
# Each entry is expected to carry the 1-based "index" of its text.
for r in results:
    print(f"{r['index']}: {r['label']}")

Hierarchical Classification

from openai import OpenAI
import json
from dataclasses import dataclass


@dataclass
class TaxonomyNode:
    """Single entry of a classification taxonomy, optionally with children."""
    name: str
    description: str = ""
    children: list = None

    def __post_init__(self):
        # None is only a placeholder default: swap it for a fresh list so
        # that instances never share one mutable list.
        self.children = [] if self.children is None else self.children


class HierarchicalClassifier:
    """Classify text along a path through a nested taxonomy.

    The taxonomy is a nested dict: each key is a category name and each value
    is a dict with an optional ``"description"`` string and an optional
    ``"children"`` dict of the same shape. Replies are requested as JSON
    objects and are not validated against the taxonomy.
    """

    def __init__(self, model: str = "gpt-4o-mini"):
        """Create an ``OpenAI`` client and start with an empty taxonomy."""
        self.client = OpenAI()
        self.model = model
        self.taxonomy: dict[str, dict] = {}

    def set_taxonomy(self, taxonomy: dict):
        """Replace the taxonomy with ``taxonomy`` (nested-dict form)."""
        self.taxonomy = taxonomy

    def _taxonomy_to_text(self, node: dict, level: int = 0) -> str:
        """Render ``node`` as an indented bullet list, two spaces per level."""
        indent = "  " * level
        lines = []

        for name, data in node.items():
            lines.append(f"{indent}- {name}: {data.get('description', '')}")

            # Recurse only into non-empty children: an empty dict would add a
            # blank line to the prompt and None has no items() to iterate.
            children = data.get("children")
            if children:
                lines.append(self._taxonomy_to_text(children, level + 1))

        return "\n".join(lines)

    def _complete_json(self, prompt: str) -> dict:
        """Send ``prompt`` as one user message and parse the JSON reply."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    def classify_hierarchical(self, text: str) -> dict:
        """Ask the model for the taxonomy path that best fits ``text``.

        Returns:
            The parsed JSON reply. The prompt requests the keys ``path``
            (list of category names, broadest first), ``confidence`` and
            ``reasoning``.
        """
        taxonomy_text = self._taxonomy_to_text(self.taxonomy)

        prompt = f"""Classify this text through the following taxonomy hierarchy.
Select the most specific applicable category at each level.

Taxonomy:
{taxonomy_text}

Text: {text}

Return JSON:
{{
    "path": ["level1", "level2", "level3"],
    "confidence": 0.0-1.0,
    "reasoning": "explanation"
}}"""

        return self._complete_json(prompt)

    def classify_with_fallback(self, text: str, min_confidence: float = 0.7) -> dict:
        """Classify ``text`` and let the model broaden an uncertain path.

        When the first answer's confidence is below ``min_confidence`` and its
        path has more than one level, a second request asks the model whether
        a broader category should be used. The path is not truncated locally;
        the model proposes the replacement.

        Args:
            text: The text to classify.
            min_confidence: Confidence below which the follow-up is asked.

        Returns:
            The first reply, with ``path`` and ``confidence`` replaced and
            ``used_fallback`` set to ``True`` when the model opted for a
            broader category and supplied a non-empty ``final_path``.
        """
        result = self.classify_hierarchical(text)

        # The reply is unvalidated: a missing path cannot be broadened and a
        # missing confidence is treated as fully uncertain.
        path = result.get("path") or []
        confidence = result.get("confidence", 0.0)
        if confidence >= min_confidence or len(path) <= 1:
            return result

        prompt = f"""The classification "{path}" has low confidence.
Should we use a broader category?

Text: {text}
Current path: {path}
Confidence: {confidence}

Return JSON:
{{
    "use_parent": true/false,
    "final_path": ["category", "subcategory"],
    "confidence": 0.0-1.0
}}"""

        fallback = self._complete_json(prompt)
        final_path = fallback.get("final_path")
        # Adopt the broader path only when one was actually supplied; keep
        # the original confidence if the follow-up reply omitted its own.
        if fallback.get("use_parent") and final_path:
            result["path"] = final_path
            result["confidence"] = fallback.get("confidence", confidence)
            result["used_fallback"] = True

        return result


# Usage
# Demo: runs a real request through HierarchicalClassifier at import time.
classifier = HierarchicalClassifier()

# The taxonomy is a nested dict: category name -> {"description": ...,
# "children": {...}}. "children" is optional; leaf categories omit it.
classifier.set_taxonomy({
    "Technology": {
        "description": "Tech-related content",
        "children": {
            "Software": {
                "description": "Software and applications",
                "children": {
                    "Mobile Apps": {"description": "Smartphone applications"},
                    "Web Apps": {"description": "Browser-based applications"},
                    "Desktop Software": {"description": "Computer programs"}
                }
            },
            "Hardware": {
                "description": "Physical devices",
                "children": {
                    "Smartphones": {"description": "Mobile phones"},
                    "Computers": {"description": "PCs and laptops"},
                    "Wearables": {"description": "Smartwatches, fitness trackers"}
                }
            }
        }
    },
    "Business": {
        "description": "Business and finance",
        "children": {
            "Startups": {"description": "New companies and ventures"},
            "Enterprise": {"description": "Large corporations"},
            "Markets": {"description": "Stock markets and trading"}
        }
    }
})

text = "The new Apple Watch Series 9 features an improved heart rate sensor."

# `result` is the model's parsed JSON reply; "path" is requested as a list of
# category names, but the reply is not validated against the taxonomy.
result = classifier.classify_hierarchical(text)
print(f"Path: {' > '.join(result['path'])}")
print(f"Confidence: {result['confidence']:.0%}")

Confidence Calibration

from openai import OpenAI
import json
from dataclasses import dataclass


@dataclass
class CalibrationResult:
    """Classification outcome with both raw and calibrated confidence."""
    # Label chosen by the model.
    label: str
    # Confidence reported by the model, coerced to a float in [0, 1].
    raw_confidence: float
    # Confidence after the scaling and penalties of the classifier.
    calibrated_confidence: float
    # One of "low", "epistemic", "aleatoric", "both" as requested in the prompt.
    uncertainty_type: str


class CalibratedClassifier:
    """Classifier that discounts the model's self-reported confidence.

    The model is asked for a label, a confidence, an uncertainty type and
    alternative labels. The reported confidence is then scaled by
    ``calibration_factor`` and reduced by fixed penalties; see
    :meth:`_calibrate_confidence`.
    """

    def __init__(self, model: str = "gpt-4o-mini", calibration_factor: float = 0.85):
        """Create an ``OpenAI`` client.

        Args:
            model: Chat model to query.
            calibration_factor: Multiplier applied to the raw confidence
                before penalties; the default is a conservative 0.85.
        """
        self.client = OpenAI()
        self.model = model
        self.calibration_factor = calibration_factor

    @staticmethod
    def _coerce_confidence(value, default: float = 0.5) -> float:
        """Turn a model-supplied confidence into a float within [0, 1].

        Values that cannot be read as a number (``None``, free text, NaN)
        fall back to ``default``; out-of-range numbers are clamped.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        if number != number:  # NaN is the only value unequal to itself
            return default
        return max(0.0, min(1.0, number))

    def classify_with_uncertainty(
        self,
        text: str,
        labels: list[str]
    ) -> CalibrationResult:
        """Classify ``text`` and calibrate the reported confidence.

        Args:
            text: The text to classify.
            labels: Candidate label names.

        Returns:
            A ``CalibrationResult`` with the chosen label, the raw and the
            calibrated confidence, and the reported uncertainty type.

        Raises:
            KeyError: If the reply contains no ``label``.
        """
        prompt = f"""Classify this text and assess your uncertainty.

Labels: {', '.join(labels)}
Text: {text}

Consider:
1. Epistemic uncertainty: Lack of knowledge about correct answer
2. Aleatoric uncertainty: Inherent ambiguity in the text

Return JSON:
{{
    "label": "chosen_label",
    "confidence": 0.0-1.0,
    "uncertainty_type": "low|epistemic|aleatoric|both",
    "alternative_labels": ["other possible labels"],
    "reasoning": "explanation of choice and uncertainty"
}}"""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )

        data = json.loads(response.choices[0].message.content)

        # The reply is unvalidated JSON: confidence may be a string or null,
        # and alternative_labels may be null or not a list at all.
        raw_conf = self._coerce_confidence(data.get("confidence", 0.5))
        uncertainty_type = data.get("uncertainty_type") or "low"
        alternatives = data.get("alternative_labels")
        if not isinstance(alternatives, list):
            alternatives = []

        calibrated = self._calibrate_confidence(
            raw_conf,
            uncertainty_type,
            len(alternatives)
        )

        return CalibrationResult(
            label=data["label"],
            raw_confidence=raw_conf,
            calibrated_confidence=calibrated,
            uncertainty_type=uncertainty_type
        )

    def _calibrate_confidence(
        self,
        raw: float,
        uncertainty_type: str,
        num_alternatives: int
    ) -> float:
        """Scale ``raw`` and subtract penalties, clamped to [0, 1].

        The result is ``raw * calibration_factor`` minus a penalty for the
        uncertainty type (0.0 / 0.15 / 0.10 / 0.25 for low / epistemic /
        aleatoric / both, 0.1 for anything else) minus 0.1 per alternative
        label, the latter capped at 0.2.
        """
        # Reduce confidence based on uncertainty type
        uncertainty_penalty = {
            "low": 0.0,
            "epistemic": 0.15,
            "aleatoric": 0.10,
            "both": 0.25
        }.get(uncertainty_type, 0.1)

        # Reduce for alternatives
        alt_penalty = min(0.1 * num_alternatives, 0.2)

        calibrated = raw * self.calibration_factor - uncertainty_penalty - alt_penalty
        return max(0.0, min(1.0, calibrated))

    def classify_with_abstention(
        self,
        text: str,
        labels: list[str],
        min_confidence: float = 0.6
    ) -> dict:
        """Classify ``text`` or abstain when calibrated confidence is too low.

        Args:
            text: The text to classify.
            labels: Candidate label names.
            min_confidence: Calibrated confidence below which no label is
                committed to.

        Returns:
            A dict with ``abstained`` set. On abstention ``label`` is
            ``None`` and the model's choice is reported as
            ``suggested_label`` together with a ``reason``; otherwise
            ``label`` and the calibrated ``confidence`` are given. Both
            forms include ``uncertainty_type``.
        """
        result = self.classify_with_uncertainty(text, labels)

        if result.calibrated_confidence < min_confidence:
            return {
                "label": None,
                "abstained": True,
                "reason": f"Confidence {result.calibrated_confidence:.0%} below threshold",
                "uncertainty_type": result.uncertainty_type,
                "suggested_label": result.label
            }

        return {
            "label": result.label,
            "abstained": False,
            "confidence": result.calibrated_confidence,
            "uncertainty_type": result.uncertainty_type
        }


# Usage
# Demo: runs real requests through CalibratedClassifier at import time.
classifier = CalibratedClassifier()

# Classification with uncertainty
labels = ["positive", "negative", "neutral"]
text = "The product is okay but the customer service was rude."

# Returns a CalibrationResult: the calibrated value is the raw confidence
# scaled down and reduced by uncertainty/alternative-label penalties.
result = classifier.classify_with_uncertainty(text, labels)
print(f"Label: {result.label}")
print(f"Raw confidence: {result.raw_confidence:.0%}")
print(f"Calibrated: {result.calibrated_confidence:.0%}")
print(f"Uncertainty: {result.uncertainty_type}")

# With abstention (rebinds `result` to a plain dict)
result = classifier.classify_with_abstention(
    "It's complicated...",
    labels,
    min_confidence=0.6
)

# The two outcomes have different keys: an abstention carries "reason" and
# "suggested_label", a committed answer carries "confidence".
if result["abstained"]:
    print(f"Abstained: {result['reason']}")
    print(f"Suggestion: {result['suggested_label']}")
else:
    print(f"Label: {result['label']} ({result['confidence']:.0%})")

Intent Classification

from openai import OpenAI
import json
from dataclasses import dataclass


@dataclass
class Intent:
    """A classified intent together with the values extracted for it."""
    # Name of the intent chosen by the model.
    name: str
    # Confidence reported by the model (0 when the reply gives none).
    confidence: float
    # Free-form entities: entity type -> extracted value.
    entities: dict
    # Slot values: slot name -> extracted value.
    slots: dict


class IntentClassifier:
    """Classify user messages into registered intents and extract slots.

    Intents are registered with :meth:`register_intent`; each one contributes
    its description, its first two examples and its slot names to the prompt.
    The reply is not checked against the registered intents.
    """

    def __init__(self, model: str = "gpt-4o-mini"):
        """Create an ``OpenAI`` client and start with no registered intents."""
        self.client = OpenAI()
        self.model = model
        self.intents = {}

    def register_intent(
        self,
        name: str,
        description: str,
        examples: list[str],
        slots: list[dict] = None
    ):
        """Register (or replace) the intent called ``name``.

        Args:
            name: Intent identifier the model is asked to return.
            description: What the user wants when expressing this intent.
            examples: Sample utterances; only the first two reach the prompt.
            slots: Optional slot definitions, each a dict with a ``"name"``
                key. Other keys (such as ``"type"``) are stored but are not
                used when building the prompt.
        """
        self.intents[name] = {
            "description": description,
            "examples": examples,
            "slots": slots or []
        }

    def classify(self, text: str) -> Intent:
        """Classify ``text`` into an intent and extract entities and slots.

        Args:
            text: The user message.

        Returns:
            An ``Intent`` built from the reply. Missing or null
            ``confidence``, ``entities`` and ``slots`` become ``0``, ``{}``
            and ``{}`` respectively.

        Raises:
            ValueError: If no intents have been registered.
            KeyError: If the reply contains no ``intent``.
        """
        # Fail before spending an API call on a prompt with no intents.
        if not self.intents:
            raise ValueError("no intents registered; call register_intent() first")

        intents_desc = []
        for name, data in self.intents.items():
            examples = ", ".join(f'"{e}"' for e in data["examples"][:2])
            slots = [s["name"] for s in data.get("slots", [])]
            intents_desc.append(
                f"- {name}: {data['description']}\n"
                f"  Examples: {examples}\n"
                f"  Slots: {slots if slots else 'none'}"
            )
        intents_block = "\n".join(intents_desc)

        prompt = f"""Classify this user message into an intent and extract relevant entities.

Available intents:
{intents_block}

User message: "{text}"

Return JSON:
{{
    "intent": "intent_name",
    "confidence": 0.0-1.0,
    "entities": {{"entity_type": "extracted_value"}},
    "slots": {{"slot_name": "value"}}
}}"""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )

        data = json.loads(response.choices[0].message.content)

        # `or` rather than a .get() default: a key that is present with a
        # null value would otherwise put None into the Intent.
        return Intent(
            name=data["intent"],
            confidence=data.get("confidence") or 0,
            entities=data.get("entities") or {},
            slots=data.get("slots") or {}
        )


# Usage
# Demo: runs a real request through IntentClassifier at import time.
classifier = IntentClassifier()

# Register intents. Only each slot's "name" is used when building the
# prompt; the "type" entries are stored with the intent but not sent.
classifier.register_intent(
    name="book_flight",
    description="User wants to book a flight",
    examples=[
        "I want to fly to New York",
        "Book me a flight to London"
    ],
    slots=[
        {"name": "origin", "type": "city"},
        {"name": "destination", "type": "city"},
        {"name": "date", "type": "date"}
    ]
)

classifier.register_intent(
    name="check_status",
    description="User wants to check booking status",
    examples=[
        "Where is my booking?",
        "Status of my flight"
    ],
    slots=[
        {"name": "booking_id", "type": "string"}
    ]
)

classifier.register_intent(
    name="cancel_booking",
    description="User wants to cancel a booking",
    examples=[
        "Cancel my reservation",
        "I need to cancel"
    ],
    slots=[
        {"name": "booking_id", "type": "string"}
    ]
)

# Classify
text = "I need to book a flight from Boston to Miami next Friday"

# Returns an Intent dataclass built from the model's JSON reply.
intent = classifier.classify(text)
print(f"Intent: {intent.name} ({intent.confidence:.0%})")
print(f"Slots: {intent.slots}")
print(f"Entities: {intent.entities}")

Production Classification Pipeline

from openai import OpenAI
import json
from dataclasses import dataclass, field
from typing import Optional
import time


@dataclass
class ClassificationResult:
    """Complete classification result."""
    # The text this result was returned for.
    text: str
    # Label the model considered most likely.
    primary_label: str
    # Probability the model assigned to ``primary_label`` (0 if not given).
    confidence: float
    # Label -> probability as reported by the model; empty for batch results.
    all_labels: dict
    # Wall-clock duration of the API work behind this result, in milliseconds.
    processing_time_ms: float
    # Name of the model that produced the answer.
    model_used: str
    # Extra flags such as "from_cache".
    metadata: dict = field(default_factory=dict)


class ProductionClassifier:
    """Classification pipeline with caching, model fallback and batching.

    Single texts go through :meth:`classify`, which caches results in memory,
    falls back to a second model when the primary call fails and retries once
    when the answer is not confident enough. :meth:`classify_batch` packs
    several texts into one request and bypasses the cache.
    """

    def __init__(
        self,
        labels: list[str],
        model: str = "gpt-4o-mini",
        fallback_model: str = "gpt-3.5-turbo"
    ):
        """Create an ``OpenAI`` client.

        Args:
            labels: Candidate label names offered to the model.
            model: Primary chat model.
            fallback_model: Model used when the primary call raises.
        """
        self.client = OpenAI()
        self.labels = labels
        self.model = model
        self.fallback_model = fallback_model
        self.cache = {}

    def _get_cache_key(self, text: str) -> str:
        """Return the cache key for ``text``: lower-cased and stripped.

        The normalized text itself is the key. A ``hash()`` of it would be an
        ``int`` (contradicting the return type) and two different texts with
        colliding hashes would share one cache entry.
        """
        return text.lower().strip()

    def _classify_single(
        self,
        text: str,
        model: str,
        temperature: float = 0
    ) -> dict:
        """Run one classification request and return the parsed JSON reply.

        The prompt requests the keys ``primary_label`` and ``probabilities``.
        """
        prompt = f"""Classify this text into one of these categories: {', '.join(self.labels)}

Text: {text}

Return JSON with all label probabilities:
{{
    "primary_label": "most_likely_label",
    "probabilities": {{"label1": 0.8, "label2": 0.15, "label3": 0.05}}
}}"""

        response = self.client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"},
            temperature=temperature
        )

        return json.loads(response.choices[0].message.content)

    def _summarize(self, result: dict) -> tuple:
        """Extract ``(primary_label, probabilities, confidence)`` from a reply.

        A missing label defaults to the first configured label, missing or
        null probabilities to ``{}``, and a label without a probability to a
        confidence of 0.
        """
        primary = result.get("primary_label") or self.labels[0]
        probs = result.get("probabilities") or {}
        return primary, probs, probs.get(primary, 0)

    def classify(
        self,
        text: str,
        use_cache: bool = True,
        require_confidence: float = 0.5
    ) -> ClassificationResult:
        """Classify ``text`` with caching, fallback and one confidence retry.

        Args:
            text: The text to classify.
            use_cache: Read from and write to the in-memory cache.
            require_confidence: Confidence below which a second attempt with
                temperature 0.3 is made; its answer is kept only if it is
                more confident than the first.

        Returns:
            A ``ClassificationResult``. On a cache hit a copy of the stored
            result is returned with ``metadata["from_cache"]`` set to
            ``True`` and ``text`` set to the text passed in; its
            ``processing_time_ms`` is that of the original request.
            ``metadata`` may additionally contain ``fallback_reason`` or
            ``retry_error`` describing a failed request.

        Raises:
            Exception: Whatever the fallback model's request raises when the
                primary model's request has already failed.
        """
        # Function-scope import: the surrounding file imports only
        # `dataclass` and `field` from this module.
        from dataclasses import replace

        start_time = time.perf_counter()

        # Check cache. Return a copy: flagging the stored object itself would
        # retroactively flip from_cache on the result already handed to the
        # caller that populated the cache.
        cache_key = self._get_cache_key(text)
        if use_cache and cache_key in self.cache:
            cached = self.cache[cache_key]
            return replace(
                cached,
                text=text,
                metadata={**cached.metadata, "from_cache": True}
            )

        model_used = self.model
        metadata = {"from_cache": False}

        try:
            result = self._classify_single(text, self.model)
        except Exception as exc:
            # Deliberately broad: any failure of the primary model gets one
            # attempt on the fallback model. The cause is kept for diagnosis.
            model_used = self.fallback_model
            metadata["fallback_reason"] = repr(exc)
            result = self._classify_single(text, self.fallback_model)

        primary, probs, confidence = self._summarize(result)

        # Handle low confidence
        if confidence < require_confidence:
            # Try with higher temperature for diversity. The retry is
            # optional: if it fails, the first answer is still valid.
            try:
                retry = self._classify_single(text, model_used, temperature=0.3)
            except Exception as exc:
                metadata["retry_error"] = repr(exc)
            else:
                retry_primary, retry_probs, retry_confidence = self._summarize(retry)
                if retry_confidence > confidence:
                    primary, probs, confidence = retry_primary, retry_probs, retry_confidence

        processing_time = (time.perf_counter() - start_time) * 1000

        classification = ClassificationResult(
            text=text,
            primary_label=primary,
            confidence=confidence,
            all_labels=probs,
            processing_time_ms=processing_time,
            model_used=model_used,
            metadata=metadata
        )

        # Cache result
        if use_cache:
            self.cache[cache_key] = classification

        return classification

    def classify_batch(
        self,
        texts: list[str],
        batch_size: int = 10
    ) -> list[ClassificationResult]:
        """Classify ``texts`` with one request per ``batch_size`` texts.

        The cache and the fallback model are not used here.

        Args:
            texts: Texts to classify.
            batch_size: Number of texts per request; at least 1.

        Returns:
            One result per classification entry in the replies, in the order
            the model returned them. Each entry is matched to its text by the
            1-based ``index`` it carries, falling back to its position in the
            reply when the index is missing or out of range. ``all_labels``
            is empty and ``processing_time_ms`` is the request time divided
            evenly across the batch.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
            KeyError: If a classification entry has no ``label``.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        results = []

        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]

            # Batch classification prompt
            texts_formatted = "\n".join(
                f"{j+1}. {t}" for j, t in enumerate(batch)
            )

            prompt = f"""Classify each text into one of: {', '.join(self.labels)}

Texts:
{texts_formatted}

Return JSON:
{{
    "classifications": [
        {{"index": 1, "label": "category", "confidence": 0.9}}
    ]
}}"""

            start_time = time.perf_counter()

            # temperature=0 matches the default used for single requests.
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                response_format={"type": "json_object"},
                temperature=0
            )

            processing_time = (time.perf_counter() - start_time) * 1000

            data = json.loads(response.choices[0].message.content)

            for position, item in enumerate(data.get("classifications") or []):
                # Trust the model's own index first: pairing purely by
                # position mislabels texts when the reply is reordered or
                # skips an entry.
                index = item.get("index")
                if isinstance(index, int) and 1 <= index <= len(batch):
                    source_text = batch[index - 1]
                elif position < len(batch):
                    source_text = batch[position]
                else:
                    source_text = ""

                results.append(ClassificationResult(
                    text=source_text,
                    primary_label=item["label"],
                    confidence=item.get("confidence", 0),
                    all_labels={},
                    processing_time_ms=processing_time / len(batch),
                    model_used=self.model
                ))

        return results


# Usage
# Demo: runs real requests through ProductionClassifier at import time.
# fallback_model is left at its default.
classifier = ProductionClassifier(
    labels=["positive", "negative", "neutral"],
    model="gpt-4o-mini"
)

# Single classification (cached by default under the normalized text)
result = classifier.classify("Great product, highly recommend!")
print(f"Label: {result.primary_label}")
print(f"Confidence: {result.confidence:.0%}")
print(f"Processing time: {result.processing_time_ms:.0f}ms")

# Batch classification: with the default batch_size of 10 these five texts
# are sent in a single request.
texts = [
    "Excellent service!",
    "Terrible experience",
    "It was okay",
    "Love it!",
    "Not worth the money"
]

results = classifier.classify_batch(texts)
# The "..." suffix is printed unconditionally, even for texts under 30 chars.
for r in results:
    print(f"{r.primary_label}: {r.text[:30]}...")
Classification Best Practices
  • Provide clear label descriptions for better accuracy
  • Use few-shot examples when labels are domain-specific
  • Calibrate confidence scores before production use
  • Implement abstention for low-confidence cases
  • Cache results for repeated classifications

Practice Exercise

Build a classification system that:
  1. Supports zero-shot, few-shot, and hierarchical classification
  2. Provides calibrated confidence scores
  3. Handles multi-label classification
  4. Implements abstention for uncertain cases
  5. Includes batch processing for efficiency
Focus on:
  • Accurate label assignment
  • Well-calibrated confidence estimates
  • Graceful handling of edge cases
  • Production-ready error handling