Intent Classification
Embedding-Based Classification
import numpy as np
from openai import OpenAI
from dataclasses import dataclass


@dataclass
class Intent:
    """Represents an intent with example queries."""
    name: str
    description: str
    examples: list[str]
    embedding: np.ndarray | None = None


class IntentClassifier:
    """Classify queries into predefined intents using embeddings."""

    def __init__(self, intents: list[Intent], model: str = "text-embedding-3-small"):
        self.client = OpenAI()
        self.model = model
        self.intents = intents
        self._compute_intent_embeddings()

    def _compute_intent_embeddings(self):
        """Compute embeddings for all intent examples."""
        for intent in self.intents:
            # Combine the description and examples, embed them in one call,
            # then average the vectors into a single intent centroid
            texts = [intent.description] + intent.examples
            response = self.client.embeddings.create(
                model=self.model,
                input=texts
            )
            embeddings = [e.embedding for e in response.data]
            intent.embedding = np.mean(embeddings, axis=0)

    def classify(self, query: str, threshold: float = 0.5) -> tuple[str, float]:
        """Classify a query into an intent."""
        response = self.client.embeddings.create(
            model=self.model,
            input=[query]
        )
        query_embedding = np.array(response.data[0].embedding)
        return self._best_intent(query_embedding, threshold)

    def classify_batch(
        self,
        queries: list[str],
        threshold: float = 0.5
    ) -> list[tuple[str, float]]:
        """Classify multiple queries with a single embeddings call."""
        response = self.client.embeddings.create(
            model=self.model,
            input=queries
        )
        return [
            self._best_intent(np.array(d.embedding), threshold)
            for d in response.data
        ]

    def _best_intent(self, query_embedding: np.ndarray, threshold: float) -> tuple[str, float]:
        """Find the most similar intent, returning 'unknown' below the threshold."""
        best_intent = None
        best_score = -1.0
        for intent in self.intents:
            score = self._cosine_similarity(query_embedding, intent.embedding)
            if score > best_score:
                best_score = score
                best_intent = intent.name
        if best_score < threshold:
            return "unknown", best_score
        return best_intent, best_score

    @staticmethod
    def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
# Usage
intents = [
    Intent(
        name="technical_support",
        description="Questions about technical issues, bugs, and troubleshooting",
        examples=[
            "My application keeps crashing",
            "How do I fix this error?",
            "The feature isn't working properly"
        ]
    ),
    Intent(
        name="billing",
        description="Questions about payments, invoices, and subscriptions",
        examples=[
            "How do I update my payment method?",
            "Where can I find my invoice?",
            "I want to cancel my subscription"
        ]
    ),
    Intent(
        name="product_info",
        description="Questions about features, capabilities, and product details",
        examples=[
            "What features are included?",
            "Can your product do X?",
            "Tell me about your enterprise plan"
        ]
    ),
]

classifier = IntentClassifier(intents)

queries = [
    "My app won't start after the update",
    "How much does the pro plan cost?",
    "Does it support Python 3.12?",
]

for query in queries:
    intent, confidence = classifier.classify(query)
    print(f"Query: {query}")
    print(f"Intent: {intent} (confidence: {confidence:.2f})\n")
LLM-Based Classification
from openai import OpenAI
import json


class LLMIntentClassifier:
    """Classify intents using LLM reasoning."""

    def __init__(
        self,
        intents: dict[str, str],
        model: str = "gpt-4o-mini"
    ):
        self.client = OpenAI()
        self.model = model
        self.intents = intents

    def classify(self, query: str) -> dict:
        """Classify a query with reasoning."""
        intent_list = "\n".join(
            f"- {name}: {desc}"
            for name, desc in self.intents.items()
        )
        prompt = f"""Classify this query into one of the following intents:

{intent_list}

Query: {query}

Respond with JSON:
{{
    "intent": "intent_name",
    "confidence": 0.0-1.0,
    "reasoning": "brief explanation"
}}"""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)
    def classify_with_fallback(
        self,
        query: str,
        confidence_threshold: float = 0.7
    ) -> dict:
        """Classify with fallback for low-confidence results."""
        result = self.classify(query)
        if result["confidence"] < confidence_threshold:
            # Record the model's original guess before overwriting the intent
            result["original_intent"] = result["intent"]
            result["intent"] = "requires_human_review"
        return result
# Usage
intents = {
    "order_status": "Inquiries about order tracking and delivery",
    "refund_request": "Requests for refunds or returns",
    "product_question": "Questions about product features or availability",
    "complaint": "Complaints about service or product quality",
    "general_inquiry": "General questions not fitting other categories"
}

classifier = LLMIntentClassifier(intents)
result = classifier.classify("When will my order arrive? I've been waiting for a week.")

print(f"Intent: {result['intent']}")
print(f"Confidence: {result['confidence']}")
print(f"Reasoning: {result['reasoning']}")
Query Routing
Multi-Model Router
Route queries to the most appropriate model based on complexity:
from openai import OpenAI
from anthropic import Anthropic
from dataclasses import dataclass
from enum import Enum
import json


class ModelTier(Enum):
    FAST = "fast"            # Simple queries
    BALANCED = "balanced"    # Moderate complexity
    POWERFUL = "powerful"    # Complex reasoning


@dataclass
class RouteConfig:
    """Configuration for a route."""
    model: str
    provider: str
    max_tokens: int
    temperature: float


class QueryRouter:
    """Route queries to appropriate models based on complexity."""

    ROUTES = {
        ModelTier.FAST: RouteConfig(
            model="gpt-4o-mini",
            provider="openai",
            max_tokens=512,
            temperature=0.3
        ),
        ModelTier.BALANCED: RouteConfig(
            model="gpt-4o",
            provider="openai",
            max_tokens=1024,
            temperature=0.5
        ),
        ModelTier.POWERFUL: RouteConfig(
            model="claude-sonnet-4-20250514",
            provider="anthropic",
            max_tokens=2048,
            temperature=0.7
        ),
    }

    def __init__(self):
        self.openai = OpenAI()
        self.anthropic = Anthropic()

    def analyze_complexity(self, query: str) -> ModelTier:
        """Determine query complexity."""
        prompt = f"""Analyze the complexity of this query:

Query: {query}

Consider:
1. Does it require multi-step reasoning?
2. Does it need domain expertise?
3. Is it a simple factual question?
4. Does it require creativity or nuance?

Respond with JSON:
{{
    "complexity": "simple" | "moderate" | "complex",
    "reasoning": "brief explanation"
}}"""
        response = self.openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        result = json.loads(response.choices[0].message.content)
        complexity_map = {
            "simple": ModelTier.FAST,
            "moderate": ModelTier.BALANCED,
            "complex": ModelTier.POWERFUL
        }
        return complexity_map.get(result["complexity"], ModelTier.BALANCED)

    def route(self, query: str) -> tuple[str, RouteConfig]:
        """Route query and get response."""
        tier = self.analyze_complexity(query)
        config = self.ROUTES[tier]

        if config.provider == "openai":
            response = self.openai.chat.completions.create(
                model=config.model,
                max_tokens=config.max_tokens,
                temperature=config.temperature,
                messages=[{"role": "user", "content": query}]
            )
            return response.choices[0].message.content, config

        elif config.provider == "anthropic":
            response = self.anthropic.messages.create(
                model=config.model,
                max_tokens=config.max_tokens,
                messages=[{"role": "user", "content": query}]
            )
            return response.content[0].text, config

        raise ValueError(f"Unknown provider: {config.provider}")
# Usage
router = QueryRouter()

queries = [
    "What is 2 + 2?",
    "Explain the concept of dependency injection.",
    "Design a distributed system for real-time collaboration with CRDT support."
]

for query in queries:
    response, config = router.route(query)
    print(f"Query: {query[:50]}...")
    print(f"Routed to: {config.model}")
    print(f"Response: {response[:100]}...\n")
Topic-Based Routing
from openai import OpenAI
from dataclasses import dataclass
from typing import Callable
import json


@dataclass
class TopicHandler:
    """Handler for a specific topic."""
    topic: str
    description: str
    handler: Callable[[str], str]
    keywords: list[str]


class TopicRouter:
    """Route queries to topic-specific handlers."""

    def __init__(self, handlers: list[TopicHandler]):
        self.client = OpenAI()
        self.handlers = {h.topic: h for h in handlers}
        self._build_topic_index()

    def _build_topic_index(self):
        """Build text representations of each topic.

        These are kept available for embedding-based matching;
        route() below classifies with an LLM instead.
        """
        self.topic_texts = {}
        for topic, handler in self.handlers.items():
            text = f"{handler.description}. Keywords: {', '.join(handler.keywords)}"
            self.topic_texts[topic] = text

    def route(self, query: str) -> tuple[str, str]:
        """Route query to appropriate handler."""
        # Use the LLM to classify the topic
        topics = "\n".join(
            f"- {topic}: {h.description}"
            for topic, h in self.handlers.items()
        )
        prompt = f"""Match this query to the most appropriate topic:

Topics:
{topics}

Query: {query}

Respond with JSON: {{"topic": "topic_name", "confidence": 0.0-1.0}}"""
        response = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        result = json.loads(response.choices[0].message.content)
        topic = result["topic"]

        if topic in self.handlers:
            handler = self.handlers[topic]
            return topic, handler.handler(query)

        # Fallback to a default response
        return "unknown", f"I don't have a specialized handler for this query: {query}"
# Define handlers: each wraps the same completion call
# with a topic-specific system prompt
def make_handler(system_prompt: str) -> Callable[[str], str]:
    def handle(query: str) -> str:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query}
            ]
        )
        return response.choices[0].message.content
    return handle


handle_coding = make_handler("You are an expert programmer.")
handle_writing = make_handler("You are a professional writer and editor.")
handle_math = make_handler("You are a mathematics expert. Show your work.")
# Create router
handlers = [
    TopicHandler(
        topic="coding",
        description="Programming, software development, and debugging",
        handler=handle_coding,
        keywords=["code", "program", "function", "bug", "python", "javascript"]
    ),
    TopicHandler(
        topic="writing",
        description="Writing, editing, and content creation",
        handler=handle_writing,
        keywords=["write", "edit", "essay", "article", "grammar"]
    ),
    TopicHandler(
        topic="math",
        description="Mathematics, calculations, and problem solving",
        handler=handle_math,
        keywords=["calculate", "equation", "solve", "math", "number"]
    ),
]

router = TopicRouter(handlers)

query = "How do I implement binary search in Python?"
topic, response = router.route(query)
print(f"Routed to: {topic}")
print(f"Response: {response}")
Cost-Optimized Routing
from openai import OpenAI
from dataclasses import dataclass
from typing import Optional
import time


@dataclass
class ModelConfig:
    """Configuration for a model."""
    name: str
    cost_per_1k_input: float
    cost_per_1k_output: float
    avg_latency_ms: float
    quality_score: float  # 0-1


class CostOptimizedRouter:
    """Route queries to minimize cost while meeting quality requirements."""

    MODELS = [
        ModelConfig("gpt-4o-mini", 0.00015, 0.0006, 500, 0.85),
        ModelConfig("gpt-4o", 0.0025, 0.01, 800, 0.95),
        ModelConfig("gpt-4-turbo", 0.01, 0.03, 1000, 0.93),
    ]

    def __init__(
        self,
        quality_threshold: float = 0.8,
        max_latency_ms: float = 2000,
        budget_per_query: float = 0.01
    ):
        self.client = OpenAI()
        self.quality_threshold = quality_threshold
        self.max_latency_ms = max_latency_ms
        self.budget_per_query = budget_per_query

    def estimate_tokens(self, text: str) -> int:
        """Rough token estimation (about 4 characters per token)."""
        return len(text) // 4

    def select_model(
        self,
        query: str,
        required_quality: Optional[float] = None
    ) -> ModelConfig:
        """Select the most cost-effective model that meets all requirements."""
        quality_req = required_quality or self.quality_threshold

        # Filter models that meet quality and latency requirements
        viable_models = [
            m for m in self.MODELS
            if m.quality_score >= quality_req
            and m.avg_latency_ms <= self.max_latency_ms
        ]
        if not viable_models:
            # Fall back to the highest-quality model
            return max(self.MODELS, key=lambda m: m.quality_score)

        # Estimate query cost (token counts expressed in thousands)
        input_ktokens = self.estimate_tokens(query) / 1000
        output_ktokens = 0.5  # Assume roughly 500 output tokens

        def estimate_cost(model: ModelConfig) -> float:
            return (
                input_ktokens * model.cost_per_1k_input +
                output_ktokens * model.cost_per_1k_output
            )

        # Prefer models whose estimated cost fits the per-query budget;
        # if none fit, fall back to the full viable set
        within_budget = [m for m in viable_models if estimate_cost(m) <= self.budget_per_query]
        candidates = within_budget or viable_models

        # Select the cheapest remaining model
        return min(candidates, key=estimate_cost)

    def route(
        self,
        query: str,
        required_quality: Optional[float] = None
    ) -> tuple[str, ModelConfig, dict]:
        """Route query and return response with metadata."""
        model = self.select_model(query, required_quality)

        start_time = time.perf_counter()
        response = self.client.chat.completions.create(
            model=model.name,
            messages=[{"role": "user", "content": query}]
        )
        latency_ms = (time.perf_counter() - start_time) * 1000

        usage = response.usage
        actual_cost = (
            (usage.prompt_tokens / 1000) * model.cost_per_1k_input +
            (usage.completion_tokens / 1000) * model.cost_per_1k_output
        )
        metadata = {
            "model": model.name,
            "latency_ms": latency_ms,
            "cost": actual_cost,
            "input_tokens": usage.prompt_tokens,
            "output_tokens": usage.completion_tokens
        }
        return response.choices[0].message.content, model, metadata
# Usage
router = CostOptimizedRouter(
    quality_threshold=0.85,
    max_latency_ms=1500,
    budget_per_query=0.005
)

# Simple query - should use a cheaper model
simple_query = "What is the capital of France?"
response, model, meta = router.route(simple_query)
print(f"Query: {simple_query}")
print(f"Model: {model.name}, Cost: ${meta['cost']:.6f}")

# Complex query with a high quality requirement
complex_query = "Explain the mathematical foundations of transformer attention mechanisms."
response, model, meta = router.route(complex_query, required_quality=0.95)
print(f"\nQuery: {complex_query}")
print(f"Model: {model.name}, Cost: ${meta['cost']:.6f}")
Hybrid Routing
Combine multiple routing strategies:
from openai import OpenAI
from dataclasses import dataclass
import json


@dataclass
class RoutingDecision:
    """Detailed routing decision."""
    model: str
    handler: str
    reasoning: str
    confidence: float
    metadata: dict


class HybridRouter:
    """Combine intent, complexity, and cost-based routing."""

    def __init__(self):
        self.client = OpenAI()

    def analyze_query(self, query: str) -> dict:
        """Comprehensive query analysis."""
        prompt = f"""Analyze this query comprehensively:

Query: {query}

Provide analysis as JSON:
{{
    "intent": "question" | "task" | "creative" | "analysis" | "code",
    "complexity": "simple" | "moderate" | "complex",
    "domain": "general" | "technical" | "creative" | "analytical",
    "expected_length": "short" | "medium" | "long",
    "requires_reasoning": true/false,
    "requires_creativity": true/false,
    "requires_accuracy": true/false
}}"""
        response = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    def decide_route(self, query: str) -> RoutingDecision:
        """Make routing decision based on analysis."""
        analysis = self.analyze_query(query)

        # Determine the best model based on the analysis
        if analysis["complexity"] == "simple" and not analysis["requires_reasoning"]:
            model = "gpt-4o-mini"
            reasoning = "Simple query, fast model sufficient"
        elif analysis["requires_creativity"]:
            model = "gpt-4o"
            reasoning = "Creative task benefits from capable model"
        elif analysis["complexity"] == "complex" or analysis["requires_reasoning"]:
            model = "gpt-4o"
            reasoning = "Complex reasoning requires powerful model"
        else:
            model = "gpt-4o-mini"
            reasoning = "Balanced query, using efficient model"

        # Determine the handler
        if analysis["intent"] == "code":
            handler = "code_specialist"
        elif analysis["intent"] == "creative":
            handler = "creative_writer"
        elif analysis["domain"] == "technical":
            handler = "technical_expert"
        else:
            handler = "general"

        return RoutingDecision(
            model=model,
            handler=handler,
            reasoning=reasoning,
            confidence=0.85,  # Fixed placeholder; could be derived from the analysis
            metadata=analysis
        )

    def route_and_respond(self, query: str) -> tuple[str, RoutingDecision]:
        """Route query and generate response."""
        decision = self.decide_route(query)

        # Build the system prompt based on the chosen handler
        system_prompts = {
            "code_specialist": "You are an expert programmer. Provide clean, documented code.",
            "creative_writer": "You are a creative writer. Be imaginative and engaging.",
            "technical_expert": "You are a technical expert. Be precise and thorough.",
            "general": "You are a helpful assistant."
        }
        system = system_prompts.get(decision.handler, system_prompts["general"])

        response = self.client.chat.completions.create(
            model=decision.model,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": query}
            ]
        )
        return response.choices[0].message.content, decision
# Usage
router = HybridRouter()

queries = [
    "What is 5 + 5?",
    "Write a Python function to sort a list of dictionaries by a key",
    "Write a short story about a robot learning to paint",
    "Explain the CAP theorem and its implications for distributed databases"
]

for query in queries:
    response, decision = router.route_and_respond(query)
    print(f"Query: {query[:50]}...")
    print(f"Model: {decision.model}")
    print(f"Handler: {decision.handler}")
    print(f"Reasoning: {decision.reasoning}")
    print(f"Response: {response[:100]}...\n")
Routing Best Practices
- Use fast models for routing decisions themselves
- Cache routing decisions for similar queries (see the sketch below)
- Monitor routing accuracy and adjust thresholds
- Implement fallbacks for routing failures
- Track cost savings from intelligent routing
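The caching point can be surprisingly simple. Below is a minimal sketch that wraps a router (assumed to expose decide_route(), like HybridRouter above) with an exact-match cache keyed on normalized query text; the CachedRouter name and FIFO eviction are illustrative assumptions, and a production system might instead cache on embeddings to also catch near-duplicate queries.

import hashlib

class CachedRouter:
    """Illustrative wrapper that caches routing decisions by normalized query text."""

    def __init__(self, router, max_entries: int = 10_000):
        self.router = router          # Any router exposing decide_route(query)
        self.cache = {}               # normalized-query hash -> routing decision
        self.max_entries = max_entries

    def _normalize(self, query: str) -> str:
        # Lowercase and collapse whitespace so trivial variants share a cache key
        key = " ".join(query.lower().split())
        return hashlib.sha256(key.encode()).hexdigest()

    def decide_route(self, query: str):
        key = self._normalize(query)
        if key not in self.cache:
            if len(self.cache) >= self.max_entries:
                # Evict the oldest entry (dicts preserve insertion order)
                self.cache.pop(next(iter(self.cache)))
            self.cache[key] = self.router.decide_route(query)
        return self.cache[key]

# cached = CachedRouter(HybridRouter())
# decision = cached.decide_route("What is 5 + 5?")  # second identical call skips the LLM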
Practice Exercise
Build a production routing system that:
- Classifies queries by intent and complexity
- Routes to appropriate models based on requirements
- Optimizes for cost while meeting quality thresholds
- Tracks routing decisions and outcomes
- Adapts routing rules based on feedback

Aim for:
- Low-latency routing decisions
- Graceful degradation on failures
- A/B testing different routing strategies
- Cost and quality monitoring
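As a starting point for the tracking requirement, here is a minimal sketch that logs each routing decision and its outcome to a JSON-lines file; the RoutingRecord schema and file format are illustrative assumptions, not part of any library.

import json
import time
from dataclasses import dataclass, asdict

@dataclass
class RoutingRecord:
    """One routing decision plus its observed outcome (hypothetical schema)."""
    query: str
    model: str
    handler: str
    latency_ms: float
    cost: float
    success: bool

def log_routing_record(record: RoutingRecord, path: str = "routing_log.jsonl") -> None:
    # Append one JSON object per line; easy to aggregate later for
    # cost and quality monitoring or for comparing A/B routing strategies
    entry = {"timestamp": time.time(), **asdict(record)}
    with open(path, "a") as f:
        f.write(json.dumps(entry) + "\n")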