Synthetic Data Generation

LLMs excel at generating synthetic data for training, testing, and augmentation. This chapter covers patterns for creating high-quality synthetic datasets.

Training Data Generation

Basic Data Generation

from openai import OpenAI
import json
from dataclasses import dataclass


@dataclass
class DataExample:
    """A single training example.

    Attributes:
        input: Input text of the example.
        output: Output (or label) paired with ``input``.
        metadata: Optional extra information; ``None`` when nothing is attached.
    """
    input: str
    output: str
    metadata: dict = None


class DataGenerator:
    """Generate synthetic training data through JSON-mode chat completions.

    Each public method builds a prompt, sends it as a single user message and
    converts the parsed JSON reply into ``DataExample`` objects. Entries in
    the reply that lack the expected keys are skipped so that one malformed
    item does not discard the rest of the batch.
    """

    def __init__(self, model: str = "gpt-4o"):
        self.client = OpenAI()
        self.model = model

    def _request_json(self, prompt: str) -> dict:
        """Send ``prompt`` as one user message and return the parsed JSON reply.

        Raises:
            json.JSONDecodeError: If the reply text is not valid JSON.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    @staticmethod
    def _well_formed(items, required_keys) -> list[dict]:
        """Return the dict entries of ``items`` that contain every required key."""
        if not isinstance(items, list):
            return []
        return [
            item for item in items
            if isinstance(item, dict) and all(key in item for key in required_keys)
        ]

    def generate_examples(
        self,
        task_description: str,
        num_examples: int = 10,
        seed_examples: list[dict] = None
    ) -> list[DataExample]:
        """Generate input/output training examples for a task.

        Args:
            task_description: Description of the task, inserted into the prompt.
            num_examples: Number of examples requested from the model; the
                reply is not checked against this number.
            seed_examples: Optional dicts with ``"input"`` and ``"output"``
                keys that are shown to the model as patterns to follow.

        Returns:
            One ``DataExample`` per reply entry that has both ``"input"`` and
            ``"output"``; other entries are skipped.

        Raises:
            KeyError: If a seed example lacks ``"input"`` or ``"output"``.
        """
        seed_text = ""
        if seed_examples:
            seed_text = "Here are some example patterns to follow:\n" + "".join(
                f"Input: {ex['input']}\nOutput: {ex['output']}\n\n"
                for ex in seed_examples
            )

        prompt = f"""Generate {num_examples} diverse training examples for this task:

Task: {task_description}

{seed_text}

Requirements:
- Make examples diverse in content and complexity
- Ensure outputs are accurate and consistent with the task
- Vary the input lengths and styles
- Include edge cases and challenging examples

Return JSON:
{{
    "examples": [
        {{"input": "example input", "output": "correct output"}}
    ]
}}"""

        data = self._request_json(prompt)

        return [
            DataExample(input=ex["input"], output=ex["output"])
            for ex in self._well_formed(data.get("examples", []), ("input", "output"))
        ]

    def generate_classification_data(
        self,
        labels: list[str],
        label_descriptions: dict[str, str],
        examples_per_label: int = 20
    ) -> list[DataExample]:
        """Generate labelled text examples, one request per label.

        Args:
            labels: Class labels to generate examples for.
            label_descriptions: Maps a label to its description; a label that
                is missing gets an empty description in the prompt.
            examples_per_label: Number of texts requested for each label.

        Returns:
            ``DataExample`` objects whose ``input`` is the generated text,
            whose ``output`` is the label, and whose ``metadata`` is
            ``{"generated": True}``. Reply entries without ``"text"`` are
            skipped.
        """
        all_examples = []

        for label in labels:
            description = label_descriptions.get(label, "")

            prompt = f"""Generate {examples_per_label} text examples for the classification label "{label}".

Label description: {description}

Requirements:
- Make examples realistic and varied
- Include different lengths and styles
- Ensure each example clearly belongs to this category
- Include some challenging borderline cases

Return JSON:
{{
    "examples": [
        {{"text": "example text"}}
    ]
}}"""

            data = self._request_json(prompt)

            all_examples.extend(
                DataExample(
                    input=ex["text"],
                    output=label,
                    metadata={"generated": True}
                )
                for ex in self._well_formed(data.get("examples", []), ("text",))
            )

        return all_examples


# Usage: build a small sentiment-classification dataset
generator = DataGenerator()

# One description per sentiment label
descriptions = {
    "positive": "Happy, satisfied, enthusiastic customer feedback",
    "negative": "Unhappy, frustrated, disappointed customer feedback",
    "neutral": "Factual, objective statements without strong emotion"
}
labels = ["positive", "negative", "neutral"]

# Ten examples are requested for every label
data = generator.generate_classification_data(labels, descriptions, 10)

# Report the total and preview the first three (inputs cut to 50 characters)
print(f"Generated {len(data)} examples")
for ex in data[:3]:
    print(f"  {ex.output}: {ex.input[:50]}...")

Instruction-Following Data

from openai import OpenAI
import json
from typing import Optional


class InstructionDataGenerator:
    """Generate instruction-following training data via JSON-mode chat completions."""

    def __init__(self, model: str = "gpt-4o"):
        self.client = OpenAI()
        self.model = model

    def _request_json(self, prompt: str) -> dict:
        """Send ``prompt`` as one user message and return the parsed JSON reply.

        Raises:
            json.JSONDecodeError: If the reply text is not valid JSON.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    def generate_instructions(
        self,
        domain: str,
        complexity_levels: list[str] = None,
        num_per_level: int = 10
    ) -> list[dict]:
        """Generate instruction-response pairs, one request per complexity level.

        Args:
            domain: Subject area inserted into the prompt and copied to every
                returned record.
            complexity_levels: Levels to generate for; defaults to
                ``["simple", "moderate", "complex"]`` when empty or ``None``.
            num_per_level: Number of pairs requested for each level.

        Returns:
            Dicts with keys ``instruction``, ``response``, ``complexity``,
            ``domain`` and ``reasoning`` (``None`` when the model gave none).
            Reply entries lacking ``instruction`` or ``response`` are skipped
            so one malformed pair does not abort the whole run.
        """
        complexity_levels = complexity_levels or ["simple", "moderate", "complex"]
        all_data = []

        for level in complexity_levels:
            prompt = f"""Generate {num_per_level} instruction-response pairs for the domain: {domain}

Complexity level: {level}
- Simple: Single-step tasks, short responses
- Moderate: Multi-step tasks, detailed responses
- Complex: Nuanced tasks requiring reasoning, comprehensive responses

Requirements:
- Make instructions clear and specific
- Responses should be helpful and complete
- Include variety in instruction formats (questions, commands, requests)
- Ensure responses demonstrate the complexity level

Return JSON:
{{
    "pairs": [
        {{
            "instruction": "user instruction",
            "response": "assistant response",
            "reasoning": "optional chain of thought"
        }}
    ]
}}"""

            data = self._request_json(prompt)
            pairs = data.get("pairs", [])
            if not isinstance(pairs, list):
                continue

            for pair in pairs:
                # Skip entries that are not usable instruction/response records
                if not isinstance(pair, dict):
                    continue
                if "instruction" not in pair or "response" not in pair:
                    continue
                all_data.append({
                    "instruction": pair["instruction"],
                    "response": pair["response"],
                    "complexity": level,
                    "domain": domain,
                    "reasoning": pair.get("reasoning")
                })

        return all_data

    def generate_multi_turn(
        self,
        scenario: str,
        num_turns: int = 5,
        num_conversations: int = 5
    ) -> list[list[dict]]:
        """Generate multi-turn conversations, one request per conversation.

        Args:
            scenario: Scenario description inserted into the prompt.
            num_turns: Number of turns requested for each conversation.
            num_conversations: Number of requests (and conversations) to make.

        Returns:
            One entry per request: the reply's ``"conversation"`` value, or an
            empty list when the reply has no such key.
        """
        conversations = []

        for _ in range(num_conversations):
            prompt = f"""Generate a realistic {num_turns}-turn conversation for this scenario:

Scenario: {scenario}

Requirements:
- User asks progressively related questions
- Assistant provides helpful, accurate responses
- Include follow-up questions and clarifications
- Make the conversation flow naturally

Return JSON:
{{
    "conversation": [
        {{"role": "user", "content": "user message"}},
        {{"role": "assistant", "content": "assistant response"}}
    ]
}}"""

            data = self._request_json(prompt)
            conversations.append(data.get("conversation", []))

        return conversations


# Usage: instruction pairs and multi-turn conversations
generator = InstructionDataGenerator()

# Five coding instructions at each of the three complexity levels
coding_data = generator.generate_instructions(
    "Python programming",
    ["simple", "moderate", "complex"],
    5
)
print(f"Generated {len(coding_data)} instruction pairs")

# Three four-turn conversations about one learning scenario
conversations = generator.generate_multi_turn(
    "User is learning about machine learning and wants to understand neural networks",
    4,
    3
)
print(f"Generated {len(conversations)} conversations")

Data Augmentation

from openai import OpenAI
import json
import random


class DataAugmenter:
    """Augment existing datasets with synthetic variations from a chat model."""

    def __init__(self, model: str = "gpt-4o-mini"):
        self.client = OpenAI()
        self.model = model

    def _request_text(self, prompt: str, json_mode: bool = False) -> str:
        """Send ``prompt`` as one user message and return the reply text.

        When ``json_mode`` is true the request asks for a JSON-object reply.
        """
        options = {"response_format": {"type": "json_object"}} if json_mode else {}
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            **options
        )
        return response.choices[0].message.content

    def _request_json(self, prompt: str) -> dict:
        """Send ``prompt`` in JSON mode and return the parsed reply.

        Raises:
            json.JSONDecodeError: If the reply text is not valid JSON.
        """
        return json.loads(self._request_text(prompt, json_mode=True))

    def paraphrase(
        self,
        text: str,
        num_variations: int = 3,
        preserve_meaning: bool = True
    ) -> list[str]:
        """Generate paraphrased versions of ``text``.

        Args:
            text: Text to paraphrase.
            num_variations: Number of paraphrases requested.
            preserve_meaning: Selects which meaning constraint is written into
                the prompt (exact meaning vs. slight variations allowed).

        Returns:
            The reply's ``"paraphrases"`` value, or ``[]`` when it is absent.
        """
        constraint = "Preserve the exact meaning." if preserve_meaning else "Allow slight variations in meaning."

        prompt = f"""Generate {num_variations} paraphrased versions of this text.

Original: {text}

Requirements:
- {constraint}
- Use different vocabulary and sentence structures
- Maintain the same tone
- Each version should be distinct

Return JSON: {{"paraphrases": ["version1", "version2", ...]}}"""

        return self._request_json(prompt).get("paraphrases", [])

    def augment_with_context(
        self,
        examples: list[dict],
        context_variations: list[str]
    ) -> list[dict]:
        """Rewrite every example once per context (one request per pairing).

        Args:
            examples: Dicts with ``"input"`` and ``"output"``; an optional
                ``"context"`` key is copied to ``original_context``.
            context_variations: New contexts to adapt each example to.

        Returns:
            Dicts with keys ``input``, ``output``, ``original_context`` and
            ``new_context``. A reply that lacks ``input`` or ``output`` is
            skipped instead of aborting the remaining pairings.
        """
        augmented = []

        for example in examples:
            for context in context_variations:
                prompt = f"""Rewrite this example in a new context.

Original input: {example['input']}
Original output: {example['output']}
New context: {context}

Adapt the example to fit the new context while preserving the task pattern.

Return JSON:
{{
    "input": "adapted input",
    "output": "adapted output"
}}"""

                data = self._request_json(prompt)
                # An incomplete adaptation cannot be used as a training pair
                if "input" not in data or "output" not in data:
                    continue
                augmented.append({
                    "input": data["input"],
                    "output": data["output"],
                    "original_context": example.get("context"),
                    "new_context": context
                })

        return augmented

    def generate_edge_cases(
        self,
        task_description: str,
        seed_examples: list[dict],
        num_edge_cases: int = 10
    ) -> list[dict]:
        """Generate challenging edge cases for a task.

        Args:
            task_description: Task description inserted into the prompt.
            seed_examples: Normal examples; only the first three are shown to
                the model. Must be JSON-serialisable.
            num_edge_cases: Number of edge cases requested.

        Returns:
            The reply's ``"edge_cases"`` value, or ``[]`` when it is absent.
        """
        prompt = f"""Generate {num_edge_cases} edge case examples for this task.

Task: {task_description}

Normal examples:
{json.dumps(seed_examples[:3], indent=2)}

Generate challenging edge cases that test:
- Boundary conditions
- Unusual inputs
- Ambiguous cases
- Error handling scenarios
- Edge of distribution examples

Return JSON:
{{
    "edge_cases": [
        {{"input": "edge case input", "output": "correct output", "challenge": "what makes this challenging"}}
    ]
}}"""

        return self._request_json(prompt).get("edge_cases", [])

    def back_translate(
        self,
        text: str,
        intermediate_language: str = "French",
        source_language: str = "English"
    ) -> str:
        """Augment by translating to another language and back.

        Args:
            text: Text to round-trip.
            intermediate_language: Language of the first translation.
            source_language: Language the second translation returns to;
                defaults to English, which was previously hard-coded.

        Returns:
            The model's reply to the second (back) translation request.
        """
        intermediate = self._request_text(
            f"Translate to {intermediate_language}: {text}"
        )
        return self._request_text(f"Translate to {source_language}: {intermediate}")


# Usage: paraphrasing and edge-case generation
augmenter = DataAugmenter()

# Paraphrase one sentence three ways and show the results next to the original
original = "The customer service was excellent and the product exceeded my expectations."
paraphrases = augmenter.paraphrase(text=original, num_variations=3)

print("Original:", original)
for i, p in enumerate(paraphrases):
    print(f"Variation {i+1}:", p)

# Ask for five edge cases, seeded with two ordinary sentiment examples
seed = [
    {"input": "Great product!", "output": "positive"},
    {"input": "Terrible quality", "output": "negative"}
]
task = "Sentiment classification of product reviews"

edge_cases = augmenter.generate_edge_cases(
    task_description=task,
    seed_examples=seed,
    num_edge_cases=5
)
for case in edge_cases:
    print(f"Edge case: {case['input']} -> {case['output']}")
    print(f"  Challenge: {case['challenge']}")

Quality Filtering

from openai import OpenAI
import json
from dataclasses import dataclass


@dataclass
class QualityScore:
    """Quality assessment for a data example.

    ``overall_score`` is the mean of the four dimension scores; ``issues``
    holds the problems reported by the scoring model.
    """
    overall_score: float
    relevance: float
    accuracy: float
    clarity: float
    diversity: float
    issues: list[str]


class DataQualityFilter:
    """Filter synthetic data for quality using a chat model as the judge."""

    def __init__(self, model: str = "gpt-4o-mini"):
        self.client = OpenAI()
        self.model = model

    def _request_json(self, prompt: str) -> dict:
        """Send ``prompt`` as one user message and return the parsed JSON reply.

        Raises:
            json.JSONDecodeError: If the reply text is not valid JSON.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    @staticmethod
    def _as_score(value) -> float:
        """Coerce a model-supplied score to ``float``; unusable values become 0.0."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    @staticmethod
    def _as_index(value):
        """Coerce a model-supplied index to ``int``; return ``None`` if impossible."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def score_example(
        self,
        example: dict,
        task_description: str
    ) -> QualityScore:
        """Score a single example for quality.

        Args:
            example: Dict with ``"input"`` and ``"output"`` keys.
            task_description: Task the example is judged against.

        Returns:
            A ``QualityScore`` whose ``overall_score`` is the mean of
            relevance, accuracy, clarity and diversity. A dimension that is
            missing, ``null`` or not numeric in the reply counts as 0.0
            instead of raising.
        """
        prompt = f"""Evaluate the quality of this training example.

Task: {task_description}
Input: {example['input']}
Output: {example['output']}

Score each dimension from 0 to 1:
- Relevance: Does it match the task?
- Accuracy: Is the output correct?
- Clarity: Is it clear and unambiguous?
- Diversity: Does it add value beyond basic examples?

Return JSON:
{{
    "relevance": 0.0-1.0,
    "accuracy": 0.0-1.0,
    "clarity": 0.0-1.0,
    "diversity": 0.0-1.0,
    "issues": ["list of any problems found"],
    "reasoning": "brief explanation"
}}"""

        data = self._request_json(prompt)

        dimensions = ("relevance", "accuracy", "clarity", "diversity")
        scores = {name: self._as_score(data.get(name, 0)) for name in dimensions}

        return QualityScore(
            overall_score=sum(scores.values()) / len(scores),
            issues=data.get("issues", []),
            **scores
        )

    def filter_dataset(
        self,
        examples: list[dict],
        task_description: str,
        min_score: float = 0.7
    ) -> tuple[list[dict], list[dict]]:
        """Split a dataset into accepted and rejected examples.

        Every example is scored with ``score_example`` (one request each).
        NOTE: the input dicts are annotated in place with ``quality_score``
        and ``quality_details``; the returned lists hold those same objects.

        Args:
            examples: Dicts with ``"input"`` and ``"output"`` keys.
            task_description: Task the examples are judged against.
            min_score: Minimum ``overall_score`` (inclusive) for acceptance.

        Returns:
            ``(accepted, rejected)``, each in input order.
        """
        accepted = []
        rejected = []

        for example in examples:
            score = self.score_example(example, task_description)

            example["quality_score"] = score.overall_score
            example["quality_details"] = {
                "relevance": score.relevance,
                "accuracy": score.accuracy,
                "clarity": score.clarity,
                "diversity": score.diversity,
                "issues": score.issues
            }

            if score.overall_score >= min_score:
                accepted.append(example)
            else:
                rejected.append(example)

        return accepted, rejected

    def deduplicate(
        self,
        examples: list[dict],
        similarity_threshold: float = 0.85
    ) -> list[dict]:
        """Remove near-duplicate examples, as judged by the model.

        Args:
            examples: Dicts with an ``"input"`` key; inputs are sent to the
                model keyed by their list position.
            similarity_threshold: Fraction of semantic overlap above which
                texts are grouped as duplicates. It is written into the
                prompt as a percentage (previously the prompt always said 85%).

        Returns:
            The examples whose index the model did not list for removal, in
            input order. Lists of length 0 or 1 are returned unchanged without
            a request.
        """
        if len(examples) <= 1:
            return examples

        inputs = [ex["input"] for ex in examples]

        prompt = f"""Identify groups of very similar or duplicate texts from this list.

Texts:
{json.dumps(dict(enumerate(inputs)), indent=2)}

Group texts that are too similar (>{similarity_threshold:.0%} semantic overlap).
For each group, identify which index to keep.

Return JSON:
{{
    "duplicate_groups": [
        {{"keep_index": 0, "remove_indices": [1, 2]}}
    ]
}}"""

        data = self._request_json(prompt)

        remove_indices = set()
        for group in data.get("duplicate_groups", []):
            if not isinstance(group, dict):
                continue
            keep = self._as_index(group.get("keep_index"))
            for raw in group.get("remove_indices") or []:
                # The texts were sent with string keys, so indices may come
                # back as strings; normalise before comparing with positions.
                index = self._as_index(raw)
                # Never drop the entry the group itself says to keep
                if index is not None and index != keep:
                    remove_indices.add(index)

        return [ex for i, ex in enumerate(examples) if i not in remove_indices]


# Usage
# Named `quality_filter` rather than `filter` so the builtin is not shadowed
quality_filter = DataQualityFilter()

task = "Extract action items from meeting notes"
examples = [
    {"input": "Let's schedule a follow-up meeting", "output": "Schedule follow-up meeting"},
    {"input": "Bob will send the report by Friday", "output": "Bob: Send report by Friday"},
    {"input": "Nice weather today", "output": "Enjoy weather"},  # Bad example
]

# filter_dataset annotates each example with its score and score details
accepted, rejected = quality_filter.filter_dataset(examples, task, min_score=0.6)

print(f"Accepted: {len(accepted)}, Rejected: {len(rejected)}")
for ex in rejected:
    print(f"Rejected: {ex['input']}")
    print(f"  Score: {ex['quality_score']:.2f}")
    print(f"  Issues: {ex['quality_details']['issues']}")

Test Data Generation

from openai import OpenAI
import json
from datetime import datetime, timedelta
import random


class TestDataGenerator:
    """Generate realistic test fixtures and mock data via JSON-mode chat completions."""

    # The class name starts with "Test"; tell pytest it is not a test class
    __test__ = False

    def __init__(self, model: str = "gpt-4o-mini"):
        self.client = OpenAI()
        self.model = model

    def _request_json(self, prompt: str) -> dict:
        """Send ``prompt`` as one user message and return the parsed JSON reply.

        Raises:
            json.JSONDecodeError: If the reply text is not valid JSON.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    def generate_users(self, count: int = 10, schema: dict = None) -> list[dict]:
        """Generate realistic user records.

        Args:
            count: Number of records requested.
            schema: Field-name to description mapping shown to the model; a
                default user schema (id, name, email, age, country,
                created_at) is used when empty or ``None``.

        Returns:
            The reply's ``"users"`` value, or ``[]`` when it is absent.
        """
        schema = schema or {
            "id": "integer",
            "name": "full name",
            "email": "email address",
            "age": "integer 18-80",
            "country": "country name",
            "created_at": "ISO datetime"
        }

        prompt = f"""Generate {count} realistic user records.

Schema:
{json.dumps(schema, indent=2)}

Requirements:
- Make names and emails consistent
- Distribute ages realistically
- Include international diversity
- Ensure all emails are unique

Return JSON: {{"users": [...]}}"""

        return self._request_json(prompt).get("users", [])

    def generate_from_schema(
        self,
        schema: dict,
        count: int = 10,
        constraints: dict = None
    ) -> list[dict]:
        """Generate records from an arbitrary schema.

        Args:
            schema: JSON-serialisable schema description shown to the model.
            count: Number of records requested.
            constraints: Optional JSON-serialisable constraints; the prompt
                says ``None`` when empty or omitted.

        Returns:
            The reply's ``"records"`` value, or ``[]`` when it is absent.
        """
        constraints_text = json.dumps(constraints, indent=2) if constraints else "None"

        prompt = f"""Generate {count} records matching this schema.

Schema:
{json.dumps(schema, indent=2)}

Constraints:
{constraints_text}

Generate realistic, varied data that follows the schema exactly.

Return JSON: {{"records": [...]}}"""

        return self._request_json(prompt).get("records", [])

    def generate_api_responses(
        self,
        endpoint_description: str,
        scenarios: list[str],
        include_errors: bool = True
    ) -> list[dict]:
        """Generate mock API responses for testing.

        Args:
            endpoint_description: Endpoint description inserted into the prompt.
            scenarios: Scenarios the responses should cover.
            include_errors: When true, the prompt also asks for 400/404/500
                error responses.

        Returns:
            The reply's ``"responses"`` value, or ``[]`` when it is absent.
        """
        error_clause = "Include error responses (400, 404, 500)." if include_errors else ""

        prompt = f"""Generate mock API responses for testing.

Endpoint: {endpoint_description}

Scenarios to cover:
{json.dumps(scenarios, indent=2)}

{error_clause}

Return JSON:
{{
    "responses": [
        {{
            "scenario": "description",
            "status_code": 200,
            "body": {{}},
            "headers": {{}}
        }}
    ]
}}"""

        return self._request_json(prompt).get("responses", [])

    def generate_test_documents(
        self,
        document_type: str,
        count: int = 5,
        include_variations: list[str] = None
    ) -> list[dict]:
        """Generate test documents of various types.

        Args:
            document_type: Kind of document to generate.
            count: Number of documents requested.
            include_variations: Variations to include; defaults to
                ``["standard", "edge case", "malformed"]``.

        Returns:
            The reply's ``"documents"`` value (the prompt asks for dicts with
            ``content`` and ``variation`` keys), or ``[]`` when it is absent.
        """
        variations = include_variations or ["standard", "edge case", "malformed"]

        prompt = f"""Generate {count} {document_type} documents for testing.

Include these variations:
{json.dumps(variations, indent=2)}

Each document should be realistic and complete.
Make them diverse in content and structure.

Return JSON:
{{
    "documents": [
        {{"content": "document text", "variation": "type of variation"}}
    ]
}}"""

        return self._request_json(prompt).get("documents", [])


# Usage: fixtures, schema-driven records and mock API responses
generator = TestDataGenerator()

# Five user fixtures built from the default user schema
users = generator.generate_users(count=5)
print("Generated users:")
for user in users:
    print(f"  {user['name']} ({user['email']})")

# Product records from a custom schema plus one extra constraint
product_schema = {
    "sku": "string (8 alphanumeric)",
    "name": "product name",
    "price": "float (10-1000)",
    "category": "electronics|clothing|home|sports",
    "in_stock": "boolean",
    "rating": "float (1-5)"
}
product_constraints = {"at_least_2_out_of_stock": True}

products = generator.generate_from_schema(
    schema=product_schema,
    count=5,
    constraints=product_constraints
)

print("\nGenerated products:")
for product in products:
    print(f"  {product['sku']}: {product['name']} - ${product.get('price', 0):.2f}")

# Mock responses for an order-lookup endpoint, error cases included
api_responses = generator.generate_api_responses(
    "GET /api/orders/{order_id}",
    ["Valid order", "Order not found", "Server error"],
    True
)

print("\nAPI test responses:")
for resp in api_responses:
    print(f"  {resp['scenario']}: {resp['status_code']}")

Evaluation Dataset Creation

from openai import OpenAI
import json


class EvaluationDataGenerator:
    """Generate evaluation/benchmark datasets via JSON-mode chat completions."""

    def __init__(self, model: str = "gpt-4o"):
        self.client = OpenAI()
        self.model = model

    def _request_json(self, prompt: str) -> dict:
        """Send ``prompt`` as one user message and return the parsed JSON reply.

        Raises:
            json.JSONDecodeError: If the reply text is not valid JSON.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    @staticmethod
    def _allocate_counts(total: int, distribution: dict) -> dict:
        """Split ``total`` across the keys of ``distribution`` by proportion.

        Each key first gets the floor of ``total * proportion``; the units
        lost to flooring are then handed out one each to the keys with the
        largest fractional remainders, so the counts add up to the rounded
        sum of the quotas (``total`` when the proportions sum to 1).
        """
        quotas = {name: total * share for name, share in distribution.items()}
        counts = {name: int(quota) for name, quota in quotas.items()}
        leftover = round(sum(quotas.values())) - sum(counts.values())
        by_remainder = sorted(
            quotas, key=lambda name: quotas[name] - counts[name], reverse=True
        )
        for name in by_remainder[:max(leftover, 0)]:
            counts[name] += 1
        return counts

    def create_benchmark(
        self,
        task_description: str,
        difficulty_distribution: dict = None,
        total_examples: int = 100
    ) -> dict:
        """Create a benchmark dataset stratified by difficulty.

        Args:
            task_description: Task description inserted into every prompt.
            difficulty_distribution: Maps a difficulty name to its proportion;
                defaults to 30% easy, 50% medium, 20% hard.
            total_examples: Number of examples to request in total. Counts
                per difficulty are allocated so that they sum to this value
                when the proportions sum to 1 (plain truncation could
                under-request); a difficulty allocated zero examples is
                skipped without a request.

        Returns:
            Dict with ``task``, ``total_examples`` (number of examples actually
            returned by the model), ``difficulty_distribution`` and
            ``examples``; each example is tagged with its ``difficulty``.
        """
        difficulty_distribution = difficulty_distribution or {
            "easy": 0.3,
            "medium": 0.5,
            "hard": 0.2
        }

        counts = self._allocate_counts(total_examples, difficulty_distribution)
        all_examples = []

        for difficulty, count in counts.items():
            if count <= 0:
                # Nothing to ask for; avoid a pointless "Generate 0 ..." request
                continue

            prompt = f"""Generate {count} evaluation examples for this task.

Task: {task_description}
Difficulty: {difficulty}

Requirements for {difficulty} difficulty:
- Easy: Simple, clear cases with obvious answers
- Medium: Requires some reasoning or has minor ambiguity
- Hard: Complex cases requiring nuanced understanding

Include a brief explanation for why each answer is correct.

Return JSON:
{{
    "examples": [
        {{
            "input": "test input",
            "expected_output": "correct answer",
            "explanation": "why this is correct"
        }}
    ]
}}"""

            data = self._request_json(prompt)

            for ex in data.get("examples", []):
                ex["difficulty"] = difficulty
                all_examples.append(ex)

        return {
            "task": task_description,
            "total_examples": len(all_examples),
            "difficulty_distribution": difficulty_distribution,
            "examples": all_examples
        }

    def create_adversarial_set(
        self,
        task_description: str,
        attack_types: list[str] = None,
        examples_per_attack: int = 10
    ) -> list[dict]:
        """Generate adversarial test examples, one request per attack type.

        Args:
            task_description: Task description inserted into every prompt.
            attack_types: Attack types to cover; defaults to distraction,
                negation, paraphrase, format_variation and edge_case.
            examples_per_attack: Number of examples requested per attack type.

        Returns:
            The examples from every reply, each tagged with ``attack_type``.
        """
        attack_types = attack_types or [
            "distraction",
            "negation",
            "paraphrase",
            "format_variation",
            "edge_case"
        ]

        all_adversarial = []

        for attack in attack_types:
            prompt = f"""Generate {examples_per_attack} adversarial examples for this task.

Task: {task_description}
Attack type: {attack}

Attack descriptions:
- distraction: Add irrelevant information that might confuse
- negation: Use negations and double negatives
- paraphrase: Unusual phrasing of standard cases
- format_variation: Unusual formatting, punctuation, or structure
- edge_case: Boundary conditions and unusual values

Create examples that are tricky but still have correct answers.

Return JSON:
{{
    "examples": [
        {{
            "input": "adversarial input",
            "expected_output": "correct answer despite adversarial nature",
            "attack_description": "what makes this adversarial"
        }}
    ]
}}"""

            data = self._request_json(prompt)

            for ex in data.get("examples", []):
                ex["attack_type"] = attack
                all_adversarial.append(ex)

        return all_adversarial


# Usage: a stratified benchmark and an adversarial set
eval_generator = EvaluationDataGenerator()

# Twenty NER examples split 40/40/20 across difficulty levels
benchmark = eval_generator.create_benchmark(
    "Named entity recognition - identify person, organization, and location entities",
    {"easy": 0.4, "medium": 0.4, "hard": 0.2},
    20
)

print(f"Created benchmark with {benchmark['total_examples']} examples")
# Count how many examples actually came back for each difficulty
for diff in benchmark['difficulty_distribution']:
    count = len([ex for ex in benchmark['examples'] if ex['difficulty'] == diff])
    print(f"  {diff}: {count} examples")

# Five adversarial examples for each of two attack types
adversarial = eval_generator.create_adversarial_set(
    "Sentiment classification",
    ["negation", "distraction"],
    5
)

print(f"\nCreated {len(adversarial)} adversarial examples")

Synthetic Data Guidelines

- Always validate generated data against the task requirements.
- Use seed examples to guide the generation style.
- Include difficulty stratification for robust training.
- Filter and deduplicate before use.
- Test on held-out real data to verify effectiveness.

Practice Exercise

Build a synthetic data pipeline that:

- Generates task-specific training examples
- Creates augmented variations of existing data
- Filters for quality and removes duplicates
- Produces balanced evaluation benchmarks
- Includes adversarial test cases

Focus on:

- Diversity in generated examples
- Accuracy of labels and outputs
- Quality filtering at each stage
- Realistic edge case coverage

Overview

Testing & Code Quality

Crash Courses

AI Engineering

Math for ML - Understanding Linear Algebra

Probability & Statistics for ML

Math for ML - Understanding Calculus

ML Mastery

Deep Learning Mastery

NestJS Mastery

Microservices Mastery

Low Level Design

OOP Concepts

SOLID Principles

Design Patterns

LLD Case Studies

System Design (HLD)

Senior Level (L5+/Staff)

HLD Case Studies

Engineering Fundamentals

DevOps & Operations

Azure Cloud Engineering

AWS Cloud

AWS Monitoring & Observability

AWS Security Services

AWS Serverless

AWS Operations

AWS Advanced

AWS Case Studies

GCP Cloud Engineering

DevOps Tools

Database Engineering

HIPAA Compliance Mastery

Operating Systems

Linux Internals

Distributed Systems

Networking Mastery

Build Your Own X

Go Lang Mastery

C Programming

Classic Research Papers

Distributed System Tools

​Training Data Generation

​Basic Data Generation

​Instruction-Following Data

​Data Augmentation

​Quality Filtering

​Test Data Generation

​Evaluation Dataset Creation

​Practice Exercise