Image Generation - Dev Weekends

AI image generation enables creating, editing, and transforming images through natural language. This chapter covers practical patterns for production image generation.

DALL-E Image Generation

Basic Image Generation

from openai import OpenAI
import base64
from pathlib import Path


def generate_image(
    prompt: str,
    size: str = "1024x1024",
    quality: str = "standard",
    style: str = "vivid"
) -> str:
    """Create one DALL-E 3 image for ``prompt`` and return its URL.

    Args:
        prompt: Text description of the desired image.
        size: One of 1024x1024, 1792x1024, or 1024x1792.
        quality: ``standard`` or ``hd``.
        style: ``vivid`` or ``natural``.

    Returns:
        The ``url`` attribute of the first image in the API response.
    """
    request = {
        "model": "dall-e-3",
        "prompt": prompt,
        "size": size,
        "quality": quality,
        "style": style,
        "n": 1,
    }

    # A fresh client is created per call, as in the other helpers here.
    result = OpenAI().images.generate(**request)
    first_image = result.data[0]
    return first_image.url


def generate_and_save(
    prompt: str,
    output_path: str,
    **kwargs
) -> str:
    """Generate an image with DALL-E 3 and write it to ``output_path``.

    Args:
        prompt: Text description of the desired image.
        output_path: Destination file path. Missing parent directories
            are created.
        **kwargs: Extra arguments forwarded to ``client.images.generate``
            (for example ``size``, ``quality``, ``style``). Any
            ``response_format`` supplied here is overridden, because the
            image bytes are read from the base64 payload.

    Returns:
        ``output_path``, unchanged, once the file has been written.
    """
    destination = Path(output_path)
    # Create the target directory before the billed API call so that a bad
    # path fails early instead of discarding a generated image.
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Force base64 output. Merging this way also avoids the
    # "multiple values for keyword argument" TypeError that occurred when a
    # caller passed response_format through **kwargs.
    request = {**kwargs, "response_format": "b64_json"}

    client = OpenAI()
    response = client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        **request
    )

    image_data = base64.b64decode(response.data[0].b64_json)
    destination.write_bytes(image_data)

    return output_path


# Usage
# Running this section performs two real image generation requests.

# Get URL: request an "hd" quality image and print the URL returned by
# generate_image (size and style keep their defaults).
url = generate_image(
    "A futuristic city with flying cars at sunset, digital art style",
    quality="hd"
)
print(f"Image URL: {url}")

# Save locally: size="1792x1024" is forwarded through **kwargs to the API,
# and the decoded image bytes are written to coffee_shop.png.
path = generate_and_save(
    "A cozy coffee shop interior with warm lighting",
    "coffee_shop.png",
    size="1792x1024"
)
print(f"Saved to: {path}")

Prompt Engineering for Images

from openai import OpenAI
from dataclasses import dataclass


@dataclass
class ImageStyle:
    """Predefined image style configurations."""
    # Human-readable label for the style.
    name: str
    # Text placed at the start of the generated prompt.
    prefix: str
    # Text placed at the end of the generated prompt.
    suffix: str
    # Keyword arguments passed to the image generation call
    # (the presets in this file use "style" and "quality").
    settings: dict


class ImagePromptBuilder:
    """Build styled image prompts from a subject and a named style preset.

    Each preset in ``STYLES`` supplies a prompt prefix, a prompt suffix and
    the ``style``/``quality`` settings sent with the generation request.
    """

    STYLES = {
        "photorealistic": ImageStyle(
            name="Photorealistic",
            prefix="A photorealistic image of",
            suffix="high detail, 8k resolution, professional photography",
            settings={"style": "natural", "quality": "hd"}
        ),
        "digital_art": ImageStyle(
            name="Digital Art",
            prefix="Digital art illustration of",
            suffix="vibrant colors, detailed, trending on ArtStation",
            settings={"style": "vivid", "quality": "hd"}
        ),
        "oil_painting": ImageStyle(
            name="Oil Painting",
            prefix="Oil painting of",
            suffix="classical style, rich textures, masterful brushwork",
            settings={"style": "natural", "quality": "hd"}
        ),
        "minimalist": ImageStyle(
            name="Minimalist",
            prefix="Minimalist illustration of",
            suffix="simple shapes, clean lines, limited color palette",
            settings={"style": "natural", "quality": "standard"}
        ),
        "watercolor": ImageStyle(
            name="Watercolor",
            prefix="Watercolor painting of",
            suffix="soft edges, flowing colors, artistic",
            settings={"style": "natural", "quality": "hd"}
        ),
        "3d_render": ImageStyle(
            name="3D Render",
            prefix="3D rendered image of",
            suffix="octane render, ray tracing, studio lighting",
            settings={"style": "vivid", "quality": "hd"}
        ),
    }

    def __init__(self):
        self.client = OpenAI()

    def build_prompt(
        self,
        subject: str,
        style_name: str = "digital_art",
        additional_details: list[str] = None
    ) -> tuple[str, dict]:
        """Build a styled prompt and the request settings that go with it.

        Args:
            subject: What the image should depict.
            style_name: Key into ``STYLES``. Unknown names fall back to
                ``"digital_art"``.
            additional_details: Optional extra phrases inserted between the
                subject and the style suffix.

        Returns:
            A ``(prompt, settings)`` tuple. ``settings`` is a copy of the
            preset's settings, so the caller may modify it freely.
        """
        style = self.STYLES.get(style_name, self.STYLES["digital_art"])

        # The prefix ends in "of", so it is joined to the subject with a
        # space. Comma-joining produced "Oil painting of, a dog".
        parts = [f"{style.prefix} {subject}"]

        if additional_details:
            parts.extend(additional_details)

        parts.append(style.suffix)

        prompt = ", ".join(parts)

        # STYLES is shared class-level state; returning the preset's own
        # dict would let one caller's mutation leak into later requests.
        return prompt, dict(style.settings)

    def generate(
        self,
        subject: str,
        style_name: str = "digital_art",
        additional_details: list[str] = None,
        size: str = "1024x1024"
    ) -> str:
        """Generate an image for ``subject`` in the chosen style.

        Args:
            subject: What the image should depict.
            style_name: Key into ``STYLES`` (see ``build_prompt``).
            additional_details: Optional extra phrases for the prompt.
            size: Image size string passed to the API.

        Returns:
            The ``url`` of the first image in the API response.
        """
        prompt, settings = self.build_prompt(
            subject, style_name, additional_details
        )

        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size=size,
            **settings
        )

        return response.data[0].url


# Usage
# Each generate() call below performs a real image generation request and
# rebinds `url`, so only the last URL is kept.
builder = ImagePromptBuilder()

# Photorealistic style
url = builder.generate(
    "a golden retriever playing in autumn leaves",
    style_name="photorealistic"
)

# Digital art style, with extra detail phrases added to the prompt
url = builder.generate(
    "a futuristic robot gardener tending to plants",
    style_name="digital_art",
    additional_details=["peaceful scene", "morning light"]
)

# 3D render style
url = builder.generate(
    "a glass sculpture of a hummingbird",
    style_name="3d_render"
)

Batch Image Generation

from openai import OpenAI
import asyncio
from dataclasses import dataclass
import time


@dataclass
class ImageResult:
    """Result of an image generation request."""
    # The prompt that was submitted.
    prompt: str
    # URL of the generated image; left as None when the request failed.
    url: str = None
    # String form of the raised exception; left as None on success.
    error: str = None
    # Seconds spent in the API call, measured by the generator.
    generation_time: float = 0


class BatchImageGenerator:
    """Generate several images sequentially with client-side throttling."""

    def __init__(self, rate_limit: float = 1.0):
        """Create a generator.

        Args:
            rate_limit: Maximum requests per second; must be positive.

        Raises:
            ValueError: If ``rate_limit`` is zero or negative.
        """
        # Fail fast: a zero rate previously surfaced as a ZeroDivisionError
        # on the first request rather than at construction time.
        if rate_limit <= 0:
            raise ValueError(
                "rate_limit must be a positive number of requests per second"
            )

        self.client = OpenAI()
        self.rate_limit = rate_limit  # Requests per second
        # -inf means "no request yet", so the first request never waits,
        # whatever the monotonic clock's starting value is.
        self.last_request_time = float("-inf")

    def _wait_for_rate_limit(self):
        """Sleep until at least ``1 / rate_limit`` seconds separate requests."""
        min_interval = 1.0 / self.rate_limit
        # The monotonic clock is immune to wall-clock adjustments, which
        # could otherwise make the computed wait far too long or too short.
        elapsed = time.monotonic() - self.last_request_time
        wait_time = min_interval - elapsed

        if wait_time > 0:
            time.sleep(wait_time)

        self.last_request_time = time.monotonic()

    def generate_single(
        self,
        prompt: str,
        **kwargs
    ) -> ImageResult:
        """Generate one image, capturing any failure in the result.

        Args:
            prompt: Text description of the desired image.
            **kwargs: Extra arguments forwarded to ``images.generate``.

        Returns:
            An ``ImageResult`` with ``url`` set on success or ``error`` set
            on failure. Exceptions are not propagated, so one bad prompt
            does not abort a batch.
        """
        self._wait_for_rate_limit()

        start_time = time.monotonic()

        try:
            response = self.client.images.generate(
                model="dall-e-3",
                prompt=prompt,
                **kwargs
            )
        except Exception as e:
            # Deliberately broad: the failure is recorded, not raised.
            return ImageResult(
                prompt=prompt,
                error=str(e),
                generation_time=time.monotonic() - start_time
            )

        return ImageResult(
            prompt=prompt,
            url=response.data[0].url,
            generation_time=time.monotonic() - start_time
        )

    def generate_batch(
        self,
        prompts: list[str],
        progress_callback: callable = None,
        **kwargs
    ) -> list[ImageResult]:
        """Generate one image per prompt, in order.

        Args:
            prompts: Prompts to process sequentially.
            progress_callback: Optional callable invoked after each prompt
                as ``progress_callback(done_count, total, result)``.
            **kwargs: Extra arguments forwarded to ``generate_single``.

        Returns:
            One ``ImageResult`` per prompt, in input order.
        """
        results = []
        total = len(prompts)

        for done_count, prompt in enumerate(prompts, start=1):
            result = self.generate_single(prompt, **kwargs)
            results.append(result)

            if progress_callback:
                progress_callback(done_count, total, result)

        return results

    def generate_variations_batch(
        self,
        base_prompt: str,
        variations: list[str],
        **kwargs
    ) -> list[ImageResult]:
        """Generate one image per variation of a base prompt.

        Each prompt is built as ``"{base_prompt}, {variation}"``.

        Args:
            base_prompt: Text shared by every prompt.
            variations: Phrases appended to the base prompt, one per image.
            **kwargs: Forwarded to ``generate_batch``.

        Returns:
            One ``ImageResult`` per variation, in input order.
        """
        prompts = [
            f"{base_prompt}, {variation}"
            for variation in variations
        ]

        return self.generate_batch(prompts, **kwargs)


# Usage
# rate_limit is in requests per second, so 0.5 spaces requests 2 seconds apart.
generator = BatchImageGenerator(rate_limit=0.5)  # 1 image per 2 seconds

# Prompts for the batch run; they are processed in this order.
prompts = [
    "A serene lake at dawn with mountains in background",
    "A bustling night market in Tokyo",
    "An ancient library with magical floating books"
]

def on_progress(current, total, result):
    """Print a one-line progress report for a finished batch item.

    Shows the position in the batch, the first 30 characters of the prompt,
    and either "OK" or the recorded error.
    """
    if result.url:
        outcome = "OK"
    else:
        outcome = f"Error: {result.error}"

    prompt_preview = result.prompt[:30]
    print(f"[{current}/{total}] {prompt_preview}... - {outcome}")

# Run the batch; on_progress is called once after each prompt finishes.
results = generator.generate_batch(prompts, progress_callback=on_progress)

# Generate variations
# generate_variations_batch joins base prompt and variation with ", ", so
# the base prompt must read as a complete phrase on its own. The previous
# "A Japanese garden in" produced "A Japanese garden in, spring".
variations = ["spring", "summer", "autumn", "winter"]
results = generator.generate_variations_batch(
    "A Japanese garden",
    variations
)

Image Editing

Inpainting with DALL-E

from openai import OpenAI
from PIL import Image
import io
import base64


class ImageEditor:
    """Edit images and create variations using the DALL-E 2 endpoints."""

    def __init__(self):
        self.client = OpenAI()

    def create_mask(
        self,
        image_size: tuple[int, int],
        mask_region: tuple[int, int, int, int]
    ) -> bytes:
        """Create a PNG mask whose transparent rectangle marks the edit area.

        Args:
            image_size: ``(width, height)`` of the mask, matching the image.
            mask_region: ``(x1, y1, x2, y2)`` rectangle to make editable;
                ``x2``/``y2`` are exclusive. Parts outside the image are
                clipped, and an empty rectangle leaves the mask fully opaque.

        Returns:
            The mask encoded as PNG bytes.
        """
        # Start fully opaque (alpha 255): opaque pixels are kept as-is.
        mask = Image.new("RGBA", image_size, (0, 0, 0, 255))

        # Make the edit region fully transparent (alpha 0). The alpha
        # channel marks the editable area, not the pixel colour. A single
        # paste fills the rectangle without a per-pixel Python loop.
        x1, y1, x2, y2 = mask_region
        if x2 > x1 and y2 > y1:
            mask.paste((255, 255, 255, 0), (x1, y1, x2, y2))

        # Save to bytes
        buffer = io.BytesIO()
        mask.save(buffer, format="PNG")
        return buffer.getvalue()

    def edit_image(
        self,
        image_path: str,
        mask_path: str,
        prompt: str,
        size: str = "1024x1024"
    ) -> str:
        """Edit the masked area of an image according to ``prompt``.

        Args:
            image_path: Path to the source image file.
            mask_path: Path to the mask file (see ``create_mask``).
            prompt: Description of the desired result.
            size: Output size string passed to the API.

        Returns:
            The ``url`` of the first edited image in the API response.
        """
        with open(image_path, "rb") as img_file, \
                open(mask_path, "rb") as mask_file:
            response = self.client.images.edit(
                model="dall-e-2",  # DALL-E 2 for editing
                image=img_file,
                mask=mask_file,
                prompt=prompt,
                size=size,
                n=1
            )

        return response.data[0].url

    def create_variation(
        self,
        image_path: str,
        n: int = 1,
        size: str = "1024x1024"
    ) -> list[str]:
        """Create variations of an existing image.

        Args:
            image_path: Path to the source image file.
            n: Number of variations to request.
            size: Output size string passed to the API.

        Returns:
            The ``url`` of every image in the API response.
        """
        with open(image_path, "rb") as img_file:
            response = self.client.images.create_variation(
                model="dall-e-2",
                image=img_file,
                n=n,
                size=size
            )

        return [img.url for img in response.data]

    @staticmethod
    def _prepared_output_path(image_path: str) -> str:
        """Return ``image_path`` with its extension replaced by ``_prepared.png``."""
        import os

        # splitext only strips an extension from the final path component.
        # rsplit(".") also split on dots in directory names, so
        # "./images/photo" became "_prepared.png".
        root, _extension = os.path.splitext(image_path)
        return root + "_prepared.png"

    def prepare_image_for_editing(
        self,
        image_path: str,
        target_size: int = 1024
    ) -> str:
        """Convert an image to a square RGBA PNG and save it next to the source.

        The image is resized to ``target_size`` x ``target_size`` without
        preserving aspect ratio, so non-square inputs are stretched.

        Args:
            image_path: Path to the source image file.
            target_size: Width and height of the output, in pixels.

        Returns:
            Path of the written PNG: the source path with its extension
            replaced by ``_prepared.png``.
        """
        # The context manager closes the source file handle, which the
        # bare Image.open() call left open.
        with Image.open(image_path) as source:
            # Convert to RGBA
            rgba = source if source.mode == "RGBA" else source.convert("RGBA")

            # Resize to square
            resized = rgba.resize(
                (target_size, target_size), Image.Resampling.LANCZOS
            )

        output_path = self._prepared_output_path(image_path)
        resized.save(output_path, "PNG")

        return output_path


# Usage
editor = ImageEditor()

# Prepare image: writes a 1024x1024 RGBA PNG next to the source file and
# returns its path ("original_prepared.png").
prepared = editor.prepare_image_for_editing("original.jpg")

# Create mask programmatically (same size as the prepared image)
mask_bytes = editor.create_mask(
    (1024, 1024),
    (300, 300, 700, 700)  # Center region
)

# Save mask
with open("mask.png", "wb") as f:
    f.write(mask_bytes)

# Edit the image
result_url = editor.edit_image(
    prepared,
    "mask.png",
    "A beautiful sunset sky with pink and orange clouds"
)

# Create variations from the prepared PNG. The previous "original.png" is
# never created by this script; only original.jpg and the prepared copy exist.
variations = editor.create_variation(prepared, n=3)

AI-Powered Image Transformation

from openai import OpenAI
import base64


class ImageTransformer:
    """Transform images by describing them with a vision model and regenerating.

    Nothing here edits pixels directly: the source image is turned into a
    text description, and a new image is generated from that description.
    """

    def __init__(self):
        self.client = OpenAI()

    @staticmethod
    def _encode_image_as_data_url(image_path: str) -> str:
        """Read an image file and return it as a base64 ``data:`` URL."""
        import mimetypes

        # Label the payload with the file's real type. It used to be
        # hard-coded to image/png even for .jpg inputs.
        mime_type, _encoding = mimetypes.guess_type(image_path)
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/png"  # previous behaviour, kept as fallback

        with open(image_path, "rb") as f:
            encoded = base64.standard_b64encode(f.read()).decode()

        return f"data:{mime_type};base64,{encoded}"

    def _generate_hd(self, prompt: str, size: str) -> str:
        """Generate one "hd" DALL-E 3 image and return its URL."""
        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size=size,
            quality="hd"
        )

        return response.data[0].url

    def analyze_image(self, image_path: str) -> str:
        """Describe an image in detail using the gpt-4o vision model.

        Args:
            image_path: Path to the image file to describe.

        Returns:
            The model's text description, intended for use in a
            generation prompt.
        """
        data_url = self._encode_image_as_data_url(image_path)

        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": data_url
                            }
                        },
                        {
                            "type": "text",
                            "text": "Describe this image in detail for image generation. Include style, colors, composition, lighting, and mood."
                        }
                    ]
                }
            ]
        )

        return response.choices[0].message.content

    def transform_style(
        self,
        image_path: str,
        target_style: str
    ) -> str:
        """Regenerate the scene of an image in a different visual style.

        Args:
            image_path: Path to the source image file.
            target_style: Name of the desired style, e.g. ``"anime"``.

        Returns:
            URL of the newly generated 1024x1024 image.
        """
        # Analyze original image
        description = self.analyze_image(image_path)

        # Create prompt for new style
        prompt = f"""Transform this scene to {target_style} style:

Original description: {description}

Create the same scene and composition but in {target_style} style."""

        return self._generate_hd(prompt, "1024x1024")

    def extend_image(
        self,
        image_path: str,
        direction: str,
        extension_prompt: str
    ) -> str:
        """Conceptually extend an image (describe and regenerate larger scene).

        Args:
            image_path: Path to the source image file.
            direction: ``left``, ``right``, ``up``, ``down`` or ``wider``.
                Any other value is inserted into the prompt verbatim and
                produces a portrait image.
            extension_prompt: Description of the content to add.

        Returns:
            URL of the generated image: 1792x1024 for ``left``, ``right``
            and ``wider``, otherwise 1024x1792.
        """
        # Analyze original
        description = self.analyze_image(image_path)

        direction_prompts = {
            "left": "with additional content extending to the left",
            "right": "with additional content extending to the right",
            "up": "with additional content above the original scene",
            "down": "with additional content below the original scene",
            "wider": "as a wider panoramic version of this scene"
        }

        direction_text = direction_prompts.get(direction, direction)

        prompt = f"""Create an extended version of this scene {direction_text}:

Original scene: {description}

Extension: {extension_prompt}

Maintain consistent style, lighting, and composition."""

        # Horizontal extensions get a landscape canvas, the rest portrait.
        is_horizontal = direction in ("left", "right", "wider")
        size = "1792x1024" if is_horizontal else "1024x1792"

        return self._generate_hd(prompt, size)


# Usage
# Each call below makes two API requests: one to describe the source image
# and one to generate the new image from that description.
transformer = ImageTransformer()

# Transform to different style
anime_url = transformer.transform_style(
    "photo.jpg",
    "anime"
)

# Extend image: "wider" selects the panoramic wording and a 1792x1024 output
extended_url = transformer.extend_image(
    "landscape.jpg",
    "wider",
    "rolling hills with a distant village"
)

Production Patterns

Image Generation Service

from openai import OpenAI
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
import uuid
import time
import hashlib


class ImageStatus(Enum):
    """Lifecycle states of an image generation job."""
    # Job created and waiting to be processed.
    PENDING = "pending"
    # Set by process_job just before the API call.
    GENERATING = "generating"
    # An image URL is available (freshly generated or served from cache).
    COMPLETED = "completed"
    # Prompt validation failed or the API call raised an exception.
    FAILED = "failed"


@dataclass
class ImageJob:
    """An image generation job."""
    # Unique identifier; the service assigns a UUID4 string.
    id: str
    # Prompt text as submitted by the caller.
    prompt: str
    # Current lifecycle state.
    status: ImageStatus
    # URL of the generated image, set once the job completes.
    url: Optional[str] = None
    # Failure reason, set when the job fails.
    error: Optional[str] = None
    # time.time() timestamp taken when the job object is created.
    created_at: float = field(default_factory=time.time)
    # time.time() timestamp set when the job completes or fails in processing.
    completed_at: Optional[float] = None
    # Free-form extras; the service stores "settings", "user_id" and "cached" here.
    metadata: dict = field(default_factory=dict)


class ImageGenerationService:
    """In-memory image generation service with validation, jobs and caching.

    Jobs and cache entries live in plain dictionaries on the instance, so
    they are lost when the process exits.
    """

    # Basic content filtering (extend as needed). Matching is a simple
    # case-insensitive substring test.
    BLOCKED_TERMS = ("explicit", "violent", "illegal")

    def __init__(
        self,
        cache_enabled: bool = True,
        max_prompt_length: int = 4000,
        cache_ttl: Optional[float] = 55 * 60
    ):
        """Create a service.

        Args:
            cache_enabled: Reuse URLs for identical prompt/settings pairs.
            max_prompt_length: Maximum accepted prompt length in characters.
            cache_ttl: Seconds a cached URL stays valid, or ``None`` to
                never expire entries. The default stays just under the
                one-hour lifetime of generated image URLs, so a cache hit
                does not return a dead link.
        """
        self.client = OpenAI()
        self.jobs: dict[str, ImageJob] = {}
        self.cache: dict[str, str] = {}
        # Expiry timestamps are kept beside `cache` so that `cache` keeps
        # its original key -> URL shape.
        self._cache_expiry: dict[str, float] = {}
        self.cache_enabled = cache_enabled
        self.max_prompt_length = max_prompt_length
        self.cache_ttl = cache_ttl

    def _get_cache_key(self, prompt: str, settings: dict) -> str:
        """Return a key for ``prompt`` + ``settings``, independent of dict order."""
        content = f"{prompt}:{str(sorted(settings.items()))}"
        # MD5 is used only as a compact fingerprint, not for security.
        return hashlib.md5(content.encode()).hexdigest()

    def _cached_url(self, cache_key: str) -> Optional[str]:
        """Return the cached URL for ``cache_key``, evicting it if expired."""
        url = self.cache.get(cache_key)
        if url is None:
            return None

        # Entries without a recorded expiry (e.g. inserted directly into
        # `cache`) are treated as non-expiring.
        expires_at = self._cache_expiry.get(cache_key)
        if expires_at is not None and time.time() >= expires_at:
            self.cache.pop(cache_key, None)
            self._cache_expiry.pop(cache_key, None)
            return None

        return url

    def _validate_prompt(self, prompt: str) -> tuple[bool, str]:
        """Check a prompt before generation.

        Returns:
            ``(True, "")`` when the prompt is acceptable, otherwise
            ``(False, reason)``.
        """
        if not prompt or not prompt.strip():
            return False, "Prompt cannot be empty"

        if len(prompt) > self.max_prompt_length:
            return False, f"Prompt exceeds maximum length of {self.max_prompt_length}"

        prompt_lower = prompt.lower()
        if any(term in prompt_lower for term in self.BLOCKED_TERMS):
            return False, "Prompt contains blocked content"

        return True, ""

    def create_job(
        self,
        prompt: str,
        size: str = "1024x1024",
        quality: str = "standard",
        style: str = "vivid",
        user_id: str = None
    ) -> ImageJob:
        """Create and register an image generation job.

        No image is generated here; call ``process_job`` for a PENDING job.

        Args:
            prompt: Text description of the desired image.
            size: Image size string passed to the API.
            quality: ``standard`` or ``hd``.
            style: ``vivid`` or ``natural``.
            user_id: Optional identifier stored in the job metadata.

        Returns:
            A FAILED job when validation fails, a COMPLETED job when an
            unexpired cached URL exists, otherwise a PENDING job.
        """
        # Validate prompt
        valid, error = self._validate_prompt(prompt)
        if not valid:
            job = ImageJob(
                id=str(uuid.uuid4()),
                prompt=prompt,
                status=ImageStatus.FAILED,
                error=error
            )
            self.jobs[job.id] = job
            return job

        settings = {"size": size, "quality": quality, "style": style}

        # Check cache
        if self.cache_enabled:
            cached_url = self._cached_url(
                self._get_cache_key(prompt, settings)
            )
            if cached_url is not None:
                job = ImageJob(
                    id=str(uuid.uuid4()),
                    prompt=prompt,
                    status=ImageStatus.COMPLETED,
                    url=cached_url,
                    completed_at=time.time(),
                    metadata={"cached": True, "user_id": user_id}
                )
                self.jobs[job.id] = job
                return job

        # Create job
        job = ImageJob(
            id=str(uuid.uuid4()),
            prompt=prompt,
            status=ImageStatus.PENDING,
            metadata={"settings": settings, "user_id": user_id}
        )
        self.jobs[job.id] = job

        return job

    def process_job(self, job_id: str) -> ImageJob:
        """Run the generation request for a pending job.

        Jobs that are not PENDING are returned unchanged. API failures are
        recorded on the job (status FAILED, ``error`` set), not raised.

        Args:
            job_id: Identifier returned by ``create_job``.

        Returns:
            The updated job.

        Raises:
            ValueError: If no job with ``job_id`` exists.
        """
        job = self.jobs.get(job_id)
        if not job:
            raise ValueError(f"Job not found: {job_id}")

        if job.status != ImageStatus.PENDING:
            return job

        job.status = ImageStatus.GENERATING
        settings = job.metadata.get("settings", {})

        try:
            response = self.client.images.generate(
                model="dall-e-3",
                prompt=job.prompt,
                **settings
            )

            job.url = response.data[0].url
            job.status = ImageStatus.COMPLETED
            job.completed_at = time.time()

            # Cache result
            if self.cache_enabled:
                cache_key = self._get_cache_key(job.prompt, settings)
                self.cache[cache_key] = job.url
                if self.cache_ttl is not None:
                    self._cache_expiry[cache_key] = (
                        job.completed_at + self.cache_ttl
                    )
                else:
                    self._cache_expiry.pop(cache_key, None)

        except Exception as e:
            job.status = ImageStatus.FAILED
            job.error = str(e)
            job.completed_at = time.time()

        return job

    def get_job(self, job_id: str) -> Optional[ImageJob]:
        """Return the job with ``job_id``, or ``None`` if it is unknown."""
        return self.jobs.get(job_id)

    def get_usage_stats(self) -> dict:
        """Summarise the jobs seen by this service instance.

        Returns:
            A dict with ``total_jobs``, ``completed``, ``failed``,
            ``cache_hits`` and ``avg_generation_time``. The average is the
            mean seconds from job creation to completion over jobs that
            were actually generated; cache hits are excluded because they
            complete almost instantly and would skew it.
        """
        all_jobs = list(self.jobs.values())
        completed = [j for j in all_jobs if j.status == ImageStatus.COMPLETED]
        failed_count = sum(
            1 for j in all_jobs if j.status == ImageStatus.FAILED
        )

        # Sum and divide over the same set of jobs.
        durations = [
            j.completed_at - j.created_at
            for j in completed
            if j.completed_at and not j.metadata.get("cached")
        ]

        return {
            "total_jobs": len(all_jobs),
            "completed": len(completed),
            "failed": failed_count,
            "cache_hits": sum(1 for j in completed if j.metadata.get("cached")),
            "avg_generation_time": (
                sum(durations) / len(durations) if durations else 0
            )
        }


# Usage
service = ImageGenerationService()

# Create and process a job
# create_job validates the prompt and registers the job; it does not call
# the image API itself.
job = service.create_job(
    "A majestic dragon flying over a medieval castle",
    quality="hd",
    user_id="user_123"
)

print(f"Job created: {job.id}")

# Process the job (this is where the generation request is made)
result = service.process_job(job.id)
print(f"Status: {result.status.value}")

# A job without a URL did not produce an image; its error field says why.
if result.url:
    print(f"Image URL: {result.url}")
else:
    print(f"Error: {result.error}")

# Get stats
stats = service.get_usage_stats()
print(f"Stats: {stats}")

Image Generation Best Practices

Use detailed, specific prompts for better results
Include style, lighting, and composition details
Implement content moderation for user prompts
Cache generated images to reduce costs (download and store the image itself, because the URLs returned by the API expire)
Use appropriate quality settings for your use case

Practice Exercise

Build an image generation platform that:

Accepts natural language descriptions
Enhances prompts automatically for better results
Supports multiple styles and configurations
Implements content moderation
Provides image variations and editing

Focus on:

Prompt optimization for quality
Cost management through caching
Content safety filtering
User experience with progress feedback

Overview

Testing & Code Quality

Crash Courses

AI Engineering

Math for ML - Understanding Linear Algebra

Probability & Statistics for ML

Math for ML - Understanding Calculus

ML Mastery

Deep Learning Mastery

NestJS Mastery

Microservices Mastery

Low Level Design

OOP Concepts

SOLID Principles

Design Patterns

LLD Case Studies

System Design (HLD)

Senior Level (L5+/Staff)

HLD Case Studies

Engineering Fundamentals

DevOps & Operations

Azure Cloud Engineering

AWS Cloud

AWS Monitoring & Observability

AWS Security Services

AWS Serverless

AWS Operations

AWS Advanced

AWS Case Studies

GCP Cloud Engineering

DevOps Tools

Database Engineering

HIPAA Compliance Mastery

Operating Systems

Linux Internals

Distributed Systems

Networking Mastery

Build Your Own X

Go Lang Mastery

C Programming

Classic Research Papers

Distributed System Tools

DALL-E Image Generation

Basic Image Generation

Prompt Engineering for Images

Batch Image Generation

Image Editing

Inpainting with DALL-E

AI-Powered Image Transformation

Production Patterns

Image Generation Service

Practice Exercise