Skip to main content
AI image generation enables creating, editing, and transforming images through natural language. This chapter covers practical patterns for production image generation.

DALL-E Image Generation

Basic Image Generation

from openai import OpenAI
import base64
from pathlib import Path


def generate_image(
    prompt: str,
    size: str = "1024x1024",
    quality: str = "standard",
    style: str = "vivid"
) -> str:
    """Request a single DALL-E 3 image and return its URL.

    Args:
        prompt: Text description of the desired image.
        size: Output dimensions: "1024x1024", "1792x1024", or "1024x1792".
        quality: "standard" or "hd".
        style: "vivid" or "natural".

    Returns:
        The ``url`` field of the first image in the API response.
    """
    request = {
        "model": "dall-e-3",
        "prompt": prompt,
        "size": size,
        "quality": quality,
        "style": style,
        "n": 1,
    }
    result = OpenAI().images.generate(**request)
    first_image = result.data[0]
    return first_image.url


def generate_and_save(
    prompt: str,
    output_path: str,
    **kwargs
) -> str:
    """Generate a DALL-E 3 image and write it to ``output_path``.

    The image is requested as base64 (``response_format="b64_json"``),
    decoded, and written in binary mode. Any extra keyword arguments are
    forwarded unchanged to ``client.images.generate``.

    Returns:
        ``output_path``, exactly as passed in.
    """
    api = OpenAI()
    generated = api.images.generate(
        model="dall-e-3",
        prompt=prompt,
        response_format="b64_json",
        **kwargs
    )

    # Decode before touching the filesystem so a bad payload leaves no file.
    raw_bytes = base64.b64decode(generated.data[0].b64_json)
    Path(output_path).write_bytes(raw_bytes)
    return output_path


# Usage
# Get a URL for an HD render; size and style keep the generate_image
# defaults ("1024x1024" and "vivid").
url = generate_image(
    "A futuristic city with flying cars at sunset, digital art style",
    quality="hd"
)
print(f"Image URL: {url}")

# Save locally: the image is fetched as base64 and written to
# coffee_shop.png. `size` reaches the API through generate_and_save's **kwargs.
path = generate_and_save(
    "A cozy coffee shop interior with warm lighting",
    "coffee_shop.png",
    size="1792x1024"
)
print(f"Saved to: {path}")

Prompt Engineering for Images

from openai import OpenAI
from dataclasses import dataclass


@dataclass
class ImageStyle:
    """Predefined image style configurations.

    Groups the prompt fragments and generation settings that make up one
    named visual style.
    """
    # Human-readable display name of the style.
    name: str
    # Text placed at the start of the prompt, before the subject.
    prefix: str
    # Text appended at the end of the prompt.
    suffix: str
    # Keyword arguments passed to the image generation call (e.g. style, quality).
    settings: dict


class ImagePromptBuilder:
    """Build effective image generation prompts.

    Each entry in ``STYLES`` pairs prompt fragments (a prefix and a suffix)
    with the API settings that suit the style. ``build_prompt`` assembles the
    final prompt text; ``generate`` sends it to DALL-E 3.
    """

    STYLES = {
        "photorealistic": ImageStyle(
            name="Photorealistic",
            prefix="A photorealistic image of",
            suffix="high detail, 8k resolution, professional photography",
            settings={"style": "natural", "quality": "hd"}
        ),
        "digital_art": ImageStyle(
            name="Digital Art",
            prefix="Digital art illustration of",
            suffix="vibrant colors, detailed, trending on ArtStation",
            settings={"style": "vivid", "quality": "hd"}
        ),
        "oil_painting": ImageStyle(
            name="Oil Painting",
            prefix="Oil painting of",
            suffix="classical style, rich textures, masterful brushwork",
            settings={"style": "natural", "quality": "hd"}
        ),
        "minimalist": ImageStyle(
            name="Minimalist",
            prefix="Minimalist illustration of",
            suffix="simple shapes, clean lines, limited color palette",
            settings={"style": "natural", "quality": "standard"}
        ),
        "watercolor": ImageStyle(
            name="Watercolor",
            prefix="Watercolor painting of",
            suffix="soft edges, flowing colors, artistic",
            settings={"style": "natural", "quality": "hd"}
        ),
        "3d_render": ImageStyle(
            name="3D Render",
            prefix="3D rendered image of",
            suffix="octane render, ray tracing, studio lighting",
            settings={"style": "vivid", "quality": "hd"}
        ),
    }

    def __init__(self):
        self.client = OpenAI()

    def build_prompt(
        self,
        subject: str,
        style_name: str = "digital_art",
        additional_details: list[str] = None
    ) -> tuple[str, dict]:
        """Build an optimized prompt with style.

        Args:
            subject: What the image should depict.
            style_name: Key into ``STYLES``; unknown names fall back to
                ``"digital_art"``.
            additional_details: Optional extra phrases inserted between the
                subject and the style suffix.

        Returns:
            A ``(prompt, settings)`` tuple. ``settings`` is a fresh copy, so
            callers may modify it without affecting the shared presets.
        """
        style = self.STYLES.get(style_name, self.STYLES["digital_art"])

        # The prefix ends in "of", so it must flow straight into the subject;
        # comma-joining them would yield "... image of, a golden retriever".
        parts = [f"{style.prefix} {subject}"]

        if additional_details:
            parts.extend(additional_details)

        parts.append(style.suffix)

        prompt = ", ".join(parts)

        # STYLES is class-level state shared by every builder instance, so
        # never hand out the preset's own dict.
        return prompt, dict(style.settings)

    def generate(
        self,
        subject: str,
        style_name: str = "digital_art",
        additional_details: list[str] = None,
        size: str = "1024x1024"
    ) -> str:
        """Generate image with styled prompt.

        Builds the prompt via ``build_prompt`` and calls DALL-E 3 with the
        style's settings plus ``size``.

        Returns:
            The URL of the first generated image.
        """
        prompt, settings = self.build_prompt(
            subject, style_name, additional_details
        )

        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size=size,
            **settings
        )

        return response.data[0].url


# Usage
# One builder (and therefore one OpenAI client) is reused for all requests.
builder = ImagePromptBuilder()

# Photorealistic style (preset settings: style="natural", quality="hd")
url = builder.generate(
    "a golden retriever playing in autumn leaves",
    style_name="photorealistic"
)

# Digital art style; the extra details are inserted between the subject
# and the style's suffix. Note that `url` is overwritten by each call.
url = builder.generate(
    "a futuristic robot gardener tending to plants",
    style_name="digital_art",
    additional_details=["peaceful scene", "morning light"]
)

# 3D render style (preset settings: style="vivid", quality="hd")
url = builder.generate(
    "a glass sculpture of a hummingbird",
    style_name="3d_render"
)

Batch Image Generation

from openai import OpenAI
import asyncio
from dataclasses import dataclass
import time


@dataclass
class ImageResult:
    """Result of an image generation request.

    Holds either a URL (success) or an error message (failure) for one
    prompt, together with how long the request took.
    """
    # Prompt that was submitted for this request.
    prompt: str
    # URL of the generated image; stays None when no URL was produced.
    url: str = None
    # Error description; stays None when no error was recorded.
    error: str = None
    # Elapsed time for the request, in seconds.
    generation_time: float = 0


class BatchImageGenerator:
    """Generate multiple images sequentially while throttling API calls.

    Requests are spaced at least ``1 / rate_limit`` seconds apart. A
    ``rate_limit`` of zero or less disables throttling.
    """

    def __init__(self, rate_limit: float = 1.0):
        """Create a generator.

        Args:
            rate_limit: Maximum requests per second; ``<= 0`` means unlimited.
        """
        self.client = OpenAI()
        self.rate_limit = rate_limit  # Requests per second
        # -inf means "no request sent yet", so the first call never waits.
        self.last_request_time = float("-inf")

    def _wait_for_rate_limit(self):
        """Sleep just long enough to respect ``rate_limit``, then stamp the call."""
        # Guarding on > 0 avoids a ZeroDivisionError for rate_limit == 0.
        if self.rate_limit > 0:
            min_interval = 1.0 / self.rate_limit
            # A monotonic clock keeps wall-clock adjustments from producing
            # bogus (possibly very long) sleeps.
            elapsed = time.monotonic() - self.last_request_time
            if elapsed < min_interval:
                time.sleep(min_interval - elapsed)

        self.last_request_time = time.monotonic()

    def generate_single(
        self,
        prompt: str,
        **kwargs
    ) -> "ImageResult":
        """Generate a single image.

        Any exception raised by the API call is captured in the result's
        ``error`` field instead of propagating, so one failure does not abort
        a batch.

        Args:
            prompt: Text description of the image.
            **kwargs: Forwarded to ``client.images.generate``.

        Returns:
            An ``ImageResult`` with either ``url`` or ``error`` set, plus the
            elapsed time in seconds.
        """
        self._wait_for_rate_limit()

        started = time.monotonic()

        try:
            response = self.client.images.generate(
                model="dall-e-3",
                prompt=prompt,
                **kwargs
            )
        except Exception as e:
            # Deliberate best-effort: report the failure in the result.
            return ImageResult(
                prompt=prompt,
                error=str(e),
                generation_time=time.monotonic() - started
            )

        return ImageResult(
            prompt=prompt,
            url=response.data[0].url,
            generation_time=time.monotonic() - started
        )

    def generate_batch(
        self,
        prompts: list[str],
        progress_callback: callable = None,
        **kwargs
    ) -> "list[ImageResult]":
        """Generate multiple images sequentially.

        Args:
            prompts: Prompts to generate, in order.
            progress_callback: Optional callable invoked after each prompt as
                ``progress_callback(done_count, total, result)``.
            **kwargs: Forwarded to ``generate_single`` for every prompt.

        Returns:
            One ``ImageResult`` per prompt, in input order.
        """
        results = []
        total = len(prompts)

        for done, prompt in enumerate(prompts, start=1):
            result = self.generate_single(prompt, **kwargs)
            results.append(result)

            if progress_callback:
                progress_callback(done, total, result)

        return results

    def generate_variations_batch(
        self,
        base_prompt: str,
        variations: list[str],
        **kwargs
    ) -> "list[ImageResult]":
        """Generate variations of a base prompt.

        Each prompt is built as ``"<base_prompt>, <variation>"`` and the list
        is processed by ``generate_batch``.
        """
        prompts = [
            f"{base_prompt}, {variation}"
            for variation in variations
        ]

        return self.generate_batch(prompts, **kwargs)


# Usage
# rate_limit is in requests per second, so 0.5 spaces calls 2 seconds apart.
generator = BatchImageGenerator(rate_limit=0.5)  # 1 image per 2 seconds

# Generate multiple images: each prompt becomes one sequential API request.
prompts = [
    "A serene lake at dawn with mountains in background",
    "A bustling night market in Tokyo",
    "An ancient library with magical floating books"
]

def on_progress(current, total, result):
    """Print a one-line progress report for a finished batch item.

    Shows the position in the batch, the first 30 characters of the prompt,
    and either "OK" or the recorded error.
    """
    if result.url:
        outcome = "OK"
    else:
        outcome = f"Error: {result.error}"
    preview = result.prompt[:30]
    print(f"[{current}/{total}] {preview}... - {outcome}")

# Run the batch; on_progress is called once after each prompt completes.
results = generator.generate_batch(prompts, progress_callback=on_progress)

# Generate variations: each prompt is built as "<base>, <variation>",
# e.g. "A Japanese garden in, spring". This rebinds `results`, so the
# batch results above are no longer reachable through that name.
variations = ["spring", "summer", "autumn", "winter"]
results = generator.generate_variations_batch(
    "A Japanese garden in",
    variations
)

Image Editing

Inpainting with DALL-E

from openai import OpenAI
from PIL import Image
import io
import base64


class ImageEditor:
    """Edit images using AI.

    Wraps the OpenAI image edit and variation endpoints (DALL-E 2) and adds
    Pillow-based helpers for building masks and preparing input files.
    """

    def __init__(self):
        self.client = OpenAI()

    def create_mask(
        self,
        image_size: tuple[int, int],
        mask_region: tuple[int, int, int, int]
    ) -> bytes:
        """Create a mask for editing a specific region.

        The mask is fully opaque except for ``mask_region``, which is made
        fully transparent; the transparent area marks where the edit applies.

        Args:
            image_size: ``(width, height)`` of the mask in pixels.
            mask_region: ``(x1, y1, x2, y2)`` rectangle to edit; ``x2``/``y2``
                are exclusive. The rectangle is clipped to the image bounds,
                and an empty rectangle leaves the mask fully opaque.

        Returns:
            The mask encoded as PNG bytes.
        """
        # Start fully opaque: nothing is editable until a region is cleared.
        mask = Image.new("RGBA", image_size, (0, 0, 0, 255))

        width, height = image_size
        x1, y1, x2, y2 = mask_region
        left, top = max(0, x1), max(0, y1)
        right, bottom = min(width, x2), min(height, y2)

        # One rectangle fill replaces a per-pixel Python loop; alpha 0 marks
        # the editable area.
        if left < right and top < bottom:
            mask.paste((255, 255, 255, 0), (left, top, right, bottom))

        buffer = io.BytesIO()
        mask.save(buffer, format="PNG")
        return buffer.getvalue()

    def edit_image(
        self,
        image_path: str,
        mask_path: str,
        prompt: str,
        size: str = "1024x1024"
    ) -> str:
        """Edit an image using a mask.

        Args:
            image_path: Path of the source image file.
            mask_path: Path of the mask image file.
            prompt: Description of the desired result.
            size: Requested output size.

        Returns:
            The URL of the edited image.
        """
        with open(image_path, "rb") as img_file, open(mask_path, "rb") as mask_file:
            response = self.client.images.edit(
                model="dall-e-2",  # DALL-E 2 for editing
                image=img_file,
                mask=mask_file,
                prompt=prompt,
                size=size,
                n=1
            )

        return response.data[0].url

    def create_variation(
        self,
        image_path: str,
        n: int = 1,
        size: str = "1024x1024"
    ) -> list[str]:
        """Create variations of an existing image.

        Args:
            image_path: Path of the source image file.
            n: Number of variations to request.
            size: Requested output size.

        Returns:
            The URLs of the generated variations.
        """
        with open(image_path, "rb") as img_file:
            response = self.client.images.create_variation(
                model="dall-e-2",
                image=img_file,
                n=n,
                size=size
            )

        return [img.url for img in response.data]

    @staticmethod
    def _prepared_output_path(image_path: str) -> str:
        """Return ``image_path`` with its extension replaced by ``_prepared.png``."""
        import os

        # splitext only strips an extension from the final path component, so
        # dots in directory names (e.g. "photos.v2/img") are left alone.
        stem, _extension = os.path.splitext(image_path)
        return stem + "_prepared.png"

    def prepare_image_for_editing(
        self,
        image_path: str,
        target_size: int = 1024
    ) -> str:
        """Prepare an image for editing (resize and convert).

        Converts the image to RGBA, resizes it to a ``target_size`` x
        ``target_size`` square (the aspect ratio is NOT preserved), and saves
        it as PNG next to the original.

        Args:
            image_path: Path of the image to prepare.
            target_size: Edge length of the square output, in pixels.

        Returns:
            Path of the written ``*_prepared.png`` file.
        """
        # The context manager closes the source file handle once the pixel
        # data has been copied into the resized image.
        with Image.open(image_path) as source:
            rgba = source if source.mode == "RGBA" else source.convert("RGBA")
            resized = rgba.resize(
                (target_size, target_size), Image.Resampling.LANCZOS
            )

        output_path = self._prepared_output_path(image_path)
        resized.save(output_path, "PNG")

        return output_path


# Usage
editor = ImageEditor()

# Prepare image: writes an RGBA, 1024x1024 PNG copy and returns its path.
prepared = editor.prepare_image_for_editing("original.jpg")

# Create mask programmatically; the mask size matches the prepared image.
mask_bytes = editor.create_mask(
    (1024, 1024),
    (300, 300, 700, 700)  # Center region
)

# Save mask to disk, because edit_image takes file paths rather than bytes.
with open("mask.png", "wb") as f:
    f.write(mask_bytes)

# Edit the image: the prompt describes what should fill the masked region.
result_url = editor.edit_image(
    prepared,
    "mask.png",
    "A beautiful sunset sky with pink and orange clouds"
)

# Create variations
# NOTE(review): this reads "original.png", which nothing above creates (the
# prepared copy is at `prepared`) — confirm the intended input file.
variations = editor.create_variation("original.png", n=3)

AI-Powered Image Transformation

from openai import OpenAI
import base64


class ImageTransformer:
    """Transform images using vision and generation.

    Images are not edited in place: a vision model first describes the
    source image, then DALL-E 3 generates a new image from that description.
    """

    # Directions that map to a landscape canvas; all others use portrait.
    _LANDSCAPE_DIRECTIONS = frozenset({"left", "right", "wider"})

    def __init__(self):
        self.client = OpenAI()

    @staticmethod
    def _to_data_url(image_path: str) -> str:
        """Read ``image_path`` and return it as a base64 ``data:`` URL.

        The MIME type is guessed from the file extension, so a JPEG is not
        mislabelled as PNG. Unknown or non-image extensions fall back to
        ``image/png``.
        """
        import mimetypes

        mime_type, _encoding = mimetypes.guess_type(image_path)
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/png"

        with open(image_path, "rb") as f:
            encoded = base64.standard_b64encode(f.read()).decode()

        return f"data:{mime_type};base64,{encoded}"

    def _generate_hd(self, prompt: str, size: str) -> str:
        """Generate one HD DALL-E 3 image and return its URL."""
        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size=size,
            quality="hd"
        )
        return response.data[0].url

    def analyze_image(self, image_path: str) -> str:
        """Analyze image content using GPT-4 Vision.

        Sends the image inline as a data URL to ``gpt-4o`` and asks for a
        description suitable for image generation.

        Returns:
            The model's text description of the image.
        """
        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": self._to_data_url(image_path)
                            }
                        },
                        {
                            "type": "text",
                            "text": "Describe this image in detail for image generation. Include style, colors, composition, lighting, and mood."
                        }
                    ]
                }
            ]
        )

        return response.choices[0].message.content

    def transform_style(
        self,
        image_path: str,
        target_style: str
    ) -> str:
        """Transform image to a different style.

        Args:
            image_path: Path of the source image.
            target_style: Name of the style to render the scene in.

        Returns:
            The URL of the newly generated 1024x1024 image.
        """
        # Analyze original image
        description = self.analyze_image(image_path)

        # Create prompt for new style
        prompt = f"""Transform this scene to {target_style} style:

Original description: {description}

Create the same scene and composition but in {target_style} style."""

        return self._generate_hd(prompt, "1024x1024")

    def extend_image(
        self,
        image_path: str,
        direction: str,
        extension_prompt: str
    ) -> str:
        """Conceptually extend an image (describe and regenerate larger scene).

        Args:
            image_path: Path of the source image.
            direction: "left", "right", "up", "down", or "wider". Any other
                value is inserted into the prompt verbatim and uses the
                portrait canvas.
            extension_prompt: Description of the content to add.

        Returns:
            The URL of the generated image: 1792x1024 for left/right/wider,
            1024x1792 otherwise.
        """
        # Analyze original
        description = self.analyze_image(image_path)

        direction_prompts = {
            "left": "with additional content extending to the left",
            "right": "with additional content extending to the right",
            "up": "with additional content above the original scene",
            "down": "with additional content below the original scene",
            "wider": "as a wider panoramic version of this scene"
        }

        direction_text = direction_prompts.get(direction, direction)

        prompt = f"""Create an extended version of this scene {direction_text}:

Original scene: {description}

Extension: {extension_prompt}

Maintain consistent style, lighting, and composition."""

        if direction in self._LANDSCAPE_DIRECTIONS:
            size = "1792x1024"
        else:
            size = "1024x1792"

        return self._generate_hd(prompt, size)


# Usage
transformer = ImageTransformer()

# Transform to different style: the photo is first described by the vision
# model, then a new image is generated from that description in anime style.
anime_url = transformer.transform_style(
    "photo.jpg",
    "anime"
)

# Extend image: "wider" selects the panoramic prompt wording and the
# 1792x1024 landscape output size.
extended_url = transformer.extend_image(
    "landscape.jpg",
    "wider",
    "rolling hills with a distant village"
)

Production Patterns

Image Generation Service

from openai import OpenAI
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
import uuid
import time
import hashlib


class ImageStatus(Enum):
    """Lifecycle states of an image generation job."""
    # Job has been created but not yet processed.
    PENDING = "pending"
    # Job is currently being sent to the image API.
    GENERATING = "generating"
    # Job finished and has a result URL.
    COMPLETED = "completed"
    # Job was rejected during validation or the API call raised.
    FAILED = "failed"


@dataclass
class ImageJob:
    """An image generation job."""
    # Unique job identifier.
    id: str
    # Prompt text submitted for generation.
    prompt: str
    # Current lifecycle state of the job.
    status: ImageStatus
    # URL of the generated image; None until one is available.
    url: Optional[str] = None
    # Error message; None unless the job failed.
    error: Optional[str] = None
    # time.time() timestamp taken when the job object is created.
    created_at: float = field(default_factory=time.time)
    # Timestamp of when the job finished; None while unfinished.
    completed_at: Optional[float] = None
    # Free-form extra data about the job; a fresh dict per instance.
    metadata: dict = field(default_factory=dict)


class ImageGenerationService:
    """Production image generation service.

    Tracks every request as an ``ImageJob`` held in memory, validates prompts
    before spending an API call, and can reuse the URL of an earlier
    identical request (same prompt and settings) while that URL is still
    considered fresh.
    """

    # Substrings that cause a prompt to be rejected.
    # Basic content filtering (extend as needed).
    BLOCKED_TERMS = ("explicit", "violent", "illegal")

    def __init__(
        self,
        cache_enabled: bool = True,
        max_prompt_length: int = 4000,
        cache_ttl: Optional[float] = 3000.0
    ):
        """Create a service.

        Args:
            cache_enabled: Whether identical requests may reuse a cached URL.
            max_prompt_length: Longest prompt accepted, in characters.
            cache_ttl: Seconds a cached URL may be reused, or ``None`` for no
                expiry. API-hosted image URLs are temporary, so the default
                stays below their documented lifetime.
        """
        self.client = OpenAI()
        self.jobs: dict[str, "ImageJob"] = {}
        self.cache: dict[str, str] = {}
        # time.monotonic() value at insertion, per cache key, for expiry.
        self._cache_stored_at: dict[str, float] = {}
        self.cache_enabled = cache_enabled
        self.cache_ttl = cache_ttl
        self.max_prompt_length = max_prompt_length

    def _get_cache_key(self, prompt: str, settings: dict) -> str:
        """Generate cache key for a request.

        Settings are sorted first so the key does not depend on dict order.
        """
        content = f"{prompt}:{str(sorted(settings.items()))}"
        # MD5 is only a fingerprint here; flagging it as non-security keeps
        # it usable on FIPS-restricted builds.
        return hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()

    def _cache_lookup(self, cache_key: str) -> Optional[str]:
        """Return the cached URL for ``cache_key``, or ``None`` if absent/expired.

        Expired entries are evicted. Entries without a recorded insertion
        time (e.g. written directly into ``self.cache``) never expire.
        """
        url = self.cache.get(cache_key)
        if url is None:
            return None

        stored_at = self._cache_stored_at.get(cache_key)
        expired = (
            self.cache_ttl is not None
            and stored_at is not None
            and time.monotonic() - stored_at >= self.cache_ttl
        )
        if expired:
            self.cache.pop(cache_key, None)
            self._cache_stored_at.pop(cache_key, None)
            return None

        return url

    def _cache_store(self, cache_key: str, url: str) -> None:
        """Remember ``url`` for ``cache_key`` and record when it was stored."""
        self.cache[cache_key] = url
        self._cache_stored_at[cache_key] = time.monotonic()

    def _register(self, job: "ImageJob") -> "ImageJob":
        """Store ``job`` in the job table and return it."""
        self.jobs[job.id] = job
        return job

    def _validate_prompt(self, prompt: str) -> tuple[bool, str]:
        """Validate prompt before generation.

        Returns:
            ``(True, "")`` when acceptable, otherwise ``(False, reason)``.
        """
        if not prompt or not prompt.strip():
            return False, "Prompt cannot be empty"

        if len(prompt) > self.max_prompt_length:
            return False, f"Prompt exceeds maximum length of {self.max_prompt_length}"

        # Case-insensitive substring match against the block list.
        prompt_lower = prompt.lower()
        if any(term in prompt_lower for term in self.BLOCKED_TERMS):
            return False, "Prompt contains blocked content"

        return True, ""

    def create_job(
        self,
        prompt: str,
        size: str = "1024x1024",
        quality: str = "standard",
        style: str = "vivid",
        user_id: str = None
    ) -> "ImageJob":
        """Create an image generation job.

        The returned job is in one of three states:

        * ``FAILED`` - the prompt did not pass validation (see ``error``).
        * ``COMPLETED`` - a fresh cached URL existed for the same request.
        * ``PENDING`` - the job must be run with ``process_job``.

        The job is always recorded in ``self.jobs``.
        """
        valid, error = self._validate_prompt(prompt)
        if not valid:
            return self._register(ImageJob(
                id=str(uuid.uuid4()),
                prompt=prompt,
                status=ImageStatus.FAILED,
                error=error
            ))

        settings = {"size": size, "quality": quality, "style": style}

        if self.cache_enabled:
            cached_url = self._cache_lookup(self._get_cache_key(prompt, settings))
            if cached_url is not None:
                return self._register(ImageJob(
                    id=str(uuid.uuid4()),
                    prompt=prompt,
                    status=ImageStatus.COMPLETED,
                    url=cached_url,
                    completed_at=time.time(),
                    # "settings" is included so cached and generated jobs
                    # carry the same metadata shape.
                    metadata={
                        "cached": True,
                        "settings": settings,
                        "user_id": user_id
                    }
                ))

        return self._register(ImageJob(
            id=str(uuid.uuid4()),
            prompt=prompt,
            status=ImageStatus.PENDING,
            metadata={"settings": settings, "user_id": user_id}
        ))

    def process_job(self, job_id: str) -> "ImageJob":
        """Process a pending job.

        Jobs that are not ``PENDING`` are returned untouched. API errors are
        captured on the job (status ``FAILED``, ``error`` set), not raised.

        Raises:
            ValueError: If ``job_id`` is unknown.
        """
        job = self.jobs.get(job_id)
        if not job:
            raise ValueError(f"Job not found: {job_id}")

        if job.status != ImageStatus.PENDING:
            return job

        job.status = ImageStatus.GENERATING
        settings = job.metadata.get("settings", {})

        try:
            response = self.client.images.generate(
                model="dall-e-3",
                prompt=job.prompt,
                **settings
            )

            job.url = response.data[0].url
            job.status = ImageStatus.COMPLETED
            job.completed_at = time.time()

            if self.cache_enabled:
                self._cache_store(
                    self._get_cache_key(job.prompt, settings), job.url
                )

        except Exception as e:
            job.status = ImageStatus.FAILED
            job.error = str(e)
            job.completed_at = time.time()

        return job

    def get_job(self, job_id: str) -> Optional["ImageJob"]:
        """Get job status. Returns ``None`` for an unknown id."""
        return self.jobs.get(job_id)

    def get_usage_stats(self) -> dict:
        """Get service usage statistics.

        ``avg_generation_time`` is the mean of ``completed_at - created_at``
        over completed jobs that were actually generated. Cache hits are
        excluded because their near-zero duration would skew the average.
        """
        completed = [j for j in self.jobs.values() if j.status == ImageStatus.COMPLETED]
        failed = [j for j in self.jobs.values() if j.status == ImageStatus.FAILED]
        generated = [
            j for j in completed
            if not j.metadata.get("cached") and j.completed_at is not None
        ]

        if generated:
            avg_generation_time = sum(
                j.completed_at - j.created_at for j in generated
            ) / len(generated)
        else:
            avg_generation_time = 0

        return {
            "total_jobs": len(self.jobs),
            "completed": len(completed),
            "failed": len(failed),
            "cache_hits": sum(1 for j in completed if j.metadata.get("cached")),
            "avg_generation_time": avg_generation_time
        }


# Usage
# Defaults: caching enabled, prompts limited to 4000 characters.
service = ImageGenerationService()

# Create and process a job. create_job only validates and registers the
# request; the API call happens later, in process_job.
job = service.create_job(
    "A majestic dragon flying over a medieval castle",
    quality="hd",
    user_id="user_123"
)

print(f"Job created: {job.id}")

# Process the job (returns it unchanged unless its status is PENDING).
result = service.process_job(job.id)
print(f"Status: {result.status.value}")

# A job without a URL is reported through its error message.
if result.url:
    print(f"Image URL: {result.url}")
else:
    print(f"Error: {result.error}")

# Get stats
stats = service.get_usage_stats()
print(f"Stats: {stats}")
Image Generation Best Practices
  • Use detailed, specific prompts for better results
  • Include style, lighting, and composition details
  • Implement content moderation for user prompts
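  • Cache generated images to reduce costs (store the image data itself or expire cached URLs, because API-hosted image URLs are only valid temporarily)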
  • Use appropriate quality settings for your use case

Practice Exercise

Build an image generation platform that:
  1. Accepts natural language descriptions
  2. Enhances prompts automatically for better results
  3. Supports multiple styles and configurations
  4. Implements content moderation
  5. Provides image variations and editing
Focus on:
  • Prompt optimization for quality
  • Cost management through caching
  • Content safety filtering
  • User experience with progress feedback