DALL-E Image Generation
Basic Image Generation
from openai import OpenAI
import base64
from pathlib import Path
def generate_image(
    prompt: str,
    size: str = "1024x1024",
    quality: str = "standard",
    style: str = "vivid"
) -> str:
    """Generate a single DALL-E 3 image and return its hosted URL.

    Args:
        prompt: Text description of the desired image.
        size: "1024x1024", "1792x1024", or "1024x1792".
        quality: "standard" or "hd".
        style: "vivid" or "natural".

    Returns:
        URL of the generated image.
    """
    request = {
        "model": "dall-e-3",
        "prompt": prompt,
        "size": size,
        "quality": quality,
        "style": style,
        "n": 1,
    }
    client = OpenAI()
    result = client.images.generate(**request)
    return result.data[0].url
def generate_and_save(
    prompt: str,
    output_path: str,
    **kwargs
) -> str:
    """Generate an image with DALL-E 3 and write it to a local file.

    Extra keyword arguments (size, quality, style, ...) are forwarded to
    the images.generate call. Returns the path that was written.
    """
    client = OpenAI()
    # Ask for a base64 payload so we receive the bytes directly rather
    # than a temporary hosted URL.
    result = client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        response_format="b64_json",
        **kwargs
    )
    raw_bytes = base64.b64decode(result.data[0].b64_json)
    Path(output_path).write_bytes(raw_bytes)
    return output_path
# Usage
# Get URL
# NOTE(review): the returned URL is hosted by OpenAI and presumably
# temporary — download promptly if the image must be kept.
url = generate_image(
    "A futuristic city with flying cars at sunset, digital art style",
    quality="hd"
)
print(f"Image URL: {url}")
# Save locally
# generate_and_save forwards extra kwargs (here a wide 1792x1024 canvas).
path = generate_and_save(
    "A cozy coffee shop interior with warm lighting",
    "coffee_shop.png",
    size="1792x1024"
)
print(f"Saved to: {path}")
Prompt Engineering for Images
from openai import OpenAI
from dataclasses import dataclass
@dataclass
class ImageStyle:
    """Predefined image style configurations."""
    name: str  # Human-readable display name of the style
    prefix: str  # Text prepended before the subject in the prompt
    suffix: str  # Quality/style keywords appended after the subject
    settings: dict  # Extra images.generate kwargs (e.g. style, quality)
class ImagePromptBuilder:
    """Build effective image generation prompts."""

    # key -> (display name, prompt prefix, prompt suffix, generate() kwargs)
    _STYLE_TABLE = {
        "photorealistic": (
            "Photorealistic",
            "A photorealistic image of",
            "high detail, 8k resolution, professional photography",
            {"style": "natural", "quality": "hd"},
        ),
        "digital_art": (
            "Digital Art",
            "Digital art illustration of",
            "vibrant colors, detailed, trending on ArtStation",
            {"style": "vivid", "quality": "hd"},
        ),
        "oil_painting": (
            "Oil Painting",
            "Oil painting of",
            "classical style, rich textures, masterful brushwork",
            {"style": "natural", "quality": "hd"},
        ),
        "minimalist": (
            "Minimalist",
            "Minimalist illustration of",
            "simple shapes, clean lines, limited color palette",
            {"style": "natural", "quality": "standard"},
        ),
        "watercolor": (
            "Watercolor",
            "Watercolor painting of",
            "soft edges, flowing colors, artistic",
            {"style": "natural", "quality": "hd"},
        ),
        "3d_render": (
            "3D Render",
            "3D rendered image of",
            "octane render, ray tracing, studio lighting",
            {"style": "vivid", "quality": "hd"},
        ),
    }

    # Public registry, same shape as before: style key -> ImageStyle.
    STYLES = {
        key: ImageStyle(name=n, prefix=p, suffix=s, settings=cfg)
        for key, (n, p, s, cfg) in _STYLE_TABLE.items()
    }

    def __init__(self):
        self.client = OpenAI()

    def build_prompt(
        self,
        subject: str,
        style_name: str = "digital_art",
        additional_details: list[str] = None
    ) -> tuple[str, dict]:
        """Compose a styled prompt.

        Returns a (prompt, settings) pair, where settings are the kwargs
        to pass on to images.generate. Unknown style names fall back to
        the "digital_art" preset.
        """
        chosen = self.STYLES.get(style_name, self.STYLES["digital_art"])
        pieces = [chosen.prefix, subject, *(additional_details or [])]
        pieces.append(chosen.suffix)
        return ", ".join(pieces), chosen.settings

    def generate(
        self,
        subject: str,
        style_name: str = "digital_art",
        additional_details: list[str] = None,
        size: str = "1024x1024"
    ) -> str:
        """Generate an image of *subject* using the named style preset."""
        styled_prompt, preset_settings = self.build_prompt(
            subject, style_name, additional_details
        )
        api_response = self.client.images.generate(
            model="dall-e-3",
            prompt=styled_prompt,
            size=size,
            **preset_settings
        )
        return api_response.data[0].url
# Usage
builder = ImagePromptBuilder()
# Photorealistic style
url = builder.generate(
    "a golden retriever playing in autumn leaves",
    style_name="photorealistic"
)
# Digital art style
# additional_details are inserted between the subject and the style suffix.
url = builder.generate(
    "a futuristic robot gardener tending to plants",
    style_name="digital_art",
    additional_details=["peaceful scene", "morning light"]
)
# 3D render style
url = builder.generate(
    "a glass sculpture of a hummingbird",
    style_name="3d_render"
)
Batch Image Generation
from openai import OpenAI
import asyncio
from dataclasses import dataclass
import time
@dataclass
class ImageResult:
    """Result of an image generation request."""
    prompt: str  # The prompt that was submitted
    url: str = None  # Hosted image URL on success, otherwise None
    error: str = None  # Error message on failure, otherwise None
    generation_time: float = 0  # Wall-clock seconds spent on the request
class BatchImageGenerator:
    """Generate multiple images efficiently."""

    def __init__(self, rate_limit: float = 1.0):
        """rate_limit is the maximum number of requests per second."""
        self.client = OpenAI()
        self.rate_limit = rate_limit  # Requests per second
        self.last_request_time = 0

    def _wait_for_rate_limit(self):
        """Sleep just long enough to honour the configured request rate."""
        min_gap = 1.0 / self.rate_limit
        since_last = time.time() - self.last_request_time
        if since_last < min_gap:
            time.sleep(min_gap - since_last)
        self.last_request_time = time.time()

    def generate_single(
        self,
        prompt: str,
        **kwargs
    ) -> ImageResult:
        """Run one request, folding any exception into the result object."""
        self._wait_for_rate_limit()
        started = time.time()
        try:
            api_response = self.client.images.generate(
                model="dall-e-3",
                prompt=prompt,
                **kwargs
            )
            return ImageResult(
                prompt=prompt,
                url=api_response.data[0].url,
                generation_time=time.time() - started
            )
        except Exception as exc:
            return ImageResult(
                prompt=prompt,
                error=str(exc),
                generation_time=time.time() - started
            )

    def generate_batch(
        self,
        prompts: list[str],
        progress_callback: callable = None,
        **kwargs
    ) -> list[ImageResult]:
        """Generate images one by one, reporting progress after each.

        progress_callback, if given, receives (done_count, total, result).
        """
        outcomes: list[ImageResult] = []
        total = len(prompts)
        for position, prompt in enumerate(prompts, start=1):
            outcome = self.generate_single(prompt, **kwargs)
            outcomes.append(outcome)
            if progress_callback:
                progress_callback(position, total, outcome)
        return outcomes

    def generate_variations_batch(
        self,
        base_prompt: str,
        variations: list[str],
        **kwargs
    ) -> list[ImageResult]:
        """Generate one image per variation, each appended to base_prompt."""
        return self.generate_batch(
            [f"{base_prompt}, {variation}" for variation in variations],
            **kwargs
        )
# Usage
generator = BatchImageGenerator(rate_limit=0.5)  # 1 image per 2 seconds
# Generate multiple images
prompts = [
    "A serene lake at dawn with mountains in background",
    "A bustling night market in Tokyo",
    "An ancient library with magical floating books"
]
def on_progress(current, total, result):
    """Print a one-line status update after each finished request."""
    status = "OK" if result.url else f"Error: {result.error}"
    print(f"[{current}/{total}] {result.prompt[:30]}... - {status}")
results = generator.generate_batch(prompts, progress_callback=on_progress)
# Generate variations
# generate_variations_batch joins with ", ", producing prompts like
# "A Japanese garden in, spring".
variations = ["spring", "summer", "autumn", "winter"]
results = generator.generate_variations_batch(
    "A Japanese garden in",
    variations
)
Image Editing
Inpainting with DALL-E
from openai import OpenAI
from PIL import Image
import io
import base64
class ImageEditor:
    """Edit images using AI (DALL-E 2 edit and variation endpoints)."""

    def __init__(self):
        self.client = OpenAI()

    def create_mask(
        self,
        image_size: tuple[int, int],
        mask_region: tuple[int, int, int, int]
    ) -> bytes:
        """Create a PNG mask whose transparent rectangle marks the edit area.

        The images.edit endpoint repaints where the mask is transparent
        (alpha == 0) and preserves the opaque areas.

        Args:
            image_size: (width, height); must match the image being edited.
            mask_region: (x1, y1, x2, y2) half-open box to make editable.

        Returns:
            PNG-encoded mask bytes.
        """
        # Fully opaque base: these pixels are preserved by the edit.
        mask = Image.new("RGBA", image_size, (0, 0, 0, 255))
        # Fill the box in one C-level paste instead of the original
        # per-pixel putpixel() double loop, which was O(region area)
        # in pure Python.
        mask.paste((255, 255, 255, 0), mask_region)
        # Save to bytes
        buffer = io.BytesIO()
        mask.save(buffer, format="PNG")
        return buffer.getvalue()

    def edit_image(
        self,
        image_path: str,
        mask_path: str,
        prompt: str,
        size: str = "1024x1024"
    ) -> str:
        """Inpaint the masked region of an image according to *prompt*.

        Returns the URL of the edited image.
        """
        # Keep both handles open for the upload in a single with-statement.
        with open(image_path, "rb") as img_file, open(mask_path, "rb") as mask_file:
            response = self.client.images.edit(
                model="dall-e-2",  # DALL-E 2 for editing
                image=img_file,
                mask=mask_file,
                prompt=prompt,
                size=size,
                n=1
            )
        return response.data[0].url

    def create_variation(
        self,
        image_path: str,
        n: int = 1,
        size: str = "1024x1024"
    ) -> list[str]:
        """Create *n* variations of an existing image; returns their URLs."""
        with open(image_path, "rb") as img_file:
            response = self.client.images.create_variation(
                model="dall-e-2",
                image=img_file,
                n=n,
                size=size
            )
        return [img.url for img in response.data]

    def prepare_image_for_editing(
        self,
        image_path: str,
        target_size: int = 1024
    ) -> str:
        """Resize/convert an image into a square RGBA PNG for editing.

        Returns the path of the prepared copy ("<stem>_prepared.png").

        NOTE(review): non-square inputs are stretched to
        target_size x target_size, which distorts aspect ratio —
        pad or crop instead if that matters.
        """
        img = Image.open(image_path)
        # The edit endpoint needs RGBA so the mask alpha lines up.
        if img.mode != "RGBA":
            img = img.convert("RGBA")
        img = img.resize((target_size, target_size), Image.Resampling.LANCZOS)
        output_path = image_path.rsplit(".", 1)[0] + "_prepared.png"
        img.save(output_path, "PNG")
        return output_path
# Usage
editor = ImageEditor()
# Prepare image
# Produces a 1024x1024 RGBA PNG copy next to the original.
prepared = editor.prepare_image_for_editing("original.jpg")
# Create mask programmatically
mask_bytes = editor.create_mask(
    (1024, 1024),
    (300, 300, 700, 700)  # Center region
)
# Save mask
with open("mask.png", "wb") as f:
    f.write(mask_bytes)
# Edit the image
# Only the transparent mask region is repainted by the model.
result_url = editor.edit_image(
    prepared,
    "mask.png",
    "A beautiful sunset sky with pink and orange clouds"
)
# Create variations
variations = editor.create_variation("original.png", n=3)
AI-Powered Image Transformation
from openai import OpenAI
import base64
class ImageTransformer:
    """Transform images using vision and generation.

    Works by describing the source image with GPT-4o and then regenerating
    a new image from that description, so outputs approximate rather than
    pixel-edit the original.
    """

    def __init__(self):
        # One client shared by the chat (vision) and image endpoints.
        self.client = OpenAI()

    def analyze_image(self, image_path: str) -> str:
        """Analyze image content using GPT-4 Vision.

        Returns a generation-ready textual description covering style,
        colors, composition, lighting, and mood.
        """
        # Inline the file as a base64 data URL so it needs no hosting.
        with open(image_path, "rb") as f:
            image_data = base64.standard_b64encode(f.read()).decode()
        # NOTE(review): the data URL hard-codes image/png even for JPEG
        # inputs — confirm the API tolerates the mismatched media type.
        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{image_data}"
                            }
                        },
                        {
                            "type": "text",
                            "text": "Describe this image in detail for image generation. Include style, colors, composition, lighting, and mood."
                        }
                    ]
                }
            ]
        )
        return response.choices[0].message.content

    def transform_style(
        self,
        image_path: str,
        target_style: str
    ) -> str:
        """Transform image to a different style.

        Describe-then-regenerate: the result shares the described content
        but is a freshly generated image, not a restyled copy.
        """
        # Analyze original image
        description = self.analyze_image(image_path)
        # Create prompt for new style
        prompt = f"""Transform this scene to {target_style} style:
Original description: {description}
Create the same scene and composition but in {target_style} style."""
        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size="1024x1024",
            quality="hd"
        )
        return response.data[0].url

    def extend_image(
        self,
        image_path: str,
        direction: str,
        extension_prompt: str
    ) -> str:
        """Conceptually extend an image (describe and regenerate larger scene)."""
        # Analyze original
        description = self.analyze_image(image_path)
        # Canned directions map to phrasing; unknown values pass through
        # verbatim as the direction text.
        direction_prompts = {
            "left": "with additional content extending to the left",
            "right": "with additional content extending to the right",
            "up": "with additional content above the original scene",
            "down": "with additional content below the original scene",
            "wider": "as a wider panoramic version of this scene"
        }
        direction_text = direction_prompts.get(direction, direction)
        prompt = f"""Create an extended version of this scene {direction_text}:
Original scene: {description}
Extension: {extension_prompt}
Maintain consistent style, lighting, and composition."""
        # Landscape canvas for horizontal extensions, portrait otherwise.
        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size="1792x1024" if direction in ["left", "right", "wider"] else "1024x1792",
            quality="hd"
        )
        return response.data[0].url
# Usage
transformer = ImageTransformer()
# Transform to different style
# Result is a regenerated approximation of photo.jpg in anime style.
anime_url = transformer.transform_style(
    "photo.jpg",
    "anime"
)
# Extend image
# "wider" selects the 1792x1024 landscape canvas.
extended_url = transformer.extend_image(
    "landscape.jpg",
    "wider",
    "rolling hills with a distant village"
)
Production Patterns
Image Generation Service
from openai import OpenAI
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
import uuid
import time
import hashlib
class ImageStatus(Enum):
    """Lifecycle states of an image generation job."""
    PENDING = "pending"  # Created and validated, not yet processed
    GENERATING = "generating"  # API request in flight
    COMPLETED = "completed"  # URL available (freshly generated or cached)
    FAILED = "failed"  # Validation or API error; see ImageJob.error
@dataclass
class ImageJob:
    """An image generation job."""
    id: str  # Unique job identifier (uuid4 string)
    prompt: str  # Prompt submitted for generation
    status: ImageStatus  # Current lifecycle state
    url: Optional[str] = None  # Result URL once COMPLETED
    error: Optional[str] = None  # Failure reason once FAILED
    created_at: float = field(default_factory=time.time)  # Epoch seconds at creation
    completed_at: Optional[float] = None  # Epoch seconds when finished (success or failure)
    metadata: dict = field(default_factory=dict)  # settings, user_id, cached flag, etc.
class ImageGenerationService:
    """Production image generation service.

    Tracks jobs in memory, validates prompts, and optionally caches result
    URLs keyed by (prompt, settings).

    NOTE(review): cached values are OpenAI-hosted URLs, which are presumably
    temporary — a real deployment should download and store the bytes
    instead. TODO confirm URL retention.
    """

    def __init__(
        self,
        cache_enabled: bool = True,
        max_prompt_length: int = 4000
    ):
        """
        Args:
            cache_enabled: Serve repeat requests from the in-memory cache.
            max_prompt_length: Reject prompts longer than this many chars.
        """
        self.client = OpenAI()
        self.jobs: dict[str, ImageJob] = {}  # job id -> job record
        self.cache: dict[str, str] = {}  # cache key -> image URL
        self.cache_enabled = cache_enabled
        self.max_prompt_length = max_prompt_length

    def _get_cache_key(self, prompt: str, settings: dict) -> str:
        """Deterministic key for (prompt, settings), order-insensitive in settings."""
        # MD5 is fine here: the key is a cache identity, not a security token.
        content = f"{prompt}:{str(sorted(settings.items()))}"
        return hashlib.md5(content.encode()).hexdigest()

    def _validate_prompt(self, prompt: str) -> tuple[bool, str]:
        """Return (is_valid, error_message); error is "" when valid."""
        if not prompt or not prompt.strip():
            return False, "Prompt cannot be empty"
        if len(prompt) > self.max_prompt_length:
            return False, f"Prompt exceeds maximum length of {self.max_prompt_length}"
        # Basic content filtering (extend as needed)
        blocked_terms = ["explicit", "violent", "illegal"]
        prompt_lower = prompt.lower()
        for term in blocked_terms:
            if term in prompt_lower:
                return False, "Prompt contains blocked content"
        return True, ""

    def create_job(
        self,
        prompt: str,
        size: str = "1024x1024",
        quality: str = "standard",
        style: str = "vivid",
        user_id: Optional[str] = None
    ) -> ImageJob:
        """Create an image generation job.

        Never raises on bad input: an invalid prompt yields a FAILED job.
        A cache hit yields an already-COMPLETED job with no API call;
        otherwise the job is PENDING until process_job runs it.
        """
        # Validate prompt
        valid, error = self._validate_prompt(prompt)
        if not valid:
            job = ImageJob(
                id=str(uuid.uuid4()),
                prompt=prompt,
                status=ImageStatus.FAILED,
                error=error
            )
            self.jobs[job.id] = job
            return job
        settings = {"size": size, "quality": quality, "style": style}
        # Check cache
        if self.cache_enabled:
            cache_key = self._get_cache_key(prompt, settings)
            if cache_key in self.cache:
                job = ImageJob(
                    id=str(uuid.uuid4()),
                    prompt=prompt,
                    status=ImageStatus.COMPLETED,
                    url=self.cache[cache_key],
                    completed_at=time.time(),
                    metadata={"cached": True, "user_id": user_id}
                )
                self.jobs[job.id] = job
                return job
        # Create job
        job = ImageJob(
            id=str(uuid.uuid4()),
            prompt=prompt,
            status=ImageStatus.PENDING,
            metadata={"settings": settings, "user_id": user_id}
        )
        self.jobs[job.id] = job
        return job

    def process_job(self, job_id: str) -> ImageJob:
        """Run the API call for a PENDING job; no-op for other states.

        Raises:
            ValueError: If job_id is unknown.
        """
        job = self.jobs.get(job_id)
        if not job:
            raise ValueError(f"Job not found: {job_id}")
        if job.status != ImageStatus.PENDING:
            return job  # already processed, cached, or failed validation
        job.status = ImageStatus.GENERATING
        settings = job.metadata.get("settings", {})
        try:
            response = self.client.images.generate(
                model="dall-e-3",
                prompt=job.prompt,
                **settings
            )
            job.url = response.data[0].url
            job.status = ImageStatus.COMPLETED
            job.completed_at = time.time()
            # Cache result
            if self.cache_enabled:
                cache_key = self._get_cache_key(job.prompt, settings)
                self.cache[cache_key] = job.url
        except Exception as e:
            # Record failure on the job instead of propagating so callers
            # can poll job state uniformly.
            job.status = ImageStatus.FAILED
            job.error = str(e)
            job.completed_at = time.time()
        return job

    def get_job(self, job_id: str) -> Optional[ImageJob]:
        """Get job status (None if the id is unknown)."""
        return self.jobs.get(job_id)

    def get_usage_stats(self) -> dict:
        """Get service usage statistics.

        avg_generation_time averages only jobs that have a completion
        timestamp; the previous version summed that subset but divided by
        all completed jobs, skewing the mean.
        """
        completed = [j for j in self.jobs.values() if j.status == ImageStatus.COMPLETED]
        failed = [j for j in self.jobs.values() if j.status == ImageStatus.FAILED]
        durations = [
            j.completed_at - j.created_at for j in completed if j.completed_at
        ]
        return {
            "total_jobs": len(self.jobs),
            "completed": len(completed),
            "failed": len(failed),
            "cache_hits": sum(1 for j in completed if j.metadata.get("cached")),
            "avg_generation_time": sum(durations) / len(durations) if durations else 0
        }
# Usage
service = ImageGenerationService()
# Create and process a job
# create_job only validates and checks the cache; no API call yet.
job = service.create_job(
    "A majestic dragon flying over a medieval castle",
    quality="hd",
    user_id="user_123"
)
print(f"Job created: {job.id}")
# Process the job
# process_job performs the blocking API call for PENDING jobs.
result = service.process_job(job.id)
print(f"Status: {result.status.value}")
if result.url:
    print(f"Image URL: {result.url}")
else:
    print(f"Error: {result.error}")
# Get stats
stats = service.get_usage_stats()
print(f"Stats: {stats}")
Image Generation Best Practices
- Use detailed, specific prompts for better results
- Include style, lighting, and composition details
- Implement content moderation for user prompts
- Cache generated images to reduce costs
- Use appropriate quality settings for your use case
Practice Exercise
Build an image generation platform that:
- Accepts natural language descriptions
- Enhances prompts automatically for better results
- Supports multiple styles and configurations
- Implements content moderation
- Provides image variations and editing
Key considerations:
- Prompt optimization for quality
- Cost management through caching
- Content safety filtering
- User experience with progress feedback