DALL-E Image Generation
Basic Image Generation
Copy
from openai import OpenAI
import base64
from pathlib import Path
def generate_image(
    prompt: str,
    size: str = "1024x1024",
    quality: str = "standard",
    style: str = "vivid"
) -> str:
    """Generate an image with DALL-E 3 and return its hosted URL.

    size must be one of 1024x1024, 1792x1024, or 1024x1792;
    quality is "standard" or "hd"; style is "vivid" or "natural".
    """
    client = OpenAI()
    result = client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        size=size,
        quality=quality,
        style=style,
        n=1,
    )
    return result.data[0].url
def generate_and_save(
    prompt: str,
    output_path: str,
    **kwargs
) -> str:
    """Generate an image with DALL-E 3 and save it locally.

    Requests base64 image data (instead of a short-lived URL) and
    decodes it straight to disk. Extra keyword arguments (size,
    quality, style, ...) are forwarded to the API unchanged.

    Returns output_path for convenience.
    """
    client = OpenAI()
    # Default to b64 output without clobbering a caller-supplied value;
    # passing response_format positionally AND via **kwargs would raise.
    kwargs.setdefault("response_format", "b64_json")
    response = client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        **kwargs
    )
    image_data = base64.b64decode(response.data[0].b64_json)
    # Path.write_bytes handles open/close (and uses the Path import
    # that was previously unused).
    Path(output_path).write_bytes(image_data)
    return output_path
# Usage
# Fetch a hosted URL for an HD render
url = generate_image(
    "A futuristic city with flying cars at sunset, digital art style",
    quality="hd",
)
print(f"Image URL: {url}")

# Download a landscape-format image straight to disk
path = generate_and_save(
    "A cozy coffee shop interior with warm lighting",
    "coffee_shop.png",
    size="1792x1024",
)
print(f"Saved to: {path}")
Prompt Engineering for Images
Copy
from openai import OpenAI
from dataclasses import dataclass
@dataclass
class ImageStyle:
    """Predefined image style configurations."""
    name: str       # human-readable label for the style
    prefix: str     # text prepended to the subject when building the prompt
    suffix: str     # quality/style keywords appended to the prompt
    settings: dict  # extra images.generate parameters (e.g. style, quality)
class ImagePromptBuilder:
    """Build effective image generation prompts.

    Holds a catalogue of named styles; each contributes a prompt
    prefix/suffix plus the DALL-E settings (style, quality) to use.
    """

    STYLES = {
        "photorealistic": ImageStyle(
            name="Photorealistic",
            prefix="A photorealistic image of",
            suffix="high detail, 8k resolution, professional photography",
            settings={"style": "natural", "quality": "hd"}
        ),
        "digital_art": ImageStyle(
            name="Digital Art",
            prefix="Digital art illustration of",
            suffix="vibrant colors, detailed, trending on ArtStation",
            settings={"style": "vivid", "quality": "hd"}
        ),
        "oil_painting": ImageStyle(
            name="Oil Painting",
            prefix="Oil painting of",
            suffix="classical style, rich textures, masterful brushwork",
            settings={"style": "natural", "quality": "hd"}
        ),
        "minimalist": ImageStyle(
            name="Minimalist",
            prefix="Minimalist illustration of",
            suffix="simple shapes, clean lines, limited color palette",
            settings={"style": "natural", "quality": "standard"}
        ),
        "watercolor": ImageStyle(
            name="Watercolor",
            prefix="Watercolor painting of",
            suffix="soft edges, flowing colors, artistic",
            settings={"style": "natural", "quality": "hd"}
        ),
        "3d_render": ImageStyle(
            name="3D Render",
            prefix="3D rendered image of",
            suffix="octane render, ray tracing, studio lighting",
            settings={"style": "vivid", "quality": "hd"}
        ),
    }

    def __init__(self):
        self.client = OpenAI()

    def build_prompt(
        self,
        subject: str,
        style_name: str = "digital_art",
        additional_details: list[str] | None = None  # fixed: was annotated plain list[str] with a None default
    ) -> tuple[str, dict]:
        """Build an optimized prompt with style.

        Unknown style names silently fall back to "digital_art".
        Returns the assembled prompt and the style's API settings.
        """
        style = self.STYLES.get(style_name, self.STYLES["digital_art"])
        parts = [style.prefix, subject]
        if additional_details:
            parts.extend(additional_details)
        parts.append(style.suffix)
        prompt = ", ".join(parts)
        return prompt, style.settings

    def generate(
        self,
        subject: str,
        style_name: str = "digital_art",
        additional_details: list[str] | None = None,
        size: str = "1024x1024"
    ) -> str:
        """Generate an image with a styled prompt; returns the image URL."""
        prompt, settings = self.build_prompt(
            subject, style_name, additional_details
        )
        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size=size,
            **settings
        )
        return response.data[0].url
# Usage
builder = ImagePromptBuilder()
# Photorealistic style
url = builder.generate(
"a golden retriever playing in autumn leaves",
style_name="photorealistic"
)
# Digital art style
url = builder.generate(
"a futuristic robot gardener tending to plants",
style_name="digital_art",
additional_details=["peaceful scene", "morning light"]
)
# 3D render style
url = builder.generate(
"a glass sculpture of a hummingbird",
style_name="3d_render"
)
Batch Image Generation
Copy
from openai import OpenAI
import asyncio
from dataclasses import dataclass
import time
@dataclass
class ImageResult:
"""Result of an image generation request."""
prompt: str
url: str = None
error: str = None
generation_time: float = 0
class BatchImageGenerator:
    """Generate multiple images efficiently.

    Throttles requests client-side so that a burst of prompts stays
    within the configured requests-per-second budget.
    """

    def __init__(self, rate_limit: float = 1.0):
        self.client = OpenAI()
        self.rate_limit = rate_limit  # Requests per second
        self.last_request_time = 0

    def _wait_for_rate_limit(self):
        """Sleep just long enough to honour the request-rate budget."""
        min_interval = 1.0 / self.rate_limit
        remaining = min_interval - (time.time() - self.last_request_time)
        if remaining > 0:
            time.sleep(remaining)
        self.last_request_time = time.time()

    def generate_single(
        self,
        prompt: str,
        **kwargs
    ) -> ImageResult:
        """Generate one image, capturing any failure in the result object."""
        self._wait_for_rate_limit()
        started = time.time()
        try:
            response = self.client.images.generate(
                model="dall-e-3",
                prompt=prompt,
                **kwargs
            )
            return ImageResult(
                prompt=prompt,
                url=response.data[0].url,
                generation_time=time.time() - started
            )
        except Exception as exc:
            # Failures are reported per-prompt instead of aborting a batch.
            return ImageResult(
                prompt=prompt,
                error=str(exc),
                generation_time=time.time() - started
            )

    def generate_batch(
        self,
        prompts: list[str],
        progress_callback: callable = None,
        **kwargs
    ) -> list[ImageResult]:
        """Generate multiple images sequentially.

        progress_callback, if provided, is invoked after each prompt with
        (completed_count, total_count, result).
        """
        results = []
        total = len(prompts)
        for done, prompt in enumerate(prompts, start=1):
            outcome = self.generate_single(prompt, **kwargs)
            results.append(outcome)
            if progress_callback:
                progress_callback(done, total, outcome)
        return results

    def generate_variations_batch(
        self,
        base_prompt: str,
        variations: list[str],
        **kwargs
    ) -> list[ImageResult]:
        """Generate one image per variation appended to a base prompt."""
        combined = [
            f"{base_prompt}, {variation}"
            for variation in variations
        ]
        return self.generate_batch(combined, **kwargs)
# Usage
generator = BatchImageGenerator(rate_limit=0.5)  # 1 image per 2 seconds

# Generate multiple images
prompts = [
    "A serene lake at dawn with mountains in background",
    "A bustling night market in Tokyo",
    "An ancient library with magical floating books"
]

def on_progress(current, total, result):
    # Report each finished prompt with a short status line.
    if result.url:
        status = "OK"
    else:
        status = f"Error: {result.error}"
    print(f"[{current}/{total}] {result.prompt[:30]}... - {status}")

results = generator.generate_batch(prompts, progress_callback=on_progress)

# Generate four seasonal variations of one base prompt
variations = ["spring", "summer", "autumn", "winter"]
results = generator.generate_variations_batch(
    "A Japanese garden in",
    variations
)
Image Editing
Inpainting with DALL-E
Copy
from openai import OpenAI
from PIL import Image
import io
import base64
class ImageEditor:
    """Edit images using AI (DALL-E 2 edits and variations endpoints)."""

    def __init__(self):
        self.client = OpenAI()

    def create_mask(
        self,
        image_size: tuple[int, int],
        mask_region: tuple[int, int, int, int]
    ) -> bytes:
        """Create a mask for editing a specific region.

        image_size: (width, height) of the image being edited.
        mask_region: (x1, y1, x2, y2) box that should become editable.
        Returns the mask encoded as PNG bytes; transparent pixels mark
        the area the edits endpoint is allowed to repaint.
        """
        # Start fully opaque black: opaque pixels are preserved by the edit.
        mask = Image.new("RGBA", image_size, (0, 0, 0, 255))
        # Fill the edit region with transparent white in one C-level paste
        # instead of the original O(w*h) per-pixel putpixel loop —
        # identical resulting mask, dramatically faster.
        mask.paste((255, 255, 255, 0), mask_region)
        # Encode to PNG in memory
        buffer = io.BytesIO()
        mask.save(buffer, format="PNG")
        return buffer.getvalue()

    def edit_image(
        self,
        image_path: str,
        mask_path: str,
        prompt: str,
        size: str = "1024x1024"
    ) -> str:
        """Edit an image using a mask; returns the edited image's URL."""
        # Combined with-statement replaces the nested pair. DALL-E 2 is
        # used because the edits endpoint does not support DALL-E 3.
        with open(image_path, "rb") as img_file, open(mask_path, "rb") as mask_file:
            response = self.client.images.edit(
                model="dall-e-2",
                image=img_file,
                mask=mask_file,
                prompt=prompt,
                size=size,
                n=1
            )
        return response.data[0].url

    def create_variation(
        self,
        image_path: str,
        n: int = 1,
        size: str = "1024x1024"
    ) -> list[str]:
        """Create n variations of an existing image; returns their URLs."""
        with open(image_path, "rb") as img_file:
            response = self.client.images.create_variation(
                model="dall-e-2",
                image=img_file,
                n=n,
                size=size
            )
        return [img.url for img in response.data]

    def prepare_image_for_editing(
        self,
        image_path: str,
        target_size: int = 1024
    ) -> str:
        """Prepare an image for editing (resize and convert).

        The edits endpoint expects a square RGBA PNG, so the input is
        converted and resized, then written alongside the original with
        a "_prepared.png" suffix. Returns the new file's path.
        """
        img = Image.open(image_path)
        # Convert to RGBA (required for masked editing)
        if img.mode != "RGBA":
            img = img.convert("RGBA")
        # NOTE(review): forcing a square distorts non-square inputs; crop
        # or pad first if the aspect ratio matters.
        img = img.resize((target_size, target_size), Image.Resampling.LANCZOS)
        output_path = image_path.rsplit(".", 1)[0] + "_prepared.png"
        img.save(output_path, "PNG")
        return output_path
# Usage
editor = ImageEditor()

# Convert/resize the source photo into an editable square RGBA PNG
prepared = editor.prepare_image_for_editing("original.jpg")

# Build a mask that exposes only the centre of the canvas
mask_bytes = editor.create_mask(
    (1024, 1024),
    (300, 300, 700, 700)  # Center region
)
with open("mask.png", "wb") as f:
    f.write(mask_bytes)

# Inpaint the masked region with a new sky
result_url = editor.edit_image(
    prepared,
    "mask.png",
    "A beautiful sunset sky with pink and orange clouds"
)

# Create variations
variations = editor.create_variation("original.png", n=3)
AI-Powered Image Transformation
Copy
from openai import OpenAI
import base64
class ImageTransformer:
    """Transform images using vision and generation.

    DALL-E cannot take the original pixels as generation input, so every
    "transform" here is describe-then-regenerate: GPT-4o describes the
    source image, and a new image is generated from that description.
    Composition is therefore only approximately preserved.
    """

    def __init__(self):
        self.client = OpenAI()

    def analyze_image(self, image_path: str) -> str:
        """Analyze image content using GPT-4 Vision.

        Reads the file, base64-encodes it into a data URL, and asks the
        model for a generation-oriented description (style, colors,
        composition, lighting, mood).
        """
        with open(image_path, "rb") as f:
            image_data = base64.standard_b64encode(f.read()).decode()
        # NOTE(review): the data URL always claims image/png regardless of
        # the actual file type — confirm inputs are PNG or adjust the MIME.
        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{image_data}"
                            }
                        },
                        {
                            "type": "text",
                            "text": "Describe this image in detail for image generation. Include style, colors, composition, lighting, and mood."
                        }
                    ]
                }
            ]
        )
        return response.choices[0].message.content

    def transform_style(
        self,
        image_path: str,
        target_style: str
    ) -> str:
        """Transform image to a different style.

        Returns the URL of a newly generated image that re-renders the
        described scene in target_style.
        """
        # Analyze original image
        description = self.analyze_image(image_path)
        # Create prompt for new style
        prompt = f"""Transform this scene to {target_style} style:
Original description: {description}
Create the same scene and composition but in {target_style} style."""
        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size="1024x1024",
            quality="hd"
        )
        return response.data[0].url

    def extend_image(
        self,
        image_path: str,
        direction: str,
        extension_prompt: str
    ) -> str:
        """Conceptually extend an image (describe and regenerate larger scene).

        direction: one of "left", "right", "up", "down", "wider"; any
        other value is passed through verbatim as the direction text.
        extension_prompt: what the extended area should contain.
        """
        # Analyze original
        description = self.analyze_image(image_path)
        direction_prompts = {
            "left": "with additional content extending to the left",
            "right": "with additional content extending to the right",
            "up": "with additional content above the original scene",
            "down": "with additional content below the original scene",
            "wider": "as a wider panoramic version of this scene"
        }
        direction_text = direction_prompts.get(direction, direction)
        prompt = f"""Create an extended version of this scene {direction_text}:
Original scene: {description}
Extension: {extension_prompt}
Maintain consistent style, lighting, and composition."""
        # Horizontal extensions get a landscape canvas; vertical ones portrait.
        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size="1792x1024" if direction in ["left", "right", "wider"] else "1024x1792",
            quality="hd"
        )
        return response.data[0].url
# Usage
transformer = ImageTransformer()

# Re-render the photo in an anime look
anime_url = transformer.transform_style(
    "photo.jpg",
    "anime"
)

# Regenerate the landscape as a wider panorama with extra scenery
extended_url = transformer.extend_image(
    "landscape.jpg",
    "wider",
    "rolling hills with a distant village"
)
Production Patterns
Image Generation Service
Copy
from openai import OpenAI
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
import uuid
import time
import hashlib
class ImageStatus(Enum):
    """Lifecycle states of an image generation job."""
    PENDING = "pending"        # job created, not yet processed
    GENERATING = "generating"  # API request in flight
    COMPLETED = "completed"    # image URL available
    FAILED = "failed"          # validation or API error
@dataclass
class ImageJob:
    """An image generation job."""
    id: str                                # unique job identifier (uuid4 string)
    prompt: str                            # text prompt to generate from
    status: ImageStatus                    # current lifecycle state
    url: Optional[str] = None              # result URL once completed
    error: Optional[str] = None            # failure reason, if any
    created_at: float = field(default_factory=time.time)  # creation time (epoch seconds)
    completed_at: Optional[float] = None   # completion/failure time (epoch seconds)
    metadata: dict = field(default_factory=dict)  # settings, user_id, cache flags
class ImageGenerationService:
    """Production image generation service.

    Layers prompt validation, naive content filtering, an in-memory
    result cache, and per-job status tracking over the raw images API.
    """

    def __init__(
        self,
        cache_enabled: bool = True,
        max_prompt_length: int = 4000
    ):
        """cache_enabled: reuse URLs for identical prompt+settings pairs.
        max_prompt_length: reject prompts longer than this many characters.
        """
        self.client = OpenAI()
        self.jobs: dict[str, "ImageJob"] = {}
        # Maps request fingerprint -> previously generated URL.
        self.cache: dict[str, str] = {}
        self.cache_enabled = cache_enabled
        self.max_prompt_length = max_prompt_length

    def _get_cache_key(self, prompt: str, settings: dict) -> str:
        """Generate cache key for a request.

        Sorting the settings items makes the key independent of dict
        insertion order.
        """
        content = f"{prompt}:{str(sorted(settings.items()))}"
        # md5 is fine here: the digest is a cache fingerprint, not a
        # security credential.
        return hashlib.md5(content.encode()).hexdigest()

    def _validate_prompt(self, prompt: str) -> tuple[bool, str]:
        """Validate prompt before generation.

        Returns (ok, error_message); error_message is "" when valid.
        """
        if not prompt or not prompt.strip():
            return False, "Prompt cannot be empty"
        if len(prompt) > self.max_prompt_length:
            return False, f"Prompt exceeds maximum length of {self.max_prompt_length}"
        # Basic substring-based content filtering (extend as needed);
        # a real deployment should use a moderation endpoint instead.
        blocked_terms = ["explicit", "violent", "illegal"]
        prompt_lower = prompt.lower()
        for term in blocked_terms:
            if term in prompt_lower:
                # was a pointless f-string with no placeholders
                return False, "Prompt contains blocked content"
        return True, ""

    def create_job(
        self,
        prompt: str,
        size: str = "1024x1024",
        quality: str = "standard",
        style: str = "vivid",
        user_id: Optional[str] = None  # fixed: was annotated plain str with a None default
    ) -> "ImageJob":
        """Create an image generation job.

        Invalid prompts yield an immediately FAILED job; cache hits yield
        an immediately COMPLETED job; otherwise the job is PENDING and
        must be run via process_job().
        """
        # Validate prompt
        valid, error = self._validate_prompt(prompt)
        if not valid:
            job = ImageJob(
                id=str(uuid.uuid4()),
                prompt=prompt,
                status=ImageStatus.FAILED,
                error=error
            )
            self.jobs[job.id] = job
            return job
        settings = {"size": size, "quality": quality, "style": style}
        # Check cache: identical prompt+settings reuses the stored URL.
        if self.cache_enabled:
            cache_key = self._get_cache_key(prompt, settings)
            if cache_key in self.cache:
                job = ImageJob(
                    id=str(uuid.uuid4()),
                    prompt=prompt,
                    status=ImageStatus.COMPLETED,
                    url=self.cache[cache_key],
                    completed_at=time.time(),
                    metadata={"cached": True, "user_id": user_id}
                )
                self.jobs[job.id] = job
                return job
        # Create a pending job carrying its settings for process_job().
        job = ImageJob(
            id=str(uuid.uuid4()),
            prompt=prompt,
            status=ImageStatus.PENDING,
            metadata={"settings": settings, "user_id": user_id}
        )
        self.jobs[job.id] = job
        return job

    def process_job(self, job_id: str) -> "ImageJob":
        """Process a pending job.

        Raises ValueError for unknown job ids; non-PENDING jobs are
        returned unchanged (processing is idempotent per job).
        """
        job = self.jobs.get(job_id)
        if not job:
            raise ValueError(f"Job not found: {job_id}")
        if job.status != ImageStatus.PENDING:
            return job
        job.status = ImageStatus.GENERATING
        settings = job.metadata.get("settings", {})
        try:
            response = self.client.images.generate(
                model="dall-e-3",
                prompt=job.prompt,
                **settings
            )
            job.url = response.data[0].url
            job.status = ImageStatus.COMPLETED
            job.completed_at = time.time()
            # Cache result for identical future requests
            if self.cache_enabled:
                cache_key = self._get_cache_key(job.prompt, settings)
                self.cache[cache_key] = job.url
        except Exception as e:
            # Record the failure on the job instead of propagating,
            # so callers can inspect job.error.
            job.status = ImageStatus.FAILED
            job.error = str(e)
            job.completed_at = time.time()
        return job

    def get_job(self, job_id: str) -> Optional["ImageJob"]:
        """Get job status, or None if the id is unknown."""
        return self.jobs.get(job_id)

    def get_usage_stats(self) -> dict:
        """Get service usage statistics.

        avg_generation_time averages over all completed jobs, including
        cache hits (which are near-instant and pull the average down).
        """
        completed = [j for j in self.jobs.values() if j.status == ImageStatus.COMPLETED]
        failed = [j for j in self.jobs.values() if j.status == ImageStatus.FAILED]
        return {
            "total_jobs": len(self.jobs),
            "completed": len(completed),
            "failed": len(failed),
            "cache_hits": sum(1 for j in completed if j.metadata.get("cached")),
            "avg_generation_time": sum(
                j.completed_at - j.created_at for j in completed if j.completed_at
            ) / len(completed) if completed else 0
        }
# Usage
service = ImageGenerationService()

# Create a job (validated and possibly served from cache)
job = service.create_job(
    "A majestic dragon flying over a medieval castle",
    quality="hd",
    user_id="user_123"
)
print(f"Job created: {job.id}")

# Run the pending job against the API
result = service.process_job(job.id)
print(f"Status: {result.status.value}")
if result.url:
    print(f"Image URL: {result.url}")
else:
    print(f"Error: {result.error}")

# Inspect aggregate service statistics
stats = service.get_usage_stats()
print(f"Stats: {stats}")
Image Generation Best Practices
- Use detailed, specific prompts for better results
- Include style, lighting, and composition details
- Implement content moderation for user prompts
- Cache generated images to reduce costs
- Use appropriate quality settings for your use case
Practice Exercise
Build an image generation platform that:
- Accepts natural language descriptions
- Enhances prompts automatically for better results
- Supports multiple styles and configurations
- Implements content moderation
- Provides image variations and editing
Key considerations:
- Prompt optimization for quality
- Cost management through caching
- Content safety filtering
- User experience with progress feedback