Skip to main content

Documentation Index

Fetch the complete documentation index at: https://resources.devweekends.com/llms.txt

Use this file to discover all available pages before exploring further.

AI image generation enables creating, editing, and transforming images through natural language. This chapter covers practical patterns for production image generation.

DALL-E Image Generation

Basic Image Generation

from openai import OpenAI
import base64
from pathlib import Path


def generate_image(
    prompt: str,
    size: str = "1024x1024",
    quality: str = "standard",
    style: str = "vivid"
) -> str:
    """Request a single DALL-E 3 image and return its URL.

    Args:
        prompt: Text description of the image to generate.
        size: Output dimensions: "1024x1024", "1792x1024", or "1024x1792".
        quality: "standard" or "hd".
        style: "vivid" or "natural".

    Returns:
        The ``url`` field of the first image in the API response.
    """
    # Collect the request parameters in one place before sending them.
    request = {
        "model": "dall-e-3",
        "prompt": prompt,
        "size": size,
        "quality": quality,
        "style": style,
        "n": 1,
    }

    result = OpenAI().images.generate(**request)
    first_image = result.data[0]
    return first_image.url


def generate_and_save(
    prompt: str,
    output_path: str,
    **kwargs
) -> str:
    """Generate an image with DALL-E 3 and write it to ``output_path``.

    The image is requested with ``response_format="b64_json"`` so the
    response carries the image bytes inline as base64.

    Args:
        prompt: Text description of the image to generate.
        output_path: Destination file; it is opened for binary writing.
        **kwargs: Extra keyword arguments forwarded to
            ``client.images.generate``.

    Returns:
        ``output_path``, unchanged.
    """
    api_response = OpenAI().images.generate(
        model="dall-e-3",
        prompt=prompt,
        response_format="b64_json",
        **kwargs
    )

    encoded_image = api_response.data[0].b64_json
    Path(output_path).write_bytes(base64.b64decode(encoded_image))
    return output_path


# Usage
# Request an HD render and print the URL returned by the API
url = generate_image(
    prompt="A futuristic city with flying cars at sunset, digital art style",
    quality="hd",
)
print(f"Image URL: {url}")

# Render a landscape image and write it to a local PNG file
path = generate_and_save(
    prompt="A cozy coffee shop interior with warm lighting",
    output_path="coffee_shop.png",
    size="1792x1024",
)
print(f"Saved to: {path}")

Prompt Engineering for Images

from openai import OpenAI
from dataclasses import dataclass


@dataclass
class ImageStyle:
    """Predefined image style configuration.

    Attributes:
        name: Human-readable style name.
        prefix: Text placed directly before the subject in the prompt.
        suffix: Style descriptors appended to the end of the prompt.
        settings: Keyword arguments forwarded to the image generation call
            (the predefined styles set ``style`` and ``quality``).
    """
    name: str
    prefix: str
    suffix: str
    settings: dict


class ImagePromptBuilder:
    """Build styled image generation prompts and submit them to DALL-E 3."""

    # Style presets keyed by the identifier accepted as ``style_name``.
    STYLES = {
        "photorealistic": ImageStyle(
            name="Photorealistic",
            prefix="A photorealistic image of",
            suffix="high detail, 8k resolution, professional photography",
            settings={"style": "natural", "quality": "hd"}
        ),
        "digital_art": ImageStyle(
            name="Digital Art",
            prefix="Digital art illustration of",
            suffix="vibrant colors, detailed, trending on ArtStation",
            settings={"style": "vivid", "quality": "hd"}
        ),
        "oil_painting": ImageStyle(
            name="Oil Painting",
            prefix="Oil painting of",
            suffix="classical style, rich textures, masterful brushwork",
            settings={"style": "natural", "quality": "hd"}
        ),
        "minimalist": ImageStyle(
            name="Minimalist",
            prefix="Minimalist illustration of",
            suffix="simple shapes, clean lines, limited color palette",
            settings={"style": "natural", "quality": "standard"}
        ),
        "watercolor": ImageStyle(
            name="Watercolor",
            prefix="Watercolor painting of",
            suffix="soft edges, flowing colors, artistic",
            settings={"style": "natural", "quality": "hd"}
        ),
        "3d_render": ImageStyle(
            name="3D Render",
            prefix="3D rendered image of",
            suffix="octane render, ray tracing, studio lighting",
            settings={"style": "vivid", "quality": "hd"}
        ),
    }

    def __init__(self):
        self.client = OpenAI()

    def build_prompt(
        self,
        subject: str,
        style_name: str = "digital_art",
        additional_details: list[str] = None
    ) -> tuple[str, dict]:
        """Build a styled prompt and return it with the style's settings.

        Args:
            subject: What the image should depict.
            style_name: Key into ``STYLES``; unknown names fall back to
                "digital_art".
            additional_details: Optional extra descriptors inserted between
                the subject and the style suffix.

        Returns:
            A ``(prompt, settings)`` tuple. ``settings`` is a copy of the
            style's settings dict.
        """
        style = self.STYLES.get(style_name, self.STYLES["digital_art"])

        # Every prefix ends in "... of", so it must flow straight into the
        # subject. Joining the two with ", " produced prompts such as
        # "A photorealistic image of, a dog".
        parts = [f"{style.prefix} {subject}"]

        if additional_details:
            parts.extend(additional_details)

        parts.append(style.suffix)

        prompt = ", ".join(parts)

        # Return a copy so callers cannot mutate the shared class-level
        # style definition through the returned dict.
        return prompt, dict(style.settings)

    def generate(
        self,
        subject: str,
        style_name: str = "digital_art",
        additional_details: list[str] = None,
        size: str = "1024x1024"
    ) -> str:
        """Generate an image from a styled prompt and return its URL.

        Args:
            subject: What the image should depict.
            style_name: Key into ``STYLES``; unknown names fall back to
                "digital_art".
            additional_details: Optional extra descriptors for the prompt.
            size: Output dimensions passed to the API.

        Returns:
            The ``url`` field of the first image in the API response.
        """
        prompt, settings = self.build_prompt(
            subject, style_name, additional_details
        )

        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size=size,
            **settings
        )

        return response.data[0].url


# Usage
builder = ImagePromptBuilder()

# Photorealistic preset
url = builder.generate(
    subject="a golden retriever playing in autumn leaves",
    style_name="photorealistic",
)

# Digital art preset with extra scene descriptors
url = builder.generate(
    subject="a futuristic robot gardener tending to plants",
    style_name="digital_art",
    additional_details=["peaceful scene", "morning light"],
)

# 3D render preset
url = builder.generate(
    subject="a glass sculpture of a hummingbird",
    style_name="3d_render",
)

Batch Image Generation

from openai import OpenAI
import asyncio
from dataclasses import dataclass
import time


@dataclass
class ImageResult:
    """Result of an image generation request.

    Attributes:
        prompt: The prompt that was submitted.
        url: Image URL on success; stays ``None`` when the request failed.
        error: Stringified exception on failure; stays ``None`` on success.
        generation_time: Seconds spent in the API call, measured around the
            request in ``BatchImageGenerator.generate_single``.
    """
    prompt: str
    url: str = None
    error: str = None
    generation_time: float = 0


class BatchImageGenerator:
    """Generate multiple images sequentially with simple client-side pacing."""

    def __init__(self, rate_limit: float = 1.0):
        """Create a generator.

        Args:
            rate_limit: Maximum requests per second; must be positive.

        Raises:
            ValueError: If ``rate_limit`` is zero or negative.
        """
        # Validate first: a zero rate would otherwise surface as a
        # ZeroDivisionError on the first request, and a negative rate would
        # silently disable pacing.
        if rate_limit <= 0:
            raise ValueError(f"rate_limit must be positive, got {rate_limit!r}")

        self.client = OpenAI()
        self.rate_limit = rate_limit  # Requests per second
        self.last_request_time = 0

    def _wait_for_rate_limit(self):
        """Sleep until at least ``1 / rate_limit`` seconds separate requests."""
        min_interval = 1.0 / self.rate_limit
        elapsed = time.time() - self.last_request_time
        wait_time = min_interval - elapsed

        if wait_time > 0:
            time.sleep(wait_time)

        self.last_request_time = time.time()

    def generate_single(
        self,
        prompt: str,
        **kwargs
    ) -> ImageResult:
        """Generate a single image, capturing any failure in the result.

        Args:
            prompt: Text description of the image to generate.
            **kwargs: Extra keyword arguments forwarded to
                ``client.images.generate``.

        Returns:
            An ``ImageResult`` with ``url`` set on success or ``error`` set
            on failure. Exceptions from the API call are not re-raised.
        """
        self._wait_for_rate_limit()

        start_time = time.time()

        # Deliberately broad: one failed prompt must not abort a batch, so
        # the error is recorded on the result instead of propagating.
        try:
            response = self.client.images.generate(
                model="dall-e-3",
                prompt=prompt,
                **kwargs
            )
        except Exception as e:
            return ImageResult(
                prompt=prompt,
                error=str(e),
                generation_time=time.time() - start_time
            )

        return ImageResult(
            prompt=prompt,
            url=response.data[0].url,
            generation_time=time.time() - start_time
        )

    def generate_batch(
        self,
        prompts: list[str],
        progress_callback: callable = None,
        **kwargs
    ) -> list[ImageResult]:
        """Generate one image per prompt, in order.

        Args:
            prompts: Prompts to process sequentially.
            progress_callback: Optional callable invoked after each prompt
                as ``progress_callback(done_count, total, result)``.
            **kwargs: Extra keyword arguments forwarded to
                ``generate_single``.

        Returns:
            One ``ImageResult`` per prompt, in input order.
        """
        results = []
        total = len(prompts)

        for done_count, prompt in enumerate(prompts, start=1):
            result = self.generate_single(prompt, **kwargs)
            results.append(result)

            if progress_callback:
                progress_callback(done_count, total, result)

        return results

    def generate_variations_batch(
        self,
        base_prompt: str,
        variations: list[str],
        **kwargs
    ) -> list[ImageResult]:
        """Generate one image per variation of a base prompt.

        Each prompt is built as ``"{base_prompt}, {variation}"``.

        Args:
            base_prompt: Shared beginning of every prompt.
            variations: Text appended to the base prompt, one per image.
            **kwargs: Extra keyword arguments forwarded to ``generate_batch``.

        Returns:
            One ``ImageResult`` per variation, in input order.
        """
        prompts = [
            f"{base_prompt}, {variation}"
            for variation in variations
        ]

        return self.generate_batch(prompts, **kwargs)


# Usage
generator = BatchImageGenerator(rate_limit=0.5)  # 1 image per 2 seconds

# Prompts to render in one batch
prompts = [
    "A serene lake at dawn with mountains in background",
    "A bustling night market in Tokyo",
    "An ancient library with magical floating books",
]


def on_progress(current, total, result):
    """Print a one-line status for each processed prompt."""
    if result.url:
        status = "OK"
    else:
        status = f"Error: {result.error}"
    print(f"[{current}/{total}] {result.prompt[:30]}... - {status}")


results = generator.generate_batch(prompts, progress_callback=on_progress)

# One image per season, all sharing the same base prompt
variations = ["spring", "summer", "autumn", "winter"]
results = generator.generate_variations_batch(
    base_prompt="A Japanese garden in",
    variations=variations,
)

Image Editing

Inpainting with DALL-E

from openai import OpenAI
from PIL import Image
import io
import base64


class ImageEditor:
    """Edit images and create variations through the OpenAI images API."""

    def __init__(self):
        self.client = OpenAI()

    def create_mask(
        self,
        image_size: tuple[int, int],
        mask_region: tuple[int, int, int, int]
    ) -> bytes:
        """Create a PNG mask whose transparent rectangle marks the edit area.

        Args:
            image_size: ``(width, height)`` of the mask in pixels.
            mask_region: ``(x1, y1, x2, y2)`` rectangle to make transparent;
                ``x2`` and ``y2`` are exclusive. Parts of the rectangle that
                fall outside the image are ignored.

        Returns:
            The mask encoded as PNG bytes.
        """
        # Start from a fully opaque black image.
        mask = Image.new("RGBA", image_size, (0, 0, 0, 255))

        # Clamp the rectangle to the image bounds so an oversized region
        # does not raise.
        width, height = image_size
        x1, y1, x2, y2 = mask_region
        left, top = max(x1, 0), max(y1, 0)
        right, bottom = min(x2, width), min(y2, height)

        # Make the edit region transparent (transparent = area to edit).
        # A single paste fills the whole box, replacing a per-pixel
        # putpixel loop.
        if right > left and bottom > top:
            mask.paste((255, 255, 255, 0), (left, top, right, bottom))

        # Save to bytes
        buffer = io.BytesIO()
        mask.save(buffer, format="PNG")
        return buffer.getvalue()

    def edit_image(
        self,
        image_path: str,
        mask_path: str,
        prompt: str,
        size: str = "1024x1024"
    ) -> str:
        """Edit an image inside the masked area and return the result URL.

        Args:
            image_path: Path of the image to edit.
            mask_path: Path of the mask image.
            prompt: Description passed to the edit endpoint.
            size: Output dimensions passed to the API.

        Returns:
            The ``url`` field of the first image in the API response.
        """
        with open(image_path, "rb") as img_file, open(mask_path, "rb") as mask_file:
            response = self.client.images.edit(
                model="dall-e-2",  # DALL-E 2 for editing
                image=img_file,
                mask=mask_file,
                prompt=prompt,
                size=size,
                n=1
            )

        return response.data[0].url

    def create_variation(
        self,
        image_path: str,
        n: int = 1,
        size: str = "1024x1024"
    ) -> list[str]:
        """Create variations of an existing image.

        Args:
            image_path: Path of the source image.
            n: Number of variations to request.
            size: Output dimensions passed to the API.

        Returns:
            The URLs of all images in the API response.
        """
        with open(image_path, "rb") as img_file:
            response = self.client.images.create_variation(
                model="dall-e-2",
                image=img_file,
                n=n,
                size=size
            )

        return [img.url for img in response.data]

    def prepare_image_for_editing(
        self,
        image_path: str,
        target_size: int = 1024
    ) -> str:
        """Convert an image to a square RGBA PNG saved next to the original.

        The image is resized to ``target_size`` x ``target_size`` without
        preserving its aspect ratio.

        Args:
            image_path: Path of the source image.
            target_size: Side length of the square output in pixels.

        Returns:
            Path of the written file: the input path with its extension
            replaced by ``_prepared.png``.
        """
        import os

        # The context manager closes the source file handle; the resized
        # image is a new object and stays usable after the block.
        with Image.open(image_path) as source:
            rgba = source if source.mode == "RGBA" else source.convert("RGBA")
            img = rgba.resize((target_size, target_size), Image.Resampling.LANCZOS)

        # splitext only strips a real file extension, so a dot in a
        # directory name (e.g. "my.dir/photo") is left intact.
        output_path = os.path.splitext(image_path)[0] + "_prepared.png"
        img.save(output_path, "PNG")

        return output_path


# Usage
editor = ImageEditor()

# Normalize the source image to a square RGBA PNG
prepared = editor.prepare_image_for_editing(image_path="original.jpg")

# Build a mask whose center square is the editable region
mask_bytes = editor.create_mask(
    image_size=(1024, 1024),
    mask_region=(300, 300, 700, 700),
)

# Write the mask to disk so it can be uploaded alongside the image
with open("mask.png", "wb") as f:
    f.write(mask_bytes)

# Repaint the masked region according to the prompt
result_url = editor.edit_image(
    image_path=prepared,
    mask_path="mask.png",
    prompt="A beautiful sunset sky with pink and orange clouds",
)

# Request three variations of an existing image
variations = editor.create_variation(image_path="original.png", n=3)

AI-Powered Image Transformation

from openai import OpenAI
import base64


class ImageTransformer:
    """Transform images by describing them with a vision model and regenerating."""

    def __init__(self):
        self.client = OpenAI()

    @staticmethod
    def _image_mime_type(image_path: str) -> str:
        """Return the image MIME type implied by the file extension.

        Falls back to "image/png" when the extension is missing, unknown,
        or does not map to an image type.
        """
        import mimetypes

        guessed, _ = mimetypes.guess_type(image_path)
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/png"

    def analyze_image(self, image_path: str) -> str:
        """Describe an image in detail using the gpt-4o model.

        Args:
            image_path: Path of the image file to describe.

        Returns:
            The text content of the model's first response choice.
        """
        # Label the data URL with the file's real type; a hard-coded
        # "image/png" would mislabel JPEG and other inputs.
        mime_type = self._image_mime_type(image_path)

        with open(image_path, "rb") as f:
            image_data = base64.standard_b64encode(f.read()).decode()

        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{image_data}"
                            }
                        },
                        {
                            "type": "text",
                            "text": "Describe this image in detail for image generation. Include style, colors, composition, lighting, and mood."
                        }
                    ]
                }
            ]
        )

        return response.choices[0].message.content

    def transform_style(
        self,
        image_path: str,
        target_style: str
    ) -> str:
        """Regenerate an image's scene in a different style.

        The original pixels are not reused: the image is described in text
        and a new image is generated from that description.

        Args:
            image_path: Path of the source image.
            target_style: Name of the style to render the scene in.

        Returns:
            The ``url`` field of the first generated image.
        """
        # Analyze original image
        description = self.analyze_image(image_path)

        # Create prompt for new style
        prompt = f"""Transform this scene to {target_style} style:

Original description: {description}

Create the same scene and composition but in {target_style} style."""

        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size="1024x1024",
            quality="hd"
        )

        return response.data[0].url

    def extend_image(
        self,
        image_path: str,
        direction: str,
        extension_prompt: str
    ) -> str:
        """Conceptually extend an image (describe and regenerate larger scene).

        Args:
            image_path: Path of the source image.
            direction: One of "left", "right", "up", "down", "wider"; any
                other value is inserted into the prompt verbatim.
            extension_prompt: Description of the content to add.

        Returns:
            The ``url`` field of the first generated image.
        """
        # Analyze original
        description = self.analyze_image(image_path)

        direction_prompts = {
            "left": "with additional content extending to the left",
            "right": "with additional content extending to the right",
            "up": "with additional content above the original scene",
            "down": "with additional content below the original scene",
            "wider": "as a wider panoramic version of this scene"
        }

        direction_text = direction_prompts.get(direction, direction)

        prompt = f"""Create an extended version of this scene {direction_text}:

Original scene: {description}

Extension: {extension_prompt}

Maintain consistent style, lighting, and composition."""

        # Horizontal extensions use the landscape size; every other
        # direction (including unrecognized ones) uses portrait.
        response = self.client.images.generate(
            model="dall-e-3",
            prompt=prompt,
            size="1792x1024" if direction in ["left", "right", "wider"] else "1024x1792",
            quality="hd"
        )

        return response.data[0].url


# Usage
transformer = ImageTransformer()

# Re-render a photo's scene in anime style
anime_url = transformer.transform_style(
    image_path="photo.jpg",
    target_style="anime",
)

# Regenerate a landscape as a wider panorama
extended_url = transformer.extend_image(
    image_path="landscape.jpg",
    direction="wider",
    extension_prompt="rolling hills with a distant village",
)

Production Patterns

Image Generation Service

from openai import OpenAI
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
import uuid
import time
import hashlib


class ImageStatus(Enum):
    """Lifecycle states of an image generation job."""
    PENDING = "pending"
    GENERATING = "generating"
    COMPLETED = "completed"
    FAILED = "failed"


@dataclass
class ImageJob:
    """An image generation job.

    Attributes:
        id: Unique job identifier.
        prompt: The prompt submitted for this job.
        status: Current lifecycle state.
        url: Image URL once the job has completed.
        error: Failure reason when the job has failed.
        created_at: ``time.time()`` timestamp taken when the job is created.
        completed_at: ``time.time()`` timestamp taken when the job completes
            or fails during processing.
        metadata: Extra data stored by the service (generation settings,
            user id, cache-hit flag).
    """
    id: str
    prompt: str
    status: ImageStatus
    url: Optional[str] = None
    error: Optional[str] = None
    created_at: float = field(default_factory=time.time)
    completed_at: Optional[float] = None
    metadata: dict = field(default_factory=dict)


class ImageGenerationService:
    """Image generation service with prompt validation and URL caching.

    Jobs and cached URLs are held in dictionaries on the instance.
    """

    # Lower-case substrings that cause a prompt to be rejected. Matching is
    # a plain substring test on the lower-cased prompt; extend as needed.
    BLOCKED_TERMS = ("explicit", "violent", "illegal")

    def __init__(
        self,
        cache_enabled: bool = True,
        max_prompt_length: int = 4000
    ):
        """Create a service.

        Args:
            cache_enabled: Whether identical requests reuse a cached URL.
            max_prompt_length: Longest prompt, in characters, that passes
                validation.
        """
        self.client = OpenAI()
        self.jobs: dict[str, ImageJob] = {}
        self.cache: dict[str, str] = {}
        self.cache_enabled = cache_enabled
        self.max_prompt_length = max_prompt_length

    def _get_cache_key(self, prompt: str, settings: dict) -> str:
        """Return a hex digest identifying a prompt plus its settings.

        Settings are sorted by key so insertion order does not affect the
        digest.
        """
        content = f"{prompt}:{str(sorted(settings.items()))}"
        return hashlib.md5(content.encode()).hexdigest()

    def _validate_prompt(self, prompt: str) -> tuple[bool, str]:
        """Validate a prompt before generation.

        Returns:
            ``(True, "")`` when the prompt is acceptable, otherwise
            ``(False, reason)``.
        """
        if not prompt or not prompt.strip():
            return False, "Prompt cannot be empty"

        if len(prompt) > self.max_prompt_length:
            return False, f"Prompt exceeds maximum length of {self.max_prompt_length}"

        # Basic content filtering (extend as needed)
        prompt_lower = prompt.lower()
        if any(term in prompt_lower for term in self.BLOCKED_TERMS):
            return False, "Prompt contains blocked content"

        return True, ""

    def create_job(
        self,
        prompt: str,
        size: str = "1024x1024",
        quality: str = "standard",
        style: str = "vivid",
        user_id: str = None
    ) -> ImageJob:
        """Create and register an image generation job.

        Args:
            prompt: Text description of the image to generate.
            size: Output dimensions passed to the API.
            quality: Quality setting passed to the API.
            style: Style setting passed to the API.
            user_id: Optional identifier stored in the job metadata.

        Returns:
            A FAILED job when validation rejects the prompt, a COMPLETED job
            on a cache hit, otherwise a PENDING job awaiting
            ``process_job``.
        """
        # Validate prompt
        valid, error = self._validate_prompt(prompt)
        if not valid:
            job = ImageJob(
                id=str(uuid.uuid4()),
                prompt=prompt,
                status=ImageStatus.FAILED,
                error=error
            )
            self.jobs[job.id] = job
            return job

        settings = {"size": size, "quality": quality, "style": style}

        # Check cache
        if self.cache_enabled:
            cache_key = self._get_cache_key(prompt, settings)
            if cache_key in self.cache:
                # Record the settings on cache hits too, so every
                # non-failed job carries the same metadata keys.
                job = ImageJob(
                    id=str(uuid.uuid4()),
                    prompt=prompt,
                    status=ImageStatus.COMPLETED,
                    url=self.cache[cache_key],
                    completed_at=time.time(),
                    metadata={
                        "cached": True,
                        "settings": settings,
                        "user_id": user_id
                    }
                )
                self.jobs[job.id] = job
                return job

        # Create job
        job = ImageJob(
            id=str(uuid.uuid4()),
            prompt=prompt,
            status=ImageStatus.PENDING,
            metadata={"settings": settings, "user_id": user_id}
        )
        self.jobs[job.id] = job

        return job

    def process_job(self, job_id: str) -> ImageJob:
        """Run image generation for a pending job.

        Jobs that are not PENDING are returned unchanged. API failures are
        recorded on the job (status FAILED, ``error`` set) instead of being
        raised.

        Args:
            job_id: Identifier returned by ``create_job``.

        Returns:
            The job, updated in place.

        Raises:
            ValueError: If no job with ``job_id`` exists.
        """
        job = self.jobs.get(job_id)
        if not job:
            raise ValueError(f"Job not found: {job_id}")

        if job.status != ImageStatus.PENDING:
            return job

        job.status = ImageStatus.GENERATING
        settings = job.metadata.get("settings", {})

        try:
            response = self.client.images.generate(
                model="dall-e-3",
                prompt=job.prompt,
                **settings
            )

            job.url = response.data[0].url
            job.status = ImageStatus.COMPLETED
            job.completed_at = time.time()

            # Cache result
            if self.cache_enabled:
                cache_key = self._get_cache_key(job.prompt, settings)
                self.cache[cache_key] = job.url

        except Exception as e:
            job.status = ImageStatus.FAILED
            job.error = str(e)
            job.completed_at = time.time()

        return job

    def get_job(self, job_id: str) -> Optional[ImageJob]:
        """Return the job with ``job_id``, or ``None`` if it does not exist."""
        return self.jobs.get(job_id)

    def get_usage_stats(self) -> dict:
        """Summarize the jobs held by this service.

        Returns:
            A dict with ``total_jobs``, ``completed``, ``failed``,
            ``cache_hits`` and ``avg_generation_time``. The average is the
            mean of ``completed_at - created_at`` over completed jobs that
            were actually generated, or 0 when there are none.
        """
        completed = [j for j in self.jobs.values() if j.status == ImageStatus.COMPLETED]
        failed = [j for j in self.jobs.values() if j.status == ImageStatus.FAILED]

        # Cache hits complete almost instantly; averaging them in would
        # understate how long a real generation takes.
        durations = [
            j.completed_at - j.created_at
            for j in completed
            if j.completed_at is not None and not j.metadata.get("cached")
        ]

        return {
            "total_jobs": len(self.jobs),
            "completed": len(completed),
            "failed": len(failed),
            "cache_hits": sum(1 for j in completed if j.metadata.get("cached")),
            "avg_generation_time": sum(durations) / len(durations) if durations else 0
        }


# Usage
service = ImageGenerationService()

# Register a job for an HD render on behalf of a user
job = service.create_job(
    prompt="A majestic dragon flying over a medieval castle",
    quality="hd",
    user_id="user_123",
)

print(f"Job created: {job.id}")

# Run the generation and report the outcome
result = service.process_job(job.id)
print(f"Status: {result.status.value}")

if not result.url:
    print(f"Error: {result.error}")
else:
    print(f"Image URL: {result.url}")

# Summarize what the service has processed so far
stats = service.get_usage_stats()
print(f"Stats: {stats}")
Image Generation Best Practices
  • Use detailed, specific prompts for better results
  • Include style, lighting, and composition details
  • Implement content moderation for user prompts
  • Cache generated images to reduce costs
  • Use appropriate quality settings for your use case

Practice Exercise

Build an image generation platform that:
  1. Accepts natural language descriptions
  2. Enhances prompts automatically for better results
  3. Supports multiple styles and configurations
  4. Implements content moderation
  5. Provides image variations and editing
Focus on:
  • Prompt optimization for quality
  • Cost management through caching
  • Content safety filtering
  • User experience with progress feedback

Interview Deep-Dive

Strong Answer:
  • The API call itself is the easy part. The production concerns I would address fall into five categories: content safety, cost management, latency handling, storage, and user experience.
  • Content safety is the highest priority for consumer-facing products. I would implement a two-layer moderation system. First, a pre-generation filter that checks the user’s prompt against OpenAI’s moderation API and a custom blocklist before the image generation call even fires. Second, a post-generation filter that runs the generated image through a vision model or image classification model to detect content that slipped through the text filter. DALL-E has its own content policy, but I would not rely solely on the provider’s safety layer because false negatives happen. At one company a user’s prompt about “shooting stars” generated an image flagged by their community guidelines — the text filter missed it because the prompt was innocuous, but the model interpreted it ambiguously.
  • Cost management means implementing caching for identical or near-identical prompts (hash the prompt plus generation parameters as a cache key), setting per-user generation quotas (free tier gets 10 images per day, paid gets 100), and choosing quality settings based on the use case (standard quality for previews, HD quality only when the user explicitly requests it). DALL-E 3 HD costs roughly $0.08 per image versus $0.04 for standard — at scale that doubles your spend.
  • Latency is 10-30 seconds per image. I would never make the user wait synchronously. The pattern is: accept the request, return a job ID immediately, process in a background worker, notify the user via WebSocket or push notification when the image is ready. Show a skeleton or placeholder in the UI immediately.
  • Storage means I would never serve images from OpenAI’s temporary URLs (they expire after an hour). I download the image to S3, generate a CDN-fronted permanent URL, and serve that. I also store the generation metadata (prompt, settings, user, timestamp) for analytics and content moderation audit trails.
Red Flags: Candidate only discusses the API call without mentioning safety, cost, or UX concerns. Another red flag is relying on temporary OpenAI URLs for production serving.
Follow-up: How would you implement caching for image generation specifically, given that the same prompt can intentionally produce different images?
Image generation caching is trickier than LLM text caching because users often want variety. My approach is context-dependent. For product use cases like generating thumbnail variants for A/B testing, I cache aggressively: same prompt plus same seed produces the same image, so I use the prompt hash plus seed as the cache key. For consumer creative tools where variety is the point, I do not cache individual results but instead implement a “similar prompt” detector. If a user submits a prompt that is semantically identical to one they submitted in the last hour (embedding similarity above 0.98), I show them their previous results and ask “Want to generate a new variation or use one of these?” This saves 30-40% of redundant generations from users who rephrase slightly because they did not like the first result. The cache eviction strategy is LRU with a 24-hour TTL and a storage cap per user.
Strong Answer:
  • DALL-E 3 takes your submitted prompt and rewrites it into a more detailed version using an internal LLM before the diffusion model generates the image. The revised prompt is returned in the API response under response.data[0].revised_prompt. This is great for casual users because it enhances vague prompts, but it is a significant problem for applications that need precise control.
  • The issue is that your prompt engineering work gets partially overridden. If I carefully craft a prompt specifying “no text in the image, minimalist style, only blue and white colors,” the rewrite might add details that conflict with my constraints. I have seen the rewriter add elements like “with elegant serif typography” to prompts that explicitly said no text.
  • The workaround strategies depend on the use case. For maximum control, I use DALL-E 2 for editing and inpainting tasks since it uses your exact prompt without rewriting. For DALL-E 3 generation, I make my constraints extremely explicit and redundant in the prompt: instead of “no text,” I write “absolutely no text, no letters, no words, no typography, no writing of any kind anywhere in the image.” Redundancy helps because the rewriter is less likely to override a constraint that appears multiple times.
  • I also log both the submitted prompt and the revised prompt for every generation. This is essential for debugging: when a user reports “the image does not match what I asked for,” I can compare the two prompts and identify where the rewrite diverged from the user’s intent. This logging also feeds back into prompt engineering — I analyze which types of instructions survive the rewrite intact and optimize my prompt templates accordingly.
Red Flags: Candidate does not know about prompt rewriting in DALL-E 3, cannot explain the difference between DALL-E 2 and DALL-E 3 capabilities, or suggests the model always generates exactly what you prompt.
Follow-up: How would you build a style-consistent image generation system where every image needs to match a specific brand aesthetic?
Style consistency across multiple generations is one of the hardest problems with current image generation models because there is no native “style reference” input. My approach uses a multi-part prompt template with three sections: a frozen style preamble that describes the brand aesthetic in exhaustive detail (color palette with hex codes, art style, lighting characteristics, composition rules, specific things to avoid), a variable subject section for what changes per image, and a frozen style suffix that reinforces the key constraints. I test this template against 50+ diverse subjects and manually review for style drift. The subjects that produce off-brand results get their prompts refined with additional constraints. I also maintain a visual QA step: after generation, I pass the image and the brand guidelines to GPT-4o Vision and ask it to rate style compliance on specific dimensions (color accuracy, composition, mood). Images below a threshold get regenerated with a modified prompt. This automated QA catches about 80% of off-brand images before a human ever sees them.
Strong Answer:
  • At $0.04-$0.08 per image, $15,000 per month means roughly 200K-375K images generated monthly. I would attack this on four fronts: eliminate waste, optimize settings, add caching, and shift volume.
  • Eliminate waste first. I would audit the generation logs for patterns: how many images are generated but never viewed (abort before load), how many users generate 10+ variants of the same concept (prompt iteration pattern), and how many are automated/bot traffic hitting the API. At one company, 20% of our image generation spend was from a single user running an automated script. Rate limiting and abuse detection alone cut waste by 15%.
  • Optimize settings: switch from HD quality ($0.08) to standard quality ($0.04) for thumbnails, previews, and first-draft generations. Only use HD when the user explicitly requests the final high-res version. This alone can cut costs by 30-40% if most generations are exploration phase. Also, use 1024x1024 as default instead of the larger sizes unless the layout requires landscape or portrait.
  • Caching: implement the semantic caching strategy I described earlier. For applications where the same types of images are requested repeatedly (product category headers, blog illustrations for common topics), pre-generate a library of images and serve from cache. Even a 20% cache hit rate saves $3,000 per month at this scale.
  • Shift volume: for lower-quality-acceptable use cases (placeholder images, draft mockups, internal tooling), consider Stable Diffusion running on your own infrastructure or a cheaper provider. Self-hosted SDXL on a single A10 GPU costs about $1.50/hour and can generate roughly 2000 images per hour at $0.00075 per image — 50x cheaper than DALL-E 3. The quality gap is real but acceptable for many internal use cases.
  • Combined, these four strategies realistically hit the 60% cost reduction target: 15% from waste elimination, 20% from settings optimization, 10% from caching, and 15% from volume shifting to cheaper alternatives.
Red Flags: Candidate only suggests “use a cheaper model” without analyzing the actual cost drivers, does not mention caching, or proposes degrading quality uniformly instead of tiering quality by use case.
Follow-up: How do you measure “user experience did not degrade” when you switch from HD to standard quality?
I would run this as a controlled A/B test on the user cohort that generates the most images. The primary metric is generation-to-download rate: what percentage of generated images do users actually save or use. If standard quality images have the same download rate as HD, users cannot tell the difference or do not care. Secondary metrics are user satisfaction scores (if we survey) and support tickets mentioning image quality. I would also track re-generation rate: if users generate the same prompt more times with standard quality than they did with HD, it suggests they are unsatisfied with results and retrying. The test needs at least two weeks and a few thousand generations per variant to reach significance. The key nuance is segmenting by use case — professional designers will notice the quality drop immediately, while casual users generating social media content might not care at all. I would keep HD as default for pro-tier users and switch to standard only for free-tier users.