Efficient Neural Network Design
The Efficiency Challenge
Mobile and edge devices have strict constraints:
- Compute: limited FLOPs/second
- Memory: small RAM and storage
- Power: battery limitations
- Latency: real-time requirements
Efficient architectures therefore need to be:
- Small (fewer parameters)
- Fast (fewer FLOPs)
- Accurate (still useful!)
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from typing import List, Tuple, Optional, Callable
torch.manual_seed(42)
Depthwise Separable Convolutions
The foundation of efficient architectures. For a K×K kernel applied to an H×W feature map:
Standard Conv: O(K²·C_in·C_out·H·W)
Depthwise Separable: O(K²·C_in·H·W) + O(C_in·C_out·H·W)
class DepthwiseSeparableConv(nn.Module):
"""
Depthwise Separable Convolution.
Factorizes standard conv into:
1. Depthwise: Spatial filtering per channel
2. Pointwise: Channel mixing via 1x1 conv
Reduction factor: K² (typically 8-9x fewer FLOPs)
"""
def __init__(
self,
in_channels: int,
out_channels: int,
kernel_size: int = 3,
stride: int = 1,
padding: int = 1,
bias: bool = False
):
super().__init__()
# Depthwise: each input channel has its own filter
self.depthwise = nn.Conv2d(
in_channels,
in_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=in_channels, # Key: groups = in_channels
bias=bias
)
# Pointwise: 1x1 conv for channel mixing
self.pointwise = nn.Conv2d(
in_channels,
out_channels,
kernel_size=1,
bias=bias
)
self.bn1 = nn.BatchNorm2d(in_channels)
self.bn2 = nn.BatchNorm2d(out_channels)
def forward(self, x: torch.Tensor) -> torch.Tensor:
x = self.depthwise(x)
x = self.bn1(x)
x = F.relu(x)
x = self.pointwise(x)
x = self.bn2(x)
x = F.relu(x)
return x
@staticmethod
def compute_savings(in_c: int, out_c: int, k: int = 3) -> float:
"""Compute FLOPs savings ratio."""
standard = k * k * in_c * out_c
separable = k * k * in_c + in_c * out_c
return standard / separable
# Example: savings for 256 -> 256 with 3x3 kernel
savings = DepthwiseSeparableConv.compute_savings(256, 256, 3)
print(f"FLOPs reduction: {savings:.1f}x") # ~8-9x
MobileNet Family
MobileNetV1
class MobileNetV1Block(nn.Module):
"""MobileNetV1 basic block."""
def __init__(
self,
in_channels: int,
out_channels: int,
stride: int = 1
):
super().__init__()
self.conv = DepthwiseSeparableConv(
in_channels, out_channels, stride=stride
)
def forward(self, x):
return self.conv(x)
class MobileNetV1(nn.Module):
"""
MobileNetV1: Efficient CNN using depthwise separable convolutions.
Key ideas:
1. Replace all standard convs with depthwise separable
2. Width multiplier α: scale all channel counts
3. Resolution multiplier ρ: scale input resolution
"""
def __init__(
self,
num_classes: int = 1000,
width_mult: float = 1.0,
input_size: int = 224
):
super().__init__()
self.width_mult = width_mult
def c(channels):
return int(channels * width_mult)
# Configuration: (out_channels, stride)
config = [
(64, 1),
(128, 2),
(128, 1),
(256, 2),
(256, 1),
(512, 2),
(512, 1), (512, 1), (512, 1), (512, 1), (512, 1), # 5x
(1024, 2),
(1024, 1)
]
# First conv
self.conv1 = nn.Sequential(
nn.Conv2d(3, c(32), 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(c(32)),
nn.ReLU(inplace=True)
)
# Build blocks
layers = []
in_channels = c(32)
for out_channels, stride in config:
layers.append(MobileNetV1Block(in_channels, c(out_channels), stride))
in_channels = c(out_channels)
self.features = nn.Sequential(*layers)
# Classifier
self.avgpool = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(c(1024), num_classes)
def forward(self, x):
x = self.conv1(x)
x = self.features(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
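The width multiplier α is the main size/accuracy knob in V1. A short check (a sketch with arbitrary α values) shows how it shrinks the model:
# Effect of the width multiplier on parameter count
for alpha in [1.0, 0.75, 0.5, 0.25]:
    model = MobileNetV1(num_classes=1000, width_mult=alpha)
    n_params = sum(p.numel() for p in model.parameters())
    print(f"alpha={alpha:<4} params={n_params / 1e6:.2f}M")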
MobileNetV2
class InvertedResidual(nn.Module):
"""
MobileNetV2 Inverted Residual Block.
Key innovations:
1. Inverted bottleneck: expand -> depthwise -> project
2. Linear bottleneck: no ReLU after final projection
3. Residual connection when stride=1 and in_c=out_c
"""
def __init__(
self,
in_channels: int,
out_channels: int,
stride: int = 1,
expand_ratio: int = 6
):
super().__init__()
self.stride = stride
self.use_residual = stride == 1 and in_channels == out_channels
hidden_dim = in_channels * expand_ratio
layers = []
# Expansion (1x1 conv)
if expand_ratio != 1:
layers.extend([
nn.Conv2d(in_channels, hidden_dim, 1, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=True)
])
# Depthwise conv
layers.extend([
nn.Conv2d(
hidden_dim, hidden_dim, 3,
stride=stride, padding=1, groups=hidden_dim, bias=False
),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=True)
])
# Projection (linear - no activation!)
layers.extend([
nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
nn.BatchNorm2d(out_channels)
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_residual:
return x + self.conv(x)
return self.conv(x)
class MobileNetV2(nn.Module):
"""
MobileNetV2: Inverted Residuals and Linear Bottlenecks.
Improvements over V1:
- Inverted residual structure
- Linear bottlenecks (preserve information)
- Residual connections for better gradients
"""
def __init__(self, num_classes: int = 1000, width_mult: float = 1.0):
super().__init__()
def c(channels):
return max(8, int(channels * width_mult))
# (expand_ratio, out_channels, num_blocks, stride)
config = [
(1, 16, 1, 1),
(6, 24, 2, 2),
(6, 32, 3, 2),
(6, 64, 4, 2),
(6, 96, 3, 1),
(6, 160, 3, 2),
(6, 320, 1, 1),
]
# First conv
self.features = [nn.Sequential(
nn.Conv2d(3, c(32), 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(c(32)),
nn.ReLU6(inplace=True)
)]
# Build inverted residual blocks
in_channels = c(32)
for t, out_c, n, s in config:
out_channels = c(out_c)
for i in range(n):
stride = s if i == 0 else 1
self.features.append(
InvertedResidual(in_channels, out_channels, stride, t)
)
in_channels = out_channels
# Last conv
self.features.append(nn.Sequential(
nn.Conv2d(in_channels, c(1280), 1, bias=False),
nn.BatchNorm2d(c(1280)),
nn.ReLU6(inplace=True)
))
self.features = nn.Sequential(*self.features)
self.avgpool = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Sequential(
nn.Dropout(0.2),
nn.Linear(c(1280), num_classes)
)
def forward(self, x):
x = self.features(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
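A quick comparison of the two generations (a sketch; counts from these simplified implementations will differ slightly from the paper numbers):
# Parameter counts at width 1.0, plus a forward-pass sanity check
v1 = MobileNetV1()
v2 = MobileNetV2()
for name, m in [("MobileNetV1", v1), ("MobileNetV2", v2)]:
    n_params = sum(p.numel() for p in m.parameters())
    print(f"{name}: {n_params / 1e6:.2f}M params")

x = torch.randn(1, 3, 224, 224)
print(v2(x).shape)  # torch.Size([1, 1000])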
MobileNetV3
class SEBlock(nn.Module):
"""Squeeze-and-Excitation block."""
def __init__(self, channels: int, reduction: int = 4):
super().__init__()
reduced = max(1, channels // reduction)
self.se = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(channels, reduced, 1),
nn.ReLU(inplace=True),
nn.Conv2d(reduced, channels, 1),
nn.Hardsigmoid(inplace=True)
)
def forward(self, x):
return x * self.se(x)
class MobileNetV3Block(nn.Module):
"""
MobileNetV3 block with SE and h-swish.
Improvements:
- Squeeze-and-Excitation attention
- h-swish activation (efficient approximation)
- Neural Architecture Search optimized
"""
def __init__(
self,
in_channels: int,
out_channels: int,
kernel_size: int,
stride: int,
expand_ratio: float,
use_se: bool,
use_hs: bool # Use h-swish
):
super().__init__()
self.use_residual = stride == 1 and in_channels == out_channels
hidden_dim = int(in_channels * expand_ratio)
# Choose activation
activation = nn.Hardswish if use_hs else nn.ReLU
layers = []
# Expansion
if expand_ratio != 1:
layers.extend([
nn.Conv2d(in_channels, hidden_dim, 1, bias=False),
nn.BatchNorm2d(hidden_dim),
activation(inplace=True)
])
# Depthwise
layers.extend([
nn.Conv2d(
hidden_dim, hidden_dim, kernel_size,
stride=stride, padding=kernel_size//2,
groups=hidden_dim, bias=False
),
nn.BatchNorm2d(hidden_dim),
activation(inplace=True)
])
# SE block
if use_se:
layers.append(SEBlock(hidden_dim))
# Projection
layers.extend([
nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
nn.BatchNorm2d(out_channels)
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_residual:
return x + self.conv(x)
return self.conv(x)
class MobileNetV3Small(nn.Module):
"""MobileNetV3-Small: Optimized for mobile."""
def __init__(self, num_classes: int = 1000):
super().__init__()
# Configuration from NAS
# (kernel, expand, out, SE, HS, stride)
config = [
(3, 1, 16, True, False, 2),
(3, 4.5, 24, False, False, 2),
(3, 3.67, 24, False, False, 1),
(5, 4, 40, True, True, 2),
(5, 6, 40, True, True, 1),
(5, 6, 40, True, True, 1),
(5, 3, 48, True, True, 1),
(5, 3, 48, True, True, 1),
(5, 6, 96, True, True, 2),
(5, 6, 96, True, True, 1),
(5, 6, 96, True, True, 1),
]
# First conv
layers = [nn.Sequential(
nn.Conv2d(3, 16, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.Hardswish(inplace=True)
)]
# Build blocks
in_c = 16
for k, exp, out_c, se, hs, s in config:
layers.append(MobileNetV3Block(in_c, out_c, k, s, exp, se, hs))
in_c = out_c
# Last stages
layers.append(nn.Sequential(
nn.Conv2d(96, 576, 1, bias=False),
nn.BatchNorm2d(576),
nn.Hardswish(inplace=True)
))
self.features = nn.Sequential(*layers)
self.avgpool = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Sequential(
nn.Linear(576, 1024),
nn.Hardswish(inplace=True),
nn.Dropout(0.2),
nn.Linear(1024, num_classes)
)
def forward(self, x):
x = self.features(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
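Running a dummy batch through the simplified MobileNetV3-Small confirms that the SE blocks and h-swish activations wire together (a minimal sketch):
# Forward-pass and size check
model = MobileNetV3Small(num_classes=1000)
x = torch.randn(1, 3, 224, 224)
print(model(x).shape)  # torch.Size([1, 1000])
print(f"Params: {sum(p.numel() for p in model.parameters()) / 1e6:.2f}M")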
ShuffleNet
Channel Shuffle Operation
def channel_shuffle(x: torch.Tensor, groups: int) -> torch.Tensor:
"""
Channel shuffle operation for group convolutions.
Enables information flow between groups by shuffling channels.
"""
batch, channels, height, width = x.shape
# Reshape to (batch, groups, channels_per_group, H, W)
x = x.view(batch, groups, channels // groups, height, width)
# Transpose groups and channels_per_group
x = x.transpose(1, 2).contiguous()
# Flatten back
x = x.view(batch, channels, height, width)
return x
class ShuffleNetV2Block(nn.Module):
"""
ShuffleNetV2 building block.
Key innovations:
1. Channel split instead of pointwise group conv
2. Channel shuffle for cross-group communication
3. Efficient memory access patterns
"""
def __init__(
self,
in_channels: int,
out_channels: int,
stride: int = 1
):
super().__init__()
self.stride = stride
branch_channels = out_channels // 2
if stride == 1:
self.branch1 = nn.Identity()
else:
self.branch1 = nn.Sequential(
# Depthwise
nn.Conv2d(in_channels, in_channels, 3, stride, 1,
groups=in_channels, bias=False),
nn.BatchNorm2d(in_channels),
# Pointwise
nn.Conv2d(in_channels, branch_channels, 1, bias=False),
nn.BatchNorm2d(branch_channels),
nn.ReLU(inplace=True)
)
# Main branch
in_c = in_channels if stride > 1 else branch_channels
self.branch2 = nn.Sequential(
# Pointwise
nn.Conv2d(in_c, branch_channels, 1, bias=False),
nn.BatchNorm2d(branch_channels),
nn.ReLU(inplace=True),
# Depthwise
nn.Conv2d(branch_channels, branch_channels, 3, stride, 1,
groups=branch_channels, bias=False),
nn.BatchNorm2d(branch_channels),
# Pointwise
nn.Conv2d(branch_channels, branch_channels, 1, bias=False),
nn.BatchNorm2d(branch_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
if self.stride == 1:
# Channel split
x1, x2 = x.chunk(2, dim=1)
out = torch.cat([x1, self.branch2(x2)], dim=1)
else:
out = torch.cat([self.branch1(x), self.branch2(x)], dim=1)
# Channel shuffle
out = channel_shuffle(out, 2)
return out
class ShuffleNetV2(nn.Module):
"""
ShuffleNetV2: Practical Guidelines for Efficient CNN Design.
Design principles:
G1: Equal channel width minimizes memory access cost
G2: Excessive group convolution increases MAC
G3: Network fragmentation reduces parallelism
G4: Element-wise operations are non-negligible
"""
def __init__(
self,
num_classes: int = 1000,
width_mult: float = 1.0
):
super().__init__()
# Width configurations
if width_mult == 0.5:
out_channels = [24, 48, 96, 192, 1024]
elif width_mult == 1.0:
out_channels = [24, 116, 232, 464, 1024]
elif width_mult == 1.5:
out_channels = [24, 176, 352, 704, 1024]
elif width_mult == 2.0:
out_channels = [24, 244, 488, 976, 2048]
else:
raise ValueError(f"Unsupported width_mult: {width_mult}")
# First conv
self.conv1 = nn.Sequential(
nn.Conv2d(3, out_channels[0], 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(out_channels[0]),
nn.ReLU(inplace=True)
)
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
# Stages
self.stage2 = self._make_stage(out_channels[0], out_channels[1], 4)
self.stage3 = self._make_stage(out_channels[1], out_channels[2], 8)
self.stage4 = self._make_stage(out_channels[2], out_channels[3], 4)
# Last conv
self.conv5 = nn.Sequential(
nn.Conv2d(out_channels[3], out_channels[4], 1, bias=False),
nn.BatchNorm2d(out_channels[4]),
nn.ReLU(inplace=True)
)
self.avgpool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Linear(out_channels[4], num_classes)
def _make_stage(self, in_c, out_c, num_blocks):
layers = [ShuffleNetV2Block(in_c, out_c, stride=2)]
for _ in range(num_blocks - 1):
layers.append(ShuffleNetV2Block(out_c, out_c, stride=1))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.conv5(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
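To see what channel_shuffle actually does, label each channel by index and watch the interleaving, then run the full network (a minimal sketch):
# channel_shuffle with 2 groups interleaves channels: [0, 1, 2, 3] -> [0, 2, 1, 3]
x = torch.arange(4.0).view(1, 4, 1, 1).repeat(1, 1, 2, 2)
shuffled = channel_shuffle(x, groups=2)
print(x[0, :, 0, 0])         # tensor([0., 1., 2., 3.])
print(shuffled[0, :, 0, 0])  # tensor([0., 2., 1., 3.])

# Full-model sanity check
model = ShuffleNetV2(num_classes=1000, width_mult=1.0)
print(model(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 1000])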
EfficientNet
Compound Scaling
class EfficientNetConfig:
"""
EfficientNet compound scaling.
Key insight: Scale depth, width, and resolution together.
φ: compound coefficient
depth = α^φ
width = β^φ
resolution = γ^φ
Subject to: α × β² × γ² ≈ 2 (FLOPs double)
"""
# Base model (EfficientNet-B0)
BASE_WIDTH = 1.0
BASE_DEPTH = 1.0
BASE_RESOLUTION = 224
# Scaling coefficients (found via grid search)
ALPHA = 1.2 # Depth
BETA = 1.1 # Width
GAMMA = 1.15 # Resolution
# Model configurations
CONFIGS = {
'b0': (1.0, 1.0, 224),
'b1': (1.0, 1.1, 240),
'b2': (1.1, 1.2, 260),
'b3': (1.2, 1.4, 300),
'b4': (1.4, 1.8, 380),
'b5': (1.6, 2.2, 456),
'b6': (1.8, 2.6, 528),
'b7': (2.0, 3.1, 600),
}
@classmethod
def get_config(cls, model_name: str):
if model_name not in cls.CONFIGS:
raise ValueError(f"Unknown model: {model_name}")
width_mult, depth_mult, resolution = cls.CONFIGS[model_name]
return width_mult, depth_mult, resolution
class MBConv(nn.Module):
"""
Mobile Inverted Bottleneck with Squeeze-and-Excitation.
Building block of EfficientNet.
"""
def __init__(
self,
in_channels: int,
out_channels: int,
kernel_size: int,
stride: int,
expand_ratio: int,
se_ratio: float = 0.25,
drop_connect_rate: float = 0.0
):
super().__init__()
self.use_residual = stride == 1 and in_channels == out_channels
self.drop_connect_rate = drop_connect_rate
hidden_dim = in_channels * expand_ratio
layers = []
# Expansion
if expand_ratio != 1:
layers.extend([
nn.Conv2d(in_channels, hidden_dim, 1, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(inplace=True) # Swish activation
])
# Depthwise
layers.extend([
nn.Conv2d(
hidden_dim, hidden_dim, kernel_size,
stride=stride, padding=kernel_size//2,
groups=hidden_dim, bias=False
),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(inplace=True)
])
# SE block
se_channels = max(1, int(in_channels * se_ratio))
self.se = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(hidden_dim, se_channels, 1),
nn.SiLU(inplace=True),
nn.Conv2d(se_channels, hidden_dim, 1),
nn.Sigmoid()
)
# Projection
layers.extend([
nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
nn.BatchNorm2d(out_channels)
])
self.conv = nn.Sequential(*layers)
def _drop_connect(self, x: torch.Tensor) -> torch.Tensor:
"""Stochastic depth / drop connect."""
if not self.training or self.drop_connect_rate == 0:
return x
keep_prob = 1 - self.drop_connect_rate
random_tensor = keep_prob + torch.rand(
(x.shape[0], 1, 1, 1),
dtype=x.dtype,
device=x.device
)
random_tensor.floor_()
return x / keep_prob * random_tensor
def forward(self, x):
out = self.conv[:-2](x)    # Expansion + depthwise
out = out * self.se(out)   # SE attention
out = self.conv[-2:](out)  # Projection
if self.use_residual:
out = self._drop_connect(out) + x
return out
class EfficientNet(nn.Module):
"""
EfficientNet: Rethinking Model Scaling for CNNs.
Achieves state-of-the-art accuracy with far fewer parameters
through compound scaling.
"""
def __init__(
self,
model_name: str = 'b0',
num_classes: int = 1000,
drop_connect_rate: float = 0.2
):
super().__init__()
width_mult, depth_mult, resolution = EfficientNetConfig.get_config(model_name)
def c(channels):
"""Round channels to divisible by 8."""
return int(math.ceil(channels * width_mult / 8) * 8)
def d(num_layers):
"""Scale depth."""
return int(math.ceil(num_layers * depth_mult))
# Block configuration: (expand, channels, layers, kernel, stride)
block_config = [
(1, 16, 1, 3, 1),
(6, 24, 2, 3, 2),
(6, 40, 2, 5, 2),
(6, 80, 3, 3, 2),
(6, 112, 3, 5, 1),
(6, 192, 4, 5, 2),
(6, 320, 1, 3, 1),
]
# Stem
self.stem = nn.Sequential(
nn.Conv2d(3, c(32), 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(c(32)),
nn.SiLU(inplace=True)
)
# Build blocks
blocks = []
total_blocks = sum(d(n) for _, _, n, _, _ in block_config)
block_idx = 0
in_channels = c(32)
for expand, out_c, num_layers, kernel, stride in block_config:
out_channels = c(out_c)
for layer_idx in range(d(num_layers)):
# Drop connect rate increases with depth
drop_rate = drop_connect_rate * block_idx / total_blocks
blocks.append(MBConv(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel,
stride=stride if layer_idx == 0 else 1,
expand_ratio=expand,
drop_connect_rate=drop_rate
))
in_channels = out_channels
block_idx += 1
self.blocks = nn.Sequential(*blocks)
# Head
head_channels = c(1280)
self.head = nn.Sequential(
nn.Conv2d(in_channels, head_channels, 1, bias=False),
nn.BatchNorm2d(head_channels),
nn.SiLU(inplace=True),
nn.AdaptiveAvgPool2d(1),
nn.Flatten(),
nn.Dropout(0.2),
nn.Linear(head_channels, num_classes)
)
def forward(self, x):
x = self.stem(x)
x = self.blocks(x)
x = self.head(x)
return x
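A worked example of compound scaling (a sketch built on the config class above): the width multiplier rounds channel counts up to multiples of 8, and the depth multiplier rounds per-stage layer counts up.
# How compound scaling changes one B0 stage (112 channels, 3 layers) across variants
for name in ['b0', 'b2', 'b4']:
    width_mult, depth_mult, resolution = EfficientNetConfig.get_config(name)
    scaled_channels = int(math.ceil(112 * width_mult / 8) * 8)  # same rounding as c()
    scaled_layers = int(math.ceil(3 * depth_mult))              # same rounding as d()
    print(f"{name}: width={width_mult}, depth={depth_mult}, res={resolution}, "
          f"112 -> {scaled_channels} channels, 3 -> {scaled_layers} layers")

# Forward-pass sanity check for the base model
model = EfficientNet('b0')
print(model(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 1000])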
GhostNet
class GhostModule(nn.Module):
"""
Ghost Module: Generate more features from cheap operations.
Idea: Many feature maps are similar (redundant).
Generate a few "intrinsic" features, then create "ghosts" cheaply.
"""
def __init__(
self,
in_channels: int,
out_channels: int,
kernel_size: int = 1,
ratio: int = 2, # Ghost ratio
dw_kernel: int = 3, # Depthwise kernel for ghosts
stride: int = 1,
relu: bool = True  # set False for a linear (activation-free) module
):
super().__init__()
# Number of intrinsic features
self.intrinsic_channels = out_channels // ratio
self.ghost_channels = self.intrinsic_channels * (ratio - 1)
# Primary conv (generates intrinsic features)
self.primary_conv = nn.Sequential(
nn.Conv2d(in_channels, self.intrinsic_channels, kernel_size,
stride=stride, padding=kernel_size//2, bias=False),
nn.BatchNorm2d(self.intrinsic_channels),
nn.ReLU(inplace=True) if relu else nn.Identity()
)
# Cheap operation (generates ghost features)
self.cheap_operation = nn.Sequential(
nn.Conv2d(self.intrinsic_channels, self.ghost_channels, dw_kernel,
stride=1, padding=dw_kernel//2,
groups=self.intrinsic_channels, bias=False),
nn.BatchNorm2d(self.ghost_channels),
nn.ReLU(inplace=True) if relu else nn.Identity()
)
def forward(self, x):
intrinsic = self.primary_conv(x)
ghosts = self.cheap_operation(intrinsic)
return torch.cat([intrinsic, ghosts], dim=1)
class GhostBottleneck(nn.Module):
"""Ghost Bottleneck for GhostNet."""
def __init__(
self,
in_channels: int,
hidden_channels: int,
out_channels: int,
kernel_size: int = 3,
stride: int = 1,
use_se: bool = False
):
super().__init__()
self.stride = stride
# Ghost module 1 (expansion)
self.ghost1 = GhostModule(in_channels, hidden_channels)
# Depthwise conv (if stride > 1)
if stride > 1:
self.dw_conv = nn.Sequential(
nn.Conv2d(hidden_channels, hidden_channels, kernel_size,
stride=stride, padding=kernel_size//2,
groups=hidden_channels, bias=False),
nn.BatchNorm2d(hidden_channels)
)
# SE block
self.se = SEBlock(hidden_channels) if use_se else nn.Identity()
# Ghost module 2 (projection - linear, no activation)
self.ghost2 = GhostModule(hidden_channels, out_channels, relu=False)
# Skip connection
if stride != 1 or in_channels != out_channels:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, in_channels, kernel_size,
stride=stride, padding=kernel_size//2,
groups=in_channels, bias=False),
nn.BatchNorm2d(in_channels),
nn.Conv2d(in_channels, out_channels, 1, bias=False),
nn.BatchNorm2d(out_channels)
)
else:
self.shortcut = nn.Identity()
def forward(self, x):
residual = self.shortcut(x)
out = self.ghost1(x)
if self.stride > 1:
out = self.dw_conv(out)
out = self.se(out)
out = self.ghost2(out)
return out + residual
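The split between intrinsic and ghost features is easy to verify (a minimal sketch with illustrative sizes): with ratio=2, half the output channels come from the primary convolution and half from the cheap depthwise operation.
# GhostModule output = intrinsic features + cheap "ghost" features
ghost = GhostModule(in_channels=32, out_channels=64, ratio=2)
x = torch.randn(1, 32, 28, 28)
print(ghost(x).shape)  # torch.Size([1, 64, 28, 28])
print(f"intrinsic: {ghost.intrinsic_channels}, ghosts: {ghost.ghost_channels}")  # 32, 32

# Ghost bottleneck with downsampling
bottleneck = GhostBottleneck(in_channels=16, hidden_channels=48, out_channels=24,
                             stride=2, use_se=True)
print(bottleneck(torch.randn(1, 16, 56, 56)).shape)  # torch.Size([1, 24, 28, 28])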
Model Comparison
def compare_efficiency():
"""Compare efficient architectures."""
comparison = """
╔══════════════════════════════════════════════════════════════════════╗
║ EFFICIENT ARCHITECTURE COMPARISON ║
╠══════════════════════════════════════════════════════════════════════╣
║ ║
║ Model Params FLOPs Top-1 Latency* ║
║ ──────────────────────────────────────────────────────────────── ║
║ MobileNetV1 4.2M 569M 70.6% 33ms ║
║ MobileNetV2 3.4M 300M 72.0% 29ms ║
║ MobileNetV3-S 2.5M 56M 67.4% 15ms ║
║ MobileNetV3-L 5.4M 219M 75.2% 27ms ║
║ ║
║ ShuffleNetV2 2.3M 146M 69.4% 20ms ║
║ ║
║ EfficientNet-B0 5.3M 390M 77.1% 35ms ║
║ EfficientNet-B1 7.8M 700M 79.1% 52ms ║
║ EfficientNet-B4 19M 4.2B 82.9% 166ms ║
║ ║
║ GhostNet 5.2M 141M 73.9% 25ms ║
║ ║
║ *Latency on mobile device (Pixel 3) ║
║ ║
╠══════════════════════════════════════════════════════════════════════╣
║ KEY DESIGN PRINCIPLES ║
╠══════════════════════════════════════════════════════════════════════╣
║ ║
║ 1. Depthwise Separable Convolutions ║
║ • 8-9x fewer FLOPs than standard conv ║
║ • Used by all efficient architectures ║
║ ║
║ 2. Inverted Residuals ║
║ • Expand → Depthwise → Project ║
║ • Better gradient flow ║
║ ║
║ 3. Squeeze-and-Excitation ║
║ • Channel attention with minimal overhead ║
║ • 0.5-1% accuracy boost ║
║ ║
║ 4. Neural Architecture Search ║
║ • MobileNetV3, EfficientNet optimized by NAS ║
║ • Better than hand-designed ║
║ ║
║ 5. Compound Scaling ║
║ • Scale depth, width, resolution together ║
║ • EfficientNet's key innovation ║
║ ║
╚══════════════════════════════════════════════════════════════════════╝
"""
print(comparison)
compare_efficiency()
Exercises
Exercise 1: Profile Your Model
Measure actual FLOPs and latency:
from thop import profile  # third-party package: pip install thop
model = MobileNetV2()
dummy_input = torch.randn(1, 3, 224, 224)
flops, params = profile(model, inputs=(dummy_input,))
print(f"FLOPs: {flops/1e9:.2f}G, Params: {params/1e6:.2f}M")
Exercise 2: Design Custom Efficient Block
Create your own efficient block combining (a starter skeleton follows the list):
- Depthwise separable convolution
- Squeeze-and-excitation
- Residual connection
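A possible starting point (a sketch reusing DepthwiseSeparableConv and SEBlock from earlier; extend it as you see fit):
class MyEfficientBlock(nn.Module):
    """Starter skeleton: depthwise separable conv + SE + optional residual."""
    def __init__(self, in_channels: int, out_channels: int, stride: int = 1):
        super().__init__()
        self.use_residual = stride == 1 and in_channels == out_channels
        self.conv = DepthwiseSeparableConv(in_channels, out_channels, stride=stride)
        self.se = SEBlock(out_channels)
        # TODO: try an expansion stage, larger kernels, or h-swish activations

    def forward(self, x):
        out = self.se(self.conv(x))
        return x + out if self.use_residual else out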
Exercise 3: Width vs Depth Trade-off
Experiment with different width/depth trade-offs (a setup sketch follows the list):
- Wide and shallow
- Narrow and deep
- Balanced
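One way to set the experiment up (a sketch; the candidate models and multipliers are arbitrary choices): pick variants with different width/depth emphasis, check their parameter budgets, then train each on your dataset and compare accuracy at comparable FLOPs or latency.
# Starting point: compare parameter budgets before training each variant
candidates = {
    "wider (MobileNetV2, width x1.4)": MobileNetV2(width_mult=1.4),
    "deeper (EfficientNet-B1: depth x1.1, width x1.0)": EfficientNet('b1'),
    "balanced baseline (EfficientNet-B0)": EfficientNet('b0'),
}
for name, model in candidates.items():
    n_params = sum(p.numel() for p in model.parameters())
    print(f"{name}: {n_params / 1e6:.2f}M params")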