Skip to main content

Embedded Systems Programming

Write C code that runs on microcontrollers with limited RAM, no OS, and direct hardware access. This is where C truly shines.

Embedded Constraints

Limited RAM

Often 2KB-256KB total

No Heap

Static allocation only

No OS

Bare metal or RTOS

Real-time

Deterministic timing required

Memory-Mapped I/O

// Hardware registers are at fixed memory addresses
// Define registers with volatile pointers

// Example: GPIO registers for a generic ARM MCU
// Start address of this GPIO port's register block
#define GPIO_BASE    0x40020000
// Each macro below dereferences a volatile 32-bit pointer at
// GPIO_BASE + <byte offset>, so it can be read or assigned like a variable
// and every access really reaches the hardware register.
#define GPIO_MODER   (*(volatile uint32_t*)(GPIO_BASE + 0x00))
#define GPIO_ODR     (*(volatile uint32_t*)(GPIO_BASE + 0x14))
#define GPIO_IDR     (*(volatile uint32_t*)(GPIO_BASE + 0x10))
#define GPIO_BSRR    (*(volatile uint32_t*)(GPIO_BASE + 0x18))

// Better: Use a struct for register layout
// The members are consecutive 32-bit words, so each one's byte offset from
// the port base is 4 * its position (MODER at 0x00, IDR at 0x10, ODR at
// 0x14, BSRR at 0x18 - the same offsets as the individual macros above).
typedef struct {
    volatile uint32_t MODER;    // Mode register
    volatile uint32_t OTYPER;   // Output type
    volatile uint32_t OSPEEDR;  // Speed
    volatile uint32_t PUPDR;    // Pull-up/down
    volatile uint32_t IDR;      // Input data
    volatile uint32_t ODR;      // Output data
    volatile uint32_t BSRR;     // Bit set/reset
    volatile uint32_t LCKR;     // Lock
    volatile uint32_t AFR[2];   // Alternate function
} GPIO_TypeDef;

// Port instances: the fixed base address of each port viewed through the
// register layout above, used as GPIOA->ODR, GPIOB->IDR, ...
#define GPIOA ((GPIO_TypeDef*)0x40020000)
#define GPIOB ((GPIO_TypeDef*)0x40020400)

// Toggle LED on pin 5 of GPIOA.
//
// The output state is sampled once from ODR and the change is applied
// through BSRR (low half-word: set pin, high half-word: reset pin). Unlike
// `ODR ^= mask`, the write touches pin 5 only, so a different pin of the
// same port that an interrupt handler changes between the read and the
// write is not overwritten with a stale value.
void toggle_led(void) {
    const uint32_t pin_mask = 1UL << 5;
    const uint32_t output   = GPIOA->ODR;

    // Pin currently high -> request reset (upper half); low -> request set
    GPIOA->BSRR = ((output & pin_mask) << 16) | (~output & pin_mask);
}

// Read button on pin 0 of GPIOB.
// The input is active low: a cleared IDR bit 0 is reported as "pressed".
bool button_pressed(void) {
    const uint32_t pin_level = GPIOB->IDR & (1 << 0);

    return pin_level == 0;
}

Bit Manipulation

// Essential bit manipulation macros
// `reg` must be a modifiable lvalue; `bit` is a zero-based bit index.
#define BIT(n)              (1UL << (n))
#define SET_BIT(reg, bit)   ((reg) |= BIT(bit))
#define CLR_BIT(reg, bit)   ((reg) &= ~BIT(bit))
#define TOG_BIT(reg, bit)   ((reg) ^= BIT(bit))
#define GET_BIT(reg, bit)   (((reg) >> (bit)) & 1)

// Multi-bit field manipulation
// MASK(width) is `width` low-order one bits. Shifting 1UL by the full width
// of unsigned long (32 on a 32-bit MCU) is undefined behavior, so a field
// that spans the whole word is special-cased to all ones.
// (Assumes 8-bit bytes; `width` is evaluated twice - pass a plain value.)
#define MASK(width) \
    (((unsigned long)(width) >= sizeof(unsigned long) * 8U) \
        ? ~0UL \
        : ((1UL << (width)) - 1UL))
// Extract the `width`-bit field whose least significant bit is `shift`.
#define GET_FIELD(reg, shift, width) (((reg) >> (shift)) & MASK(width))
// Replace that field with `val` (truncated to `width` bits), leaving all
// other bits of `reg` untouched. `reg` is read once and written once, but
// appears twice in the expansion: no side effects in the argument.
#define SET_FIELD(reg, shift, width, val) \
    ((reg) = ((reg) & ~(MASK(width) << (shift))) | (((val) & MASK(width)) << (shift)))

// Example: Configure GPIO pin mode (2 bits per pin)
// The enumerator values are the raw 2-bit codes written into a pin's mode
// field, which is why they are numbered explicitly.
typedef enum {
    GPIO_MODE_INPUT  = 0,
    GPIO_MODE_OUTPUT = 1,
    GPIO_MODE_AF     = 2,
    GPIO_MODE_ANALOG = 3
} gpio_mode_t;

// Program the 2-bit mode field of one pin in a port's MODER register.
//
// gpio: register block of the port to modify.
// pin:  pin number, 0-15. Anything else is ignored: the field would start
//       at bit 32 or above (or at a negative position), and shifting by
//       such an amount is undefined behavior.
// mode: new mode; SET_FIELD writes only its low two bits.
void gpio_set_mode(GPIO_TypeDef *gpio, int pin, gpio_mode_t mode) {
    if (pin < 0 || pin > 15) {
        return;  // No such pin in a 32-bit MODER
    }
    SET_FIELD(gpio->MODER, pin * 2, 2, mode);
}

// Atomic bit-banding (optional feature of Cortex-M3/M4 cores)
// Allows atomic single-bit access: each bit of the SRAM bit-band region is
// mirrored by one 32-bit word in the alias region starting at
// BITBAND_SRAM_BASE, so changing one bit is a single store.
#define BITBAND_SRAM_BASE 0x22000000
// alias word = alias base + (byte offset of addr from 0x20000000) * 32
//              + bit * 4; `addr` must be an integer, not a pointer.
#define BITBAND_SRAM(addr, bit) \
    (*(volatile uint32_t*)(BITBAND_SRAM_BASE + ((addr) - 0x20000000) * 32 + (bit) * 4))

// Atomic flag operations without disabling interrupts
// `flags` is accessed one bit at a time through BITBAND_SRAM; the macros
// below are bit indices into it, not masks.
volatile uint32_t flags;
#define FLAG_DATA_READY 0
#define FLAG_ERROR      1

void set_flag(int bit) {
    BITBAND_SRAM((uint32_t)&flags, bit) = 1;
}

// Report whether flag number `bit` is set, read through the bit-band alias
// word that mirrors that bit of `flags`.
bool check_flag(int bit) {
    const uint32_t flags_addr = (uint32_t)&flags;

    return BITBAND_SRAM(flags_addr, bit) != 0;
}

Interrupt Handlers

// Interrupt vector table (startup code)
// Every entry is a pointer to a handler taking and returning nothing.
typedef void (*irq_handler_t)(void);

extern uint32_t _estack;  // From linker script

// The section attribute places the table in ".isr_vector"; the linker
// script decides where that section ends up in memory.
// Entry 0 is not a handler: it holds the initial stack pointer, cast to the
// element type only so it fits in the array. Reserved slots are 0.
__attribute__((section(".isr_vector")))
const irq_handler_t vector_table[] = {
    (irq_handler_t)&_estack,      // Initial stack pointer
    Reset_Handler,                 // Reset
    NMI_Handler,
    HardFault_Handler,
    MemManage_Handler,
    BusFault_Handler,
    UsageFault_Handler,
    0, 0, 0, 0,                   // Reserved
    SVC_Handler,
    DebugMon_Handler,
    0,                            // Reserved
    PendSV_Handler,
    SysTick_Handler,
    // External interrupts
    WWDG_IRQHandler,
    PVD_IRQHandler,
    // ... more handlers
};

// Interrupt handler best practices
// Tick counter; volatile because it is modified in interrupt context.
volatile uint32_t tick_count = 0;

// SysTick ISR: a single increment and nothing else, so it returns quickly.
void SysTick_Handler(void) {
    tick_count = tick_count + 1;
}

// Ring buffer for UART RX (interrupt-safe)
// The ISR below only writes `head` and uart_read() only writes `tail`.
// One slot is always left unused so that head == tail means "empty";
// the buffer therefore holds at most RX_BUFFER_SIZE - 1 bytes.
#define RX_BUFFER_SIZE 64
volatile struct {
    uint8_t data[RX_BUFFER_SIZE];
    volatile uint8_t head;  // Next slot to write
    volatile uint8_t tail;  // Next slot to read
} rx_buffer;

// USART1 ISR: when RXNE is set, read the data register and append the byte
// to rx_buffer. If the buffer is full the byte has still been read from DR
// but is then discarded.
void USART1_IRQHandler(void) {
    if (!(USART1->SR & USART_SR_RXNE)) {
        return;  // Nothing received
    }

    const uint8_t received  = USART1->DR;
    const uint8_t next_head = (rx_buffer.head + 1) % RX_BUFFER_SIZE;

    if (next_head == rx_buffer.tail) {
        return;  // Full: advancing head would make the buffer look empty
    }

    rx_buffer.data[rx_buffer.head] = received;
    rx_buffer.head = next_head;  // Publish only after the data is stored
}

// Safe read from main loop
// Removes and returns the oldest byte in rx_buffer (0-255), or -1 when the
// buffer is empty. Only `tail` is written here.
int uart_read(void) {
    if (rx_buffer.head != rx_buffer.tail) {
        const uint8_t value = rx_buffer.data[rx_buffer.tail];

        rx_buffer.tail = (rx_buffer.tail + 1) % RX_BUFFER_SIZE;
        return value;
    }

    return -1;  // Empty
}

Critical Sections

// Disable interrupts for critical sections
// Reads PRIMASK, then masks interrupts with `cpsid i`.
// Returns the PRIMASK value from before the call, to be handed back to
// restore_interrupts(); saving it makes nested critical sections safe.
//
// The "memory" clobber is a compiler barrier: without it the compiler may
// move loads and stores of shared data across this point, i.e. out of the
// critical section the caller is trying to open.
static inline uint32_t disable_interrupts(void) {
    uint32_t primask;
    __asm__ volatile(
        "mrs %0, primask\n"
        "cpsid i\n"
        : "=r"(primask)
        :
        : "memory"
    );
    return primask;
}

// Write `primask` (a value returned by disable_interrupts()) back to
// PRIMASK, re-enabling interrupts only if they were enabled before.
//
// The "memory" clobber is a compiler barrier: it keeps the compiler from
// sinking loads and stores of shared data below this point, where
// interrupts may be enabled again.
static inline void restore_interrupts(uint32_t primask) {
    __asm__ volatile("msr primask, %0" : : "r"(primask) : "memory");
}

// Usage
// Increment shared_data with interrupts masked, then put the interrupt
// mask back exactly as it was on entry.
void critical_operation(void) {
    const uint32_t saved_primask = disable_interrupts();

    shared_data += 1;  // Critical section - no interrupts here

    restore_interrupts(saved_primask);
}

// Scoped critical-section macro (a plain C99 for-loop, no compiler
// extension needed)
// The loop body runs exactly once: interrupts are disabled in the init
// clause and restored in the step clause, after the body has finished.
// NOTE: leaving the body with break, return or goto skips the step clause,
// so interrupts would stay disabled.
// NOTE(review): identifiers containing "__" are reserved for the
// implementation; consider renaming __state/__done.
#define CRITICAL_SECTION \
    for (uint32_t __state = disable_interrupts(), __done = 0; \
         !__done; \
         restore_interrupts(__state), __done = 1)

// Example use of CRITICAL_SECTION: the braced block becomes the body of
// the macro's for-loop, so the assignment runs between
// disable_interrupts() and restore_interrupts().
void example(void) {
    CRITICAL_SECTION {
        // Protected code
        shared_resource = new_value;
    }
}

Static Memory Allocation

// No malloc in embedded! Use static allocation

// Static buffer pools
#define MAX_MESSAGES 16
#define MESSAGE_SIZE 64

// One fixed-size pool slot.
typedef struct {
    uint8_t data[MESSAGE_SIZE];  // Payload storage
    uint8_t in_use;              // Non-zero while the slot is handed out
} Message;

// Backing storage. Static storage duration zero-initializes it, so every
// slot starts out free.
static Message message_pool[MAX_MESSAGES];

// Hand out the first free slot and mark it in use.
// Returns NULL when all MAX_MESSAGES slots are taken. The payload is not
// cleared, so it may still hold a previous user's bytes.
// No locking is done here; callers sharing the pool with an ISR must
// provide their own protection.
Message *message_alloc(void) {
    for (int i = 0; i < MAX_MESSAGES; i++) {
        if (!message_pool[i].in_use) {
            message_pool[i].in_use = 1;
            return &message_pool[i];
        }
    }
    return NULL;  // Pool exhausted
}

// Return a slot obtained from message_alloc() to the pool.
// Passing NULL is a no-op (like free(NULL)), so the result of a failed
// message_alloc() can be released unconditionally on cleanup paths.
void message_free(Message *msg) {
    if (msg == NULL) {
        return;
    }
    msg->in_use = 0;
}

// Static ring buffer
#define QUEUE_SIZE 32

// Byte FIFO with an explicit element count, so all QUEUE_SIZE slots are
// usable and full/empty never have to be inferred from head == tail.
typedef struct {
    uint8_t buffer[QUEUE_SIZE];
    uint8_t head;
    uint8_t tail;
    uint8_t count;
} Queue;

// Reset the queue to empty. Stored bytes are left as they are.
void queue_init(Queue *q) {
    q->count = 0;
    q->tail = 0;
    q->head = 0;
}

// Append one byte at the head position.
// Returns false, leaving the queue unchanged, when it is already full.
bool queue_put(Queue *q, uint8_t data) {
    const bool has_room = q->count < QUEUE_SIZE;

    if (has_room) {
        const uint8_t slot = q->head;

        q->buffer[slot] = data;
        q->head = (slot + 1) % QUEUE_SIZE;
        q->count++;
    }
    return has_room;
}

// Remove the oldest byte and store it in *data.
// Returns false, leaving *data untouched, when the queue is empty.
bool queue_get(Queue *q, uint8_t *data) {
    const bool has_data = q->count != 0;

    if (has_data) {
        const uint8_t slot = q->tail;

        *data = q->buffer[slot];
        q->tail = (slot + 1) % QUEUE_SIZE;
        q->count--;
    }
    return has_data;
}

State Machines

// State machine pattern for embedded systems
// States. The enumerators take the values 0..4 in this order; the handler
// table further down is indexed by them.
typedef enum {
    STATE_IDLE,
    STATE_RECEIVING,
    STATE_PROCESSING,
    STATE_TRANSMITTING,
    STATE_ERROR
} SystemState;

// Events that can be delivered to the state machine.
typedef enum {
    EVENT_START,
    EVENT_DATA_READY,
    EVENT_COMPLETE,
    EVENT_TIMEOUT,
    EVENT_ERROR
} Event;

// All state the machine carries between events.
typedef struct {
    SystemState state;      // Current state
    uint8_t rx_buffer[64];  // Bytes collected while receiving
    uint8_t rx_count;       // Number of bytes stored in rx_buffer
    uint32_t timestamp;     // Tick value saved when reception starts
} System;

// State handler function type
// A handler reacts to one event and returns the state to be in next
// (returning its own state means "no transition").
typedef SystemState (*StateHandler)(System *sys, Event event);

// State handlers
// IDLE: only EVENT_START has an effect. It empties the receive buffer,
// records the current tick and moves to RECEIVING; every other event
// leaves the machine in IDLE.
static SystemState handle_idle(System *sys, Event event) {
    if (event != EVENT_START) {
        return STATE_IDLE;
    }

    sys->rx_count = 0;
    sys->timestamp = get_tick();
    return STATE_RECEIVING;
}

// RECEIVING: store one UART byte per EVENT_DATA_READY.
//   - buffer becomes full -> PROCESSING
//   - EVENT_TIMEOUT       -> ERROR
//   - anything else       -> stay in RECEIVING
//
// The capacity is checked BEFORE the store: if this state is ever entered
// with rx_count already at capacity (e.g. a transition back to RECEIVING
// that did not reset the count), the byte is left unread and the machine
// moves on to PROCESSING instead of writing past the end of rx_buffer.
static SystemState handle_receiving(System *sys, Event event) {
    if (event == EVENT_TIMEOUT) {
        return STATE_ERROR;
    }
    if (event != EVENT_DATA_READY) {
        return STATE_RECEIVING;
    }

    // rx_buffer holds one-byte elements, so sizeof is its element count
    if (sys->rx_count >= sizeof sys->rx_buffer) {
        return STATE_PROCESSING;
    }

    sys->rx_buffer[sys->rx_count++] = read_uart();
    if (sys->rx_count >= sizeof sys->rx_buffer) {
        return STATE_PROCESSING;
    }
    return STATE_RECEIVING;
}

// State table
// Maps each SystemState to its handler. Designated initializers tie every
// entry to its enumerator, so the table stays correct if the enum is
// reordered.
static const StateHandler state_handlers[] = {
    [STATE_IDLE]        = handle_idle,
    [STATE_RECEIVING]   = handle_receiving,
    [STATE_PROCESSING]  = handle_processing,
    [STATE_TRANSMITTING] = handle_transmitting,
    [STATE_ERROR]       = handle_error
};

// Deliver one event to the state machine: call the handler registered for
// the current state and adopt the state it returns.
//
// sys->state is validated before it is used as a table index. A value
// outside the table (corrupted or uninitialized state) or a table slot
// with no handler puts the machine into STATE_ERROR instead of reading or
// calling through an invalid pointer.
void system_handle_event(System *sys, Event event) {
    const unsigned int index = (unsigned int)sys->state;  // negative -> huge
    const unsigned int handler_count =
        sizeof state_handlers / sizeof state_handlers[0];

    if (index >= handler_count || !state_handlers[index]) {
        sys->state = STATE_ERROR;
        return;
    }

    sys->state = state_handlers[index](sys, event);
}

Peripheral Drivers

// Example: SPI driver
// Register layout of one SPI peripheral: consecutive 32-bit registers
// starting at the peripheral's base address.
typedef struct {
    volatile uint32_t CR1;
    volatile uint32_t CR2;
    volatile uint32_t SR;
    volatile uint32_t DR;
    // ... more registers
} SPI_TypeDef;

// SPI1 instance: its fixed base address viewed through the layout above
#define SPI1 ((SPI_TypeDef*)0x40013000)

// Status bits
// Masks to be ANDed with SPI_TypeDef.SR
#define SPI_SR_TXE   (1 << 1)  // TX empty
#define SPI_SR_RXNE  (1 << 0)  // RX not empty
#define SPI_SR_BSY   (1 << 7)  // Busy

// Configure `spi` with a single write to CR1: master mode, baud-rate
// field = 3 (clock divided by 16), peripheral enabled. All other CR1 bits
// are written as zero.
void spi_init(SPI_TypeDef *spi) {
    uint32_t cr1 = 0;

    cr1 |= (1 << 2);  // Master
    cr1 |= (3 << 3);  // Baud rate /16
    cr1 |= (1 << 6);  // Enable

    spi->CR1 = cr1;
}

// Exchange one byte over SPI, blocking until the reply has arrived.
// Busy-waits (without timeout) for TXE, writes `data` to DR, busy-waits
// for RXNE, then returns the low 8 bits read back from DR.
uint8_t spi_transfer(SPI_TypeDef *spi, uint8_t data) {
    while ((spi->SR & SPI_SR_TXE) == 0) {
        // Transmit register still occupied
    }

    spi->DR = data;

    while ((spi->SR & SPI_SR_RXNE) == 0) {
        // Reply not received yet
    }

    const uint8_t reply = spi->DR;
    return reply;
}

// Send `len` bytes from `buf`, one blocking spi_transfer() per byte.
// The bytes received in exchange are discarded.
void spi_write_buffer(SPI_TypeDef *spi, const uint8_t *buf, size_t len) {
    const uint8_t *cursor = buf;
    size_t remaining = len;

    while (remaining > 0) {
        (void)spi_transfer(spi, *cursor);
        cursor++;
        remaining--;
    }
}

// DMA for efficient transfers
// Send `len` bytes from `buf` over `spi` using DMA1 channel 3, blocking
// until the last byte has left the shift register.
//
// DMA_CCR bits used (STM32 DMA with CCR/CNDTR/CPAR/CMAR registers):
//   bit 0 EN   - channel enable
//   bit 4 DIR  - 1 = read from memory, write to peripheral
//   bit 7 MINC - increment the memory address after each transfer
// Both DIR and MINC are required: with DIR alone the memory address never
// advances and buf[0] is transmitted `len` times.
//
// CNDTR is a 16-bit counter, so longer buffers are sent in chunks of at
// most 65535 bytes. The channel is disabled before its registers are
// written because they may only be programmed while EN is clear.
void spi_dma_write(SPI_TypeDef *spi, const uint8_t *buf, size_t len) {
    const uint32_t ccr_en    = 1UL << 0;
    const uint32_t ccr_dir   = 1UL << 4;
    const uint32_t ccr_minc  = 1UL << 7;
    const uint32_t cr2_txdma = 1UL << 1;  // SPI CR2: TX DMA enable
    const uint32_t sr_txe    = 1UL << 1;  // SPI SR: TX empty
    const uint32_t sr_bsy    = 1UL << 7;  // SPI SR: busy
    const size_t   max_chunk = 0xFFFF;    // Largest count CNDTR can hold

    while (len > 0) {
        const size_t chunk = (len > max_chunk) ? max_chunk : len;

        // Configure DMA channel (must be disabled while doing so)
        DMA1_Channel3->CCR &= ~ccr_en;
        DMA1_Channel3->CMAR = (uint32_t)buf;
        DMA1_Channel3->CPAR = (uint32_t)&spi->DR;
        DMA1_Channel3->CNDTR = (uint32_t)chunk;
        DMA1_Channel3->CCR = ccr_minc | ccr_dir | ccr_en;

        // Enable SPI DMA
        spi->CR2 |= cr2_txdma;

        // Wait for the DMA to hand over every byte of this chunk
        while (DMA1_Channel3->CNDTR > 0);

        buf += chunk;
        len -= chunk;
    }

    // CNDTR == 0 only means the last byte reached the SPI data register;
    // wait until it has actually been shifted out.
    while (!(spi->SR & sr_txe));
    while (spi->SR & sr_bsy);

    // Leave both peripherals idle for the next user
    spi->CR2 &= ~cr2_txdma;
    DMA1_Channel3->CCR &= ~ccr_en;
}

Timing and Delays

// SysTick timer for millisecond timing
// Each macro dereferences a volatile 32-bit pointer at the register's
// fixed address, so it can be assigned to or read like a variable.
#define SYSTICK_LOAD  (*(volatile uint32_t*)0xE000E014)
#define SYSTICK_VAL   (*(volatile uint32_t*)0xE000E018)
#define SYSTICK_CTRL  (*(volatile uint32_t*)0xE000E010)

// Tick counter incremented by SysTick_Handler; volatile because it changes
// in interrupt context while other code polls it.
static volatile uint32_t ticks = 0;

// Start SysTick with a 1 ms period.
// cpu_freq: core clock in Hz; the reload value is cpu_freq / 1000 - 1.
void systick_init(uint32_t cpu_freq) {
    const uint32_t cycles_per_ms = cpu_freq / 1000;

    SYSTICK_LOAD = cycles_per_ms - 1;  // 1ms period
    SYSTICK_VAL = 0;
    // 0x07: Enable, interrupt, use CPU clock
    SYSTICK_CTRL = (1UL << 2) | (1UL << 1) | (1UL << 0);
}

// SysTick ISR: advance the tick counter by one.
void SysTick_Handler(void) {
    ticks = ticks + 1;
}

// Return the current value of the tick counter.
uint32_t millis(void) {
    const uint32_t now = ticks;

    return now;
}

// Busy-wait until `ms` ticks have elapsed.
// The elapsed time is computed as an unsigned difference, so the wait is
// still correct when the tick counter wraps around during the delay.
void delay_ms(uint32_t ms) {
    const uint32_t began = ticks;

    for (;;) {
        if ((ticks - began) >= ms) {
            break;
        }
    }
}

// Microsecond delay using cycle counting
// Busy-waits for `us` microseconds using the DWT cycle counter
// (Cortex-M3/M4).
//
// The counter is enabled if necessary but never reset: it is a shared,
// free-running resource, and zeroing it would corrupt any other
// measurement in progress (including a delay_us() that this call
// interrupted). Elapsed cycles are taken as an unsigned difference from
// the value sampled on entry, which stays correct across counter
// wrap-around.
//
// Limit: `us * 72` must fit in 32 bits, i.e. us <= 59652323 (~59 s).
void delay_us(uint32_t us) {
    // Assuming 72MHz clock, 72 cycles per microsecond
    const uint32_t cycles = us * 72;

    // Make sure the cycle counter is running (both writes are idempotent)
    CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;
    DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;

    const uint32_t start = DWT->CYCCNT;
    while ((DWT->CYCCNT - start) < cycles);
}

// Non-blocking timer
// Holds a start point and a length; the caller polls timer_expired()
// instead of blocking in a delay loop.
typedef struct {
    uint32_t start;     // Tick count captured by timer_start()
    uint32_t duration;  // Length of the interval, in ticks
} Timer;

// Arm `t` to expire `duration_ms` ticks after the current tick count.
void timer_start(Timer *t, uint32_t duration_ms) {
    const uint32_t now = ticks;

    t->start = now;
    t->duration = duration_ms;
}

// Return true once at least t->duration ticks have passed since
// timer_start(). The unsigned subtraction keeps the comparison correct
// across tick counter wrap-around.
bool timer_expired(Timer *t) {
    const uint32_t elapsed = ticks - t->start;

    return elapsed >= t->duration;
}

Low Power Modes

// Power management
// Power modes the application distinguishes.
// NOTE(review): nothing in the code shown here uses this enum; presumably
// it is meant for a mode-selection API - confirm.
typedef enum {
    POWER_RUN,
    POWER_SLEEP,
    POWER_STOP,
    POWER_STANDBY
} PowerMode;

// Enter Sleep mode: with SLEEPDEEP cleared, WFI halts the core until the
// next interrupt.
void enter_sleep(void) {
    const uint32_t scr = SCB->SCR;

    SCB->SCR = scr & ~SCB_SCR_SLEEPDEEP_Msk;  // Clear SLEEPDEEP bit
    __WFI();                                  // Wait for interrupt
}

// Enter Stop mode and reinitialize the clocks after wake-up.
//
// SLEEPDEEP is cleared again as soon as the core wakes. It is a sticky
// bit: if left set, every later plain __WFI() (for example the one in an
// idle loop) would silently enter deep sleep again instead of ordinary
// Sleep mode, without the clock re-initialization done here.
void enter_stop(void) {
    // Set SLEEPDEEP
    SCB->SCR |= SCB_SCR_SLEEPDEEP_Msk;

    // Configure stop mode in PWR register
    PWR->CR |= PWR_CR_LPDS;  // Low-power deepsleep

    __WFI();

    // Back to normal sleep behaviour for subsequent WFI instructions
    SCB->SCR &= ~SCB_SCR_SLEEPDEEP_Msk;

    // Reconfigure clocks after wakeup
    clock_init();
}

// Idle task for RTOS
// Never returns: each pass parks the core in WFI until an interrupt
// arrives, so no cycles are burned while there is nothing to do.
void idle_task(void) {
    for (;;) {
        __WFI();  // Enter low-power mode when nothing to do
    }
}

Linker Script Basics

/* Basic linker script for STM32 */
/* Memory regions the sections below are assigned to */
MEMORY
{
    FLASH (rx)  : ORIGIN = 0x08000000, LENGTH = 128K
    SRAM (rwx)  : ORIGIN = 0x20000000, LENGTH = 20K
}

SECTIONS
{
    /* Vector table: first output section, so it sits at the start of
       FLASH. KEEP prevents it from being garbage-collected even though
       no code references it. */
    .isr_vector :
    {
        . = ALIGN(4);
        KEEP(*(.isr_vector))
        . = ALIGN(4);
    } > FLASH
    
    /* Code and read-only data; _etext marks the end of this section */
    .text :
    {
        . = ALIGN(4);
        *(.text)
        *(.text*)
        *(.rodata)
        *(.rodata*)
        . = ALIGN(4);
        _etext = .;
    } > FLASH
    
    /* Initialized data: addressed in SRAM at run time (_sdata.._edata) but
       stored in FLASH ("AT> FLASH"); the startup code copies it across. */
    .data :
    {
        . = ALIGN(4);
        _sdata = .;
        *(.data)
        *(.data*)
        . = ALIGN(4);
        _edata = .;
    } > SRAM AT> FLASH
    
    /* Zero-initialized data: occupies SRAM only (_sbss.._ebss); the
       startup code clears it. */
    .bss :
    {
        . = ALIGN(4);
        _sbss = .;
        *(.bss)
        *(.bss*)
        *(COMMON)
        . = ALIGN(4);
        _ebss = .;
    } > SRAM
    
    /* Initial stack pointer: one past the last byte of SRAM */
    _estack = ORIGIN(SRAM) + LENGTH(SRAM);
}

Startup Code

// Minimal startup code
// Symbols defined by the linker script; only their addresses are used.
extern uint32_t _etext, _sdata, _edata, _sbss, _ebss, _estack;
extern int main(void);

// Reset entry point: set up the C runtime, then run the application.
//   1. copy the initial values of .data from flash (starting at _etext)
//      to RAM (_sdata.._edata)
//   2. zero .bss (_sbss.._ebss)
//   3. call SystemInit(), then main()
// Both loops move whole 32-bit words. If main() ever returns, the handler
// spins forever.
void Reset_Handler(void) {
    uint32_t *flash_word = &_etext;

    // Copy .data from flash to RAM
    for (uint32_t *ram_word = &_sdata; ram_word < &_edata; ram_word++) {
        *ram_word = *flash_word;
        flash_word++;
    }

    // Zero .bss
    for (uint32_t *ram_word = &_sbss; ram_word < &_ebss; ram_word++) {
        *ram_word = 0;
    }

    // Initialize system
    SystemInit();

    // Call main
    main();

    // Should never return
    for (;;) {
    }
}

// Default handler for unimplemented interrupts
// Spins forever, so an unexpected interrupt stops the program here.
void Default_Handler(void) {
    for (;;) {
    }
}

// Weak aliases - can be overridden
// Each name is declared as a weak alias of Default_Handler (GCC attribute
// syntax): defining a normal function with the same name elsewhere
// replaces the alias at link time.
void NMI_Handler(void)       __attribute__((weak, alias("Default_Handler")));
void HardFault_Handler(void) __attribute__((weak, alias("Default_Handler")));
// ... more handlers

Best Practices

  1. Always use volatile for hardware registers and for variables shared with interrupt handlers - but note that volatile alone does not make an access atomic
  2. Avoid dynamic allocation - use static pools and arrays
  3. Keep ISRs short - defer work to main loop
  4. Use fixed-width types - uint8_t, uint32_t, etc.
  5. Design for power - sleep whenever possible
  6. Handle all error cases - embedded systems can’t crash gracefully
  7. Use watchdog timers - recover from hangs
  8. Document hardware dependencies - register addresses, timing requirements

Next Up

Linux Kernel Modules

Write code that runs inside the kernel