Skip to main content

Dynamic Memory Management

The heap is where long-lived, variable-sized data lives. Let’s master it. Dynamic memory on the heap and common bugs

Understanding malloc Overhead

The Hidden Costs

Before using malloc, understand its costs: The hidden work malloc does:
  1. Find free block: Search free list (O(n) in worst case)
  2. Split block: If block is larger than requested
  3. Update metadata: Track allocation size, free list pointers
  4. Alignment: Ensure proper alignment (adds padding)
  5. Thread safety: Lock/unlock mutex (multi-threaded programs)
Typical overhead:
  • Time: ~100-500ns per allocation (200-1000 CPU cycles)
  • Space: 8-16 bytes of metadata per allocation
  • Fragmentation: Can waste 10-30% of heap over time
Real-world impact: A program doing 1 million allocations/second spends 10-50% of CPU time just in malloc! When this matters:
  • Game engines (allocate every frame, 60 FPS = 16ms budget)
  • Network servers (allocate per request, thousands/second)
  • Real-time systems (unpredictable latency)
  • High-frequency trading (microseconds matter)
Why malloc is slow:
// What you write:
int *p = malloc(sizeof(int));

// What actually happens:
// 1. Lock heap mutex
// 2. Search free list for suitable block
// 3. Split block if too large
// 4. Update free list pointers
// 5. Store size metadata
// 6. Align pointer
// 7. Unlock heap mutex
// 8. Return pointer
This is why custom allocators exist - they trade generality for speed.

The malloc Family

Basic Usage

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void) {
    // malloc: allocate uninitialized memory
    int *arr = malloc(100 * sizeof(int));
    if (!arr) {
        fprintf(stderr, "malloc failed\n");
        return 1;
    }
    // Contents are UNDEFINED - could be anything!
    
    // calloc: allocate zero-initialized memory
    int *zeros = calloc(100, sizeof(int));
    if (!zeros) {
        free(arr);
        return 1;
    }
    // All elements guaranteed to be 0
    
    // realloc: resize allocation
    int *bigger = realloc(arr, 200 * sizeof(int));
    if (!bigger) {
        // Original arr is still valid!
        free(arr);
        free(zeros);
        return 1;
    }
    arr = bigger;  // arr might have moved!
    // New elements (100-199) are uninitialized
    
    // free: release memory
    free(arr);
    free(zeros);
    // After free, pointers are invalid (dangling)
    arr = NULL;   // Good practice: null after free
    zeros = NULL;
    
    return 0;
}

realloc Gotchas

// WRONG: memory leak if realloc fails
ptr = realloc(ptr, new_size);  // If fails, original ptr is lost!

// CORRECT: preserve original on failure
void *new_ptr = realloc(ptr, new_size);
if (new_ptr) {
    ptr = new_ptr;
} else {
    // Handle failure, ptr still valid
}

// realloc(NULL, size) == malloc(size)
int *p = NULL;
p = realloc(p, 100);  // Works like malloc(100)

// realloc(ptr, 0) behavior is implementation-defined
// DON'T rely on it - use free() explicitly

aligned_alloc (C11)

#include <stdlib.h>

// For SIMD, cache lines, etc.
void *p = aligned_alloc(64, 1024);  // 64-byte alignment, 1024 bytes
// Size must be multiple of alignment

// POSIX alternative
void *p2;
int ret = posix_memalign(&p2, 64, 1024);
if (ret != 0) {
    // Handle error
}

free(p);
free(p2);

Memory Safety Patterns

Defensive Allocation

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Safe allocation wrapper
void *safe_malloc(size_t size) {
    if (size == 0) {
        return NULL;  // Or return minimal allocation
    }
    
    void *ptr = malloc(size);
    if (!ptr) {
        fprintf(stderr, "Out of memory allocating %zu bytes\n", size);
        abort();  // Or return NULL and let caller handle
    }
    return ptr;
}

// Safe calloc (also checks for overflow)
void *safe_calloc(size_t nmemb, size_t size) {
    // Check for multiplication overflow
    if (nmemb != 0 && size > SIZE_MAX / nmemb) {
        fprintf(stderr, "Allocation size overflow\n");
        abort();
    }
    
    void *ptr = calloc(nmemb, size);
    if (!ptr && nmemb > 0 && size > 0) {
        fprintf(stderr, "Out of memory\n");
        abort();
    }
    return ptr;
}

// Safe strdup
char *safe_strdup(const char *s) {
    if (!s) return NULL;
    
    size_t len = strlen(s) + 1;
    char *dup = malloc(len);
    if (!dup) {
        fprintf(stderr, "Out of memory\n");
        abort();
    }
    return memcpy(dup, s, len);
}

Ownership and RAII Patterns

#include <stdio.h>
#include <stdlib.h>

// Resource acquisition is initialization (in C style)

typedef struct {
    int *data;
    size_t size;
    size_t capacity;
} IntVector;

IntVector *vector_create(size_t capacity) {
    IntVector *v = malloc(sizeof(IntVector));
    if (!v) return NULL;
    
    v->data = malloc(capacity * sizeof(int));
    if (!v->data) {
        free(v);
        return NULL;
    }
    
    v->size = 0;
    v->capacity = capacity;
    return v;
}

void vector_destroy(IntVector *v) {
    if (v) {
        free(v->data);
        free(v);
    }
}

// Cleanup attribute (GCC/Clang extension)
#define CLEANUP(func) __attribute__((cleanup(func)))

void vector_cleanup(IntVector **vp) {
    if (*vp) {
        vector_destroy(*vp);
        *vp = NULL;
    }
}

void example(void) {
    CLEANUP(vector_cleanup) IntVector *v = vector_create(10);
    if (!v) return;
    
    // Use v...
    
    // Automatically destroyed when function exits (even on error)
}

Defer Pattern (Manual)

#include <stdio.h>
#include <stdlib.h>

int process_file(const char *filename) {
    int result = -1;
    FILE *f = NULL;
    char *buffer = NULL;
    int *data = NULL;
    
    f = fopen(filename, "r");
    if (!f) goto cleanup;
    
    buffer = malloc(4096);
    if (!buffer) goto cleanup;
    
    data = malloc(1000 * sizeof(int));
    if (!data) goto cleanup;
    
    // Actual processing...
    result = 0;
    
cleanup:
    free(data);
    free(buffer);
    if (f) fclose(f);
    
    return result;
}

Custom Allocators

Why Custom Allocators?

Each allocator solves a specific problem that malloc handles poorly: Arena Allocator - Use when:
  • Problem: Many small allocations with same lifetime
  • malloc issue: Overhead of tracking each allocation individually
  • Solution: Allocate from bump pointer, free all at once
  • Example: Per-request allocations in web server, temporary parsing data, compiler AST nodes
  • Speedup: 10-100x faster than malloc
  • Tradeoff: Can’t free individual allocations
Pool Allocator - Use when:
  • Problem: Many same-sized objects allocated/freed frequently
  • malloc issue: Fragmentation from mixed-size allocations, search overhead
  • Solution: Pre-allocate array of fixed-size blocks
  • Example: Game entities, network packets, database records, object pools
  • Speedup: 5-50x faster, zero fragmentation
  • Tradeoff: Wasted space if pool size is wrong
Slab Allocator - Use when:
  • Problem: Kernel needs many instances of same struct type
  • malloc issue: Initialization overhead, poor cache locality
  • Solution: Caches of pre-initialized objects
  • Example: Linux kernel (task_struct, inode, dentry), high-performance servers
  • Benefit: Reduces initialization overhead, better cache locality
  • Tradeoff: More complex implementation
Decision tree:
Same lifetime for all allocations?
  ├─ Yes → Arena Allocator
  └─ No → Same size + frequent alloc/free?
      ├─ Yes → Pool Allocator
      └─ No → Same type + need caching?
          ├─ Yes → Slab Allocator
          └─ No → Use malloc
Performance comparison (allocating 1 million 64-byte objects):
malloc:  ~500ms  (baseline)
Arena:   ~5ms    (100x faster)
Pool:    ~10ms   (50x faster)
Slab:    ~20ms   (25x faster, includes initialization)

Arena Allocator

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

typedef struct {
    char *buffer;
    size_t capacity;
    size_t offset;
} Arena;

Arena *arena_create(size_t capacity) {
    Arena *a = malloc(sizeof(Arena));
    if (!a) return NULL;
    
    a->buffer = malloc(capacity);
    if (!a->buffer) {
        free(a);
        return NULL;
    }
    
    a->capacity = capacity;
    a->offset = 0;
    return a;
}

void *arena_alloc(Arena *a, size_t size, size_t alignment) {
    // Align offset
    size_t aligned_offset = (a->offset + alignment - 1) & ~(alignment - 1);
    
    if (aligned_offset + size > a->capacity) {
        return NULL;  // Out of space
    }
    
    void *ptr = a->buffer + aligned_offset;
    a->offset = aligned_offset + size;
    return ptr;
}

void arena_reset(Arena *a) {
    a->offset = 0;  // All allocations freed instantly!
}

void arena_destroy(Arena *a) {
    if (a) {
        free(a->buffer);
        free(a);
    }
}

// Usage
int main(void) {
    Arena *arena = arena_create(1024 * 1024);  // 1MB arena
    
    // Fast allocations, no individual frees needed
    int *arr = arena_alloc(arena, 100 * sizeof(int), alignof(int));
    char *str = arena_alloc(arena, 256, 1);
    double *d = arena_alloc(arena, sizeof(double), alignof(double));
    
    // Use allocations...
    
    // Free everything at once
    arena_reset(arena);
    
    // Or destroy when done
    arena_destroy(arena);
    return 0;
}

Pool Allocator

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

typedef struct PoolBlock {
    struct PoolBlock *next;
} PoolBlock;

typedef struct {
    char *memory;
    size_t block_size;
    size_t num_blocks;
    PoolBlock *free_list;
} Pool;

Pool *pool_create(size_t block_size, size_t num_blocks) {
    // Ensure block can hold a pointer
    if (block_size < sizeof(PoolBlock)) {
        block_size = sizeof(PoolBlock);
    }
    
    Pool *p = malloc(sizeof(Pool));
    if (!p) return NULL;
    
    p->memory = malloc(block_size * num_blocks);
    if (!p->memory) {
        free(p);
        return NULL;
    }
    
    p->block_size = block_size;
    p->num_blocks = num_blocks;
    
    // Build free list
    p->free_list = NULL;
    for (size_t i = 0; i < num_blocks; i++) {
        PoolBlock *block = (PoolBlock*)(p->memory + i * block_size);
        block->next = p->free_list;
        p->free_list = block;
    }
    
    return p;
}

void *pool_alloc(Pool *p) {
    if (!p->free_list) {
        return NULL;  // Pool exhausted
    }
    
    PoolBlock *block = p->free_list;
    p->free_list = block->next;
    return block;
}

void pool_free(Pool *p, void *ptr) {
    if (!ptr) return;
    
    PoolBlock *block = ptr;
    block->next = p->free_list;
    p->free_list = block;
}

void pool_destroy(Pool *p) {
    if (p) {
        free(p->memory);
        free(p);
    }
}

// Usage - great for many same-sized objects
typedef struct {
    int x, y, z;
    char data[20];
} Entity;

int main(void) {
    Pool *entity_pool = pool_create(sizeof(Entity), 1000);
    
    Entity *entities[100];
    for (int i = 0; i < 100; i++) {
        entities[i] = pool_alloc(entity_pool);
        entities[i]->x = i;
    }
    
    // O(1) allocation and deallocation!
    pool_free(entity_pool, entities[50]);
    entities[50] = pool_alloc(entity_pool);  // Reuses slot
    
    pool_destroy(entity_pool);
    return 0;
}

Slab Allocator

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SLAB_SIZE (4096 * 4)  // 16KB slabs

typedef struct Slab {
    struct Slab *next;
    size_t free_count;
    char *free_ptr;
    char data[];
} Slab;

typedef struct {
    size_t object_size;
    Slab *partial_slabs;  // Has some free objects
    Slab *full_slabs;     // All objects allocated
    Slab *empty_slabs;    // All objects free (cache)
} SlabCache;

SlabCache *slab_cache_create(size_t object_size) {
    SlabCache *cache = calloc(1, sizeof(SlabCache));
    if (!cache) return NULL;
    
    cache->object_size = object_size < sizeof(void*) ? sizeof(void*) : object_size;
    return cache;
}

Slab *slab_create(SlabCache *cache) {
    Slab *slab = malloc(SLAB_SIZE);
    if (!slab) return NULL;
    
    slab->next = NULL;
    size_t data_size = SLAB_SIZE - sizeof(Slab);
    slab->free_count = data_size / cache->object_size;
    
    // Build free list in slab
    slab->free_ptr = slab->data;
    char *ptr = slab->data;
    for (size_t i = 0; i < slab->free_count - 1; i++) {
        *(char**)(ptr) = ptr + cache->object_size;
        ptr += cache->object_size;
    }
    *(char**)(ptr) = NULL;
    
    return slab;
}

void *slab_alloc(SlabCache *cache) {
    Slab *slab = cache->partial_slabs;
    
    if (!slab) {
        // Try empty slabs first
        if (cache->empty_slabs) {
            slab = cache->empty_slabs;
            cache->empty_slabs = slab->next;
        } else {
            slab = slab_create(cache);
            if (!slab) return NULL;
        }
        slab->next = cache->partial_slabs;
        cache->partial_slabs = slab;
    }
    
    void *obj = slab->free_ptr;
    slab->free_ptr = *(char**)obj;
    slab->free_count--;
    
    // Move to full list if exhausted
    if (slab->free_count == 0) {
        cache->partial_slabs = slab->next;
        slab->next = cache->full_slabs;
        cache->full_slabs = slab;
    }
    
    return obj;
}

// Simplified free - production version would need object-to-slab mapping

Memory-Mapped I/O

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

// High-performance file reading
typedef struct {
    void *data;
    size_t size;
    int fd;
} MappedFile;

MappedFile *map_file(const char *path) {
    MappedFile *mf = malloc(sizeof(MappedFile));
    if (!mf) return NULL;
    
    mf->fd = open(path, O_RDONLY);
    if (mf->fd < 0) {
        free(mf);
        return NULL;
    }
    
    struct stat st;
    if (fstat(mf->fd, &st) < 0) {
        close(mf->fd);
        free(mf);
        return NULL;
    }
    
    mf->size = st.st_size;
    mf->data = mmap(NULL, mf->size, PROT_READ, MAP_PRIVATE, mf->fd, 0);
    
    if (mf->data == MAP_FAILED) {
        close(mf->fd);
        free(mf);
        return NULL;
    }
    
    // Hint: sequential access
    madvise(mf->data, mf->size, MADV_SEQUENTIAL);
    
    return mf;
}

void unmap_file(MappedFile *mf) {
    if (mf) {
        munmap(mf->data, mf->size);
        close(mf->fd);
        free(mf);
    }
}

// Usage: process huge files without loading entirely into heap
int main(void) {
    MappedFile *mf = map_file("hugefile.dat");
    if (!mf) return 1;
    
    // Access as if in memory
    char *data = mf->data;
    size_t count = 0;
    for (size_t i = 0; i < mf->size; i++) {
        if (data[i] == '\n') count++;
    }
    printf("Lines: %zu\n", count);
    
    unmap_file(mf);
    return 0;
}

Anonymous Memory Mapping

#include <sys/mman.h>

// Allocate large block without malloc overhead
void *big_alloc(size_t size) {
    void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    return (ptr == MAP_FAILED) ? NULL : ptr;
}

void big_free(void *ptr, size_t size) {
    munmap(ptr, size);
}

// Guard pages for security
void *guarded_alloc(size_t size) {
    size_t page_size = sysconf(_SC_PAGESIZE);
    size_t total = size + 2 * page_size;
    
    char *base = mmap(NULL, total, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (base == MAP_FAILED) return NULL;
    
    // Make first and last pages inaccessible
    mprotect(base, page_size, PROT_NONE);
    mprotect(base + page_size + size, page_size, PROT_NONE);
    
    return base + page_size;
}

Debugging Memory Issues

Custom Debug Allocator

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAGIC_ALLOC 0xDEADBEEF
#define MAGIC_FREE  0xFEEDFACE
#define GUARD_SIZE  8
#define GUARD_BYTE  0xAB

typedef struct {
    uint32_t magic;
    size_t size;
    const char *file;
    int line;
    char guard_before[GUARD_SIZE];
} DebugHeader;

void *debug_malloc(size_t size, const char *file, int line) {
    size_t total = sizeof(DebugHeader) + size + GUARD_SIZE;
    DebugHeader *header = malloc(total);
    if (!header) return NULL;
    
    header->magic = MAGIC_ALLOC;
    header->size = size;
    header->file = file;
    header->line = line;
    memset(header->guard_before, GUARD_BYTE, GUARD_SIZE);
    
    char *user_ptr = (char*)(header + 1);
    memset(user_ptr + size, GUARD_BYTE, GUARD_SIZE);  // Guard after
    
    return user_ptr;
}

void debug_free(void *ptr, const char *file, int line) {
    if (!ptr) return;
    
    DebugHeader *header = (DebugHeader*)ptr - 1;
    
    // Check magic
    if (header->magic == MAGIC_FREE) {
        fprintf(stderr, "DOUBLE FREE at %s:%d (originally freed elsewhere)\n",
                file, line);
        abort();
    }
    
    if (header->magic != MAGIC_ALLOC) {
        fprintf(stderr, "INVALID FREE at %s:%d (not allocated or corrupted)\n",
                file, line);
        abort();
    }
    
    // Check guard bytes
    for (int i = 0; i < GUARD_SIZE; i++) {
        if ((unsigned char)header->guard_before[i] != GUARD_BYTE) {
            fprintf(stderr, "BUFFER UNDERFLOW at %s:%d (allocated at %s:%d)\n",
                    file, line, header->file, header->line);
            abort();
        }
    }
    
    char *after = (char*)(header + 1) + header->size;
    for (int i = 0; i < GUARD_SIZE; i++) {
        if ((unsigned char)after[i] != GUARD_BYTE) {
            fprintf(stderr, "BUFFER OVERFLOW at %s:%d (allocated at %s:%d)\n",
                    file, line, header->file, header->line);
            abort();
        }
    }
    
    header->magic = MAGIC_FREE;
    memset(header + 1, 0xCD, header->size);  // Poison freed memory
    free(header);
}

#define malloc(size) debug_malloc(size, __FILE__, __LINE__)
#define free(ptr) debug_free(ptr, __FILE__, __LINE__)

Exercises

1

Safe String Library

Implement char *safe_strcat(const char *s1, const char *s2) that allocates a new string with the concatenation.
2

Growing Array

Implement a dynamic array that doubles capacity when full, with push, pop, get, and set operations.
3

Arena with Chunks

Extend the arena allocator to allocate new chunks when current one is full, instead of failing.
4

Memory Tracker

Build a memory tracker that records all allocations and can print a summary of leaks at program end.

Next Up

Preprocessor Mastery

Master macros, conditional compilation, and code generation