Skip to main content
C Undefined Behavior

Undefined Behavior

Undefined behavior (UB) is C’s most dangerous feature. The compiler can do literally anything when UB occurs—including appearing to work until it doesn’t.

Why Does C Have Undefined Behavior?

The Design Philosophy

Before looking at the dangers, understand why C was designed this way. The goal: performance and portability above all else. Why UB exists:
  1. Performance: Checking for errors adds overhead.
    • Example: Checking array bounds on every access makes code 10-30% slower.
    • C’s philosophy: “Trust the programmer” - if you say arr[100], C assumes you know what you’re doing.
  2. Portability: Different hardware behaves differently.
    • Example: What happens when you right-shift a negative number?
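    • x86, ARM, PowerPC and most modern CPUs: an arithmetic shift instruction is available (preserves sign)
    • Some historical or unusual architectures: only a logical shift (fills with zeros) is cheap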
    • C’s solution: Make it “Implementation Defined” or “Undefined” so compilers can use the fastest native instruction.
  3. Optimization: The compiler assumes UB never happens.
    • Example: If you write x + 1, the compiler assumes no overflow.
    • This allows it to optimize loops and algebraic simplifications that wouldn’t be valid if it had to handle overflow wrapping.
The tradeoff: You get maximum speed and can run on any hardware, but you lose safety. C is a sharp tool - it cuts through problems efficiently, but can also cut you.

What is Undefined Behavior?

The C standard defines three categories of problematic code:
| Category | Meaning | Example |
| --- | --- | --- |
| Undefined Behavior | Anything can happen. Compiler can assume it never occurs. | Signed overflow, null dereference |
| Unspecified Behavior | Several valid behaviors; the implementation chooses one | Order of function argument evaluation |
| Implementation-Defined | The implementation chooses, and the choice must be documented by the compiler | Size of int, right-shift of negative numbers |
Undefined behavior is not just “unpredictable.” Modern compilers actively exploit UB for optimization. Code that “works” might break with a different compiler, optimization level, or even compiler version.
Common Undefined Behavior in C

Signed Integer Overflow

#include <stdio.h>
#include <limits.h>

// UNDEFINED BEHAVIOR
/*
 * Demonstrates signed integer overflow: adds 1 to INT_MAX and prints the
 * result. The addition is undefined behavior, so nothing about the output
 * (or whether printf runs at all) can be relied on.
 */
void overflow_demo(void) {
    int x = INT_MAX;
    x = x + 1;  // UB! Signed overflow: INT_MAX + 1 is not representable in int
    printf("%d\n", x);  // May print anything, or not execute at all
}

// Real-world danger: loop optimization
/*
 * Sums the integers 1..n with a counting loop and returns the total.
 * Kept as a cautionary example: both the loop counter and the accumulator
 * are signed ints, so either one can overflow.
 */
int sum_to_n(int n) {
    int sum = 0;
    for (int i = 1; i <= n; i++) {  // If n = INT_MAX, i <= n never becomes false before i++ overflows (UB)
        sum += i;                    // Compiler may assume loop terminates; sum can overflow too
    }
    return sum;
}

// This check can be REMOVED by the compiler!
/*
 * Tries to detect overflow by performing the overflowing addition and then
 * inspecting the result. Because x + 1 overflowing is UB, the compiler may
 * treat x + 1 < x as always false and drop the branch. Returns x + 1.
 */
int bad_overflow_check(int x) {
    if (x + 1 < x) {  // Compiler assumes this is always false (no UB)
        printf("Overflow!\n");  // Dead code elimination!
    }
    return x + 1;  // Still UB when x == INT_MAX
}

// SAFE alternatives
#include <stdint.h>

// Use unsigned (wraps predictably)
/* Adds two 32-bit unsigned values; the result is reduced modulo 2^32. */
uint32_t safe_add_unsigned(uint32_t a, uint32_t b) {
    const uint32_t wrapped = a + b;  // Wraps on overflow (defined behavior)
    return wrapped;
}

// Check before operation
/*
 * Adds a and b only when the sum fits in an int.
 * Returns a + b, or 0 when the addition would overflow in either direction.
 */
int safe_add_signed(int a, int b) {
    // Each bound is only computed on the side where it cannot itself overflow.
    const int too_high = (b > 0) && (a > INT_MAX - b);
    const int too_low = (b < 0) && (a < INT_MIN - b);

    if (too_high || too_low) {
        // Handle overflow
        return 0;
    }
    return a + b;
}

// Use compiler builtins (GCC/Clang)
/*
 * Adds a and b using the GCC/Clang overflow-checking builtin.
 * The sum is stored through result; returns 1 when the addition did not
 * overflow and 0 when it did.
 */
int safe_add_builtin(int a, int b, int *result) {
    if (__builtin_add_overflow(a, b, result)) {
        return 0;
    }
    return 1;
}

Null Pointer Dereference

#include <stdio.h>
#include <stdlib.h>

// UNDEFINED BEHAVIOR
/* Writes through a pointer that is known to be NULL, which is undefined behavior. */
void null_deref(void) {
    int *p = NULL;
    *p = 42;  // UB! Dereferencing null pointer
}

// Dangerous pattern: check after dereference
/*
 * Reads *p and only afterwards tests p against NULL. Since the read has
 * already happened, the compiler may conclude p is non-null and delete the
 * test. Prints the value read.
 */
void dangerous_check(int *p) {
    int x = *p;    // Dereference first
    if (p == NULL) {  // Compiler may remove this check!
        return;        // (If we dereferenced, p "can't" be null)
    }
    printf("%d\n", x);
}

// SAFE: check before dereference
/* Prints the int that p points to, or does nothing when p is NULL. */
void safe_check(int *p) {
    if (p != NULL) {
        // p was validated above, so reading through it is safe now
        printf("%d\n", *p);
    }
}

Buffer Overflows

#include <stdio.h>
#include <string.h>

// UNDEFINED BEHAVIOR
/*
 * Writes one element past the end of a 10-element array, first directly
 * and then via a loop whose bound is off by one.
 */
void buffer_overflow(void) {
    int arr[10];
    arr[10] = 42;  // UB! Out of bounds access (valid indices are 0..9)
    
    for (int i = 0; i <= 10; i++) {  // Off-by-one: should be i < 10
        arr[i] = i;  // UB on last iteration (i == 10)
    }
}

// Stack smashing
/* Copies a string literal into an 8-byte stack buffer that is too small for it. */
void stack_smash(void) {
    char buffer[8];
    strcpy(buffer, "This string is way too long");  // UB! Buffer overflow: strcpy does no bounds checking
}

// SAFE alternatives
/*
 * Shows two bounded ways to put "Hello" into an 8-byte buffer:
 * strncpy followed by explicit termination, then snprintf.
 */
void safe_copy(void) {
    char buffer[8];
    const size_t last = sizeof buffer - 1;

    // strncpy does not terminate on truncation, so write the terminator by hand
    strncpy(buffer, "Hello", last);
    buffer[last] = '\0';  // Ensure null termination

    // Or use snprintf
    snprintf(buffer, sizeof buffer, "Hello");
}

Use After Free

#include <stdio.h>
#include <stdlib.h>

// UNDEFINED BEHAVIOR
/*
 * Allocates an int, frees it, and then reads it, writes it, and frees it
 * again. Every access after the first free() is undefined behavior.
 * Note that the malloc result is also used without a NULL check.
 */
void use_after_free(void) {
    int *p = malloc(sizeof(int));
    *p = 42;
    free(p);
    
    printf("%d\n", *p);  // UB! Using freed memory
    *p = 10;             // UB! Writing to freed memory
    free(p);             // UB! Double free
}

// SAFE pattern: null after free
/*
 * Allocates an int, uses it, frees it, and then clears the pointer so the
 * stale address cannot be reused by accident.
 */
void safe_free(void) {
    int *p = malloc(sizeof(int));
    if (!p) return;  // Allocation failed: nothing to use or free
    
    *p = 42;
    free(p);
    p = NULL;  // Prevent accidental reuse; a later free(p) is now a harmless no-op
    
    // if (*p) ...  // Still UB (null dereference); on most hosted platforms it
    //              // faults immediately instead of silently touching freed
    //              // memory, but that is not guaranteed.
}

Uninitialized Variables

#include <stdio.h>

// UNDEFINED BEHAVIOR
/* Returns the value of a local that was never assigned. */
int uninitialized(void) {
    int x;           // Uninitialized: holds an indeterminate value
    return x;        // UB! Using uninitialized value
}

/*
 * Assigns x only when flag is non-zero, then returns x on every path.
 * Returns 42 when flag is non-zero; the result is undefined otherwise.
 */
int uninitialized_conditional(int flag) {
    int x;
    if (flag) {
        x = 42;
    }
    return x;  // UB if flag was false! x was never assigned on that path
}

// Compiler might "optimize" this:
/*
 * Tests an uninitialized int against zero twice. Logically one of the two
 * branches must be taken, but because reading x is UB the compiler is not
 * obliged to treat the two reads as the same value.
 */
int spooky(void) {
    int x;
    if (x == 0) {  // UB: reads indeterminate x
        return 1;
    }
    if (x != 0) {  // UB: may be evaluated as if x were a different value
        return 2;
    }
    return 3;  // Compiler might ALWAYS return 3!
}

// SAFE: always initialize
/* Returns a local that is given a known value at its declaration. */
int safe(void) {
    const int value = 0;  // Or meaningful default
    return value;
}

Strict Aliasing Violations

#include <stdio.h>
#include <stdint.h>
#include <string.h>

// UNDEFINED BEHAVIOR (strict aliasing violation)
/*
 * Reinterprets the bytes of an int as a float by casting the pointer.
 * Reading an int object through a float lvalue violates strict aliasing.
 */
float int_to_float_bad(int x) {
    return *(float*)&x;  // UB! Accessing int as float
}

// The compiler may assume int* and float* never alias
/*
 * Stores through an int pointer and a float pointer, then prints *ip.
 * If both point at the same memory, the optimizer may still print 42,
 * because it may assume that an int* and a float* never alias.
 */
void aliasing_demo(int *ip, float *fp) {
    *ip = 42;
    *fp = 3.14f;
    printf("%d\n", *ip);  // Compiler may print 42 (assumes no alias)
}

// SAFE: use unions (defined in C99)
/*
 * Reinterprets the bytes of an int as a float through a union:
 * store via the int member, read back via the float member.
 */
float int_to_float_union(int x) {
    union {
        int i;
        float f;
    } pun = { .i = x };
    return pun.f;  // OK in C (not C++)
}

// SAFE: use memcpy
/*
 * Reinterprets the bytes of an int as a float by copying the object
 * representation with memcpy.
 *
 * x: the integer whose bytes are reinterpreted.
 * Returns the float that has the same object representation as x.
 */
float int_to_float_memcpy(int x) {
    // memcpy reads sizeof(float) bytes from &x; that is only in bounds when
    // the two types have the same size, so enforce it at compile time.
    _Static_assert(sizeof(float) == sizeof(int),
                   "int_to_float_memcpy requires int and float to have the same size");
    float f;
    memcpy(&f, &x, sizeof(f));  // Byte copy: no aliasing violation
    return f;
}

// char* can alias anything (exception to strict aliasing)
/*
 * Prints the first n bytes of the object at ptr as two-digit hex values,
 * each followed by a space.
 *
 * ptr: start of the object to dump; only read, never written, so it is
 *      const-qualified and callers may pass pointers to const data.
 * n:   number of bytes to print; nothing is printed when n is 0.
 */
void inspect_bytes(const void *ptr, size_t n) {
    const unsigned char *bytes = ptr;  // OK! char* can alias
    for (size_t i = 0; i < n; i++) {
        printf("%02x ", bytes[i]);
    }
}

Sequence Point Violations

#include <stdio.h>

// UNDEFINED BEHAVIOR
/*
 * Collects expressions that modify i more than once, or both modify and
 * read it, without an intervening sequence point. Every marked line is UB.
 */
void sequence_violations(void) {
    int i = 0;
    
    i = i++;          // UB! Modifying i twice between sequence points
    i = ++i + i++;    // UB! Multiple modifications
    
    int arr[10];
    arr[i] = i++;     // UB! Which value of i?
    
    printf("%d %d\n", i++, i++);  // UB! Two unsequenced modifications of i (not merely unspecified order)
}

// Function arguments: order is UNSPECIFIED (not undefined)
/* Returns a minus b. */
int f(int a, int b) {
    const int difference = a - b;
    return difference;
}

/*
 * Contrasts two calls to f: one whose arguments both modify i (UB) and
 * one whose arguments have no side effects (merely unspecified order).
 */
void unspecified_order(void) {
    int i = 0;
    int x = f(i++, i++);  // UB! (modifying i twice)
    
    // But this is just unspecified:
    int a = 1, b = 2;
    int y = f(a, b);  // Order of evaluation unspecified, but OK: neither argument has side effects
}

// SAFE: separate statements
/* Shows a post-increment split into separate statements: save, then bump. */
void safe_increment(void) {
    int i = 0;
    const int old = i;  // Value before the increment
    i += 1;
    // Use old and i separately
}

Shift Operators

#include <stdio.h>
#include <stdint.h>

// UNDEFINED BEHAVIOR
/*
 * Lists shift expressions whose behavior is undefined or
 * implementation-defined. The results are deliberately discarded.
 */
void bad_shifts(void) {
    int x = 1;
    
    x << 32;    // UB if int is 32 bits (shift >= width)
    x << -1;    // UB! Negative shift amount
    
    int y = -1;
    y << 1;     // UB! Left shift of negative number (still UB in C23; only C++20 defines it)
    y >> 1;     // Implementation-defined (arithmetic or logical)
}

// SAFE shifts
/*
 * Source of the run-time shift count. Declared here because calling an
 * undeclared function is not valid since C99.
 * NOTE(review): assumed to take no arguments and return int — confirm
 * against the real definition.
 */
int get_shift(void);

/*
 * Demonstrates shifts that stay within defined behavior: the operand is
 * unsigned and a run-time shift count is range-checked before use.
 */
void safe_shifts(void) {
    uint32_t x = 1;

    // Check shift amount: must be non-negative and less than the 32-bit width
    int shift = get_shift();
    if (shift >= 0 && shift < 32) {
        x <<= shift;  // Safe
    }

    // Use unsigned for predictable behavior. The operand is cast to uint32_t
    // because 1U is only guaranteed 16 bits wide, which would make << 31 UB
    // on such a platform.
    uint32_t mask = (uint32_t)1 << 31;  // Safe, defined
}

Pointer Arithmetic Violations

#include <stddef.h>
#include <stdio.h>

// UNDEFINED BEHAVIOR
/*
 * Collects pointer computations outside what C allows: forming pointers
 * beyond one-past-the-end, dereferencing one-past-the-end, subtracting
 * pointers into different objects, and arithmetic on a null pointer.
 */
void pointer_violations(void) {
    int arr[10];
    int *p = arr;
    
    p = p + 11;       // UB! Past one-past-end (arr + 10 is the last valid pointer value)
    p = p - 1;        // UB! p is already invalid here; forming arr - 1 (before array start) is likewise UB
    
    int *q = arr + 10;  // OK: one-past-end is valid
    int x = *q;         // UB! Can't dereference one-past-end
    
    int a, b;
    ptrdiff_t diff = &a - &b;  // UB! Different objects
    
    int *null = NULL;
    null + 1;          // UB! Arithmetic on null pointer
}

// Comparing pointers from different arrays
/*
 * Contrasts relational and equality comparison of pointers into two
 * distinct arrays: the first is undefined, the second is well defined.
 */
void compare_violation(void) {
    int arr1[10], arr2[10];
    
    if (arr1 < arr2) {  // UB! Comparing unrelated pointers with a relational operator
        // ...
    }
    
    // Equality comparison is OK
    if (arr1 == arr2) {  // Always false, but defined
        // ...
    }
}

Data Races

#include <stdio.h>
#include <pthread.h>

int shared = 0;  // Non-atomic shared variable

// UNDEFINED BEHAVIOR (data race)
/*
 * Thread entry point: increments the global `shared` one million times
 * with no synchronization. arg is unused; always returns NULL.
 */
void *thread_func(void *arg) {
    for (int i = 0; i < 1000000; i++) {
        shared++;  // UB! Unsynchronized access (non-atomic read-modify-write)
    }
    return NULL;
}

// SAFE: use atomics or synchronization
#include <stdatomic.h>

atomic_int atomic_shared = 0;

/*
 * Thread entry point: atomically increments atomic_shared one million
 * times. arg is unused; always returns NULL.
 */
void *safe_thread_func(void *arg) {
    int remaining = 1000000;
    while (remaining-- > 0) {
        atomic_fetch_add(&atomic_shared, 1);  // Safe
    }
    return NULL;
}

// Or use mutex
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Thread entry point: increments `shared` one million times, holding
 * `mutex` around each increment. arg is unused; always returns NULL.
 */
void *mutex_thread_func(void *arg) {
    int remaining = 1000000;
    while (remaining-- > 0) {
        pthread_mutex_lock(&mutex);
        shared++;  // Safe under lock
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}

Division and Modulo

#include <stdio.h>
#include <limits.h>

// UNDEFINED BEHAVIOR
/* Lists the integer division and remainder operations that are undefined. */
void division_ub(void) {
    int x = 10 / 0;     // UB! Division by zero
    int y = 10 % 0;     // UB! Modulo by zero
    
    // The most obscure UB:
    int z = INT_MIN / -1;  // UB! Result overflows (for 32-bit int: INT_MIN = -2^31, INT_MAX = 2^31-1)
}

// SAFE
/*
 * Divides a by b while avoiding the two undefined cases.
 * Returns 0 when b is 0, INT_MAX for INT_MIN / -1, and a / b otherwise.
 */
int safe_divide(int a, int b) {
    if (b == 0) {
        return 0;  // Handle division by zero
    }
    if (b == -1 && a == INT_MIN) {
        return INT_MAX;  // Handle overflow
    }
    return a / b;
}

How Compilers Exploit UB

#include <stdio.h>
#include <stdlib.h>

// Compiler optimizations based on UB assumptions:

// 1. Dead code elimination
/*
 * Dereferences p and then checks it against NULL. The check comes too
 * late: the compiler may infer p != NULL from the dereference and remove it.
 */
void example1(int *p) {
    int x = *p;        // If we reach here, p is non-null
    if (p == NULL) {   // Compiler: "This is impossible"
        abort();       // Eliminated!
    }
}

// 2. Loop optimization
/*
 * Sums 0..n-1 with a signed counter and returns the total.
 * In i < n the int counter is converted to unsigned for the comparison,
 * because n is unsigned.
 */
int example2(unsigned int n) {
    int sum = 0;
    for (int i = 0; i < n; i++) {  // i can't overflow (UB), so the compiler may assume it never does
        sum += i;                    // Compiler can use closed form
    }
    return sum;  // Might become n*(n-1)/2 directly
}

// 3. Infinite loop elimination
/*
 * An empty loop with a constant controlling expression. In C this must
 * spin forever: C11 6.8.5p6 only lets the implementation assume termination
 * when the controlling expression is NOT a constant expression.
 */
int example3(void) {
    while (1) {
        // Nothing with side effects
    }
    return 0;  // Unreachable in C. C++ compilers (forward-progress rule) and some
               // older C compilers have treated this loop as terminating.
}

// 4. Time travel
/*
 * Searches for a counterexample to Fermat's Last Theorem for exponent 3.
 * The only exit from the loop is `return 1`.
 */
int fermat(void) {
    int a = 1, b = 1, c = 1;
    while (1) {
        if (a*a*a + b*b*b == c*c*c) {  // Signed overflow here is UB once the values grow large enough
            return 1;  // Found Fermat counterexample!
        }
        // ... increment a, b, c ...
    }
}
// Compiler might return 1 immediately!
// An optimizer that assumes the loop terminates sees `return 1` as the only
// way out and concludes the function returns 1. C11 6.8.5p6 does not license
// that assumption for a constant condition such as while (1); C++'s
// forward-progress rule does. The signed overflow in a*a*a is UB either way.

Tools for Finding UB

Compiler Sanitizers

# Address Sanitizer (buffer overflows, use-after-free)
# -g adds debug info to the instrumented binary
gcc -fsanitize=address -g program.c -o program

# Undefined Behavior Sanitizer
gcc -fsanitize=undefined -g program.c -o program

# Memory Sanitizer (uninitialized reads) - Clang only
clang -fsanitize=memory -g program.c -o program

# Thread Sanitizer (data races)
gcc -fsanitize=thread -g program.c -o program

# Combine them (here: Address Sanitizer plus Undefined Behavior Sanitizer)
gcc -fsanitize=address,undefined -g program.c -o program

Valgrind

# Run the program under Valgrind's memcheck tool with full leak checking
valgrind --tool=memcheck --leak-check=full ./program

Static Analysis

# GCC static analyzer (enabled with the -fanalyzer flag)
gcc -fanalyzer program.c

# Clang static analyzer (scan-build wraps the given compiler command)
scan-build gcc program.c

# Cppcheck with all of its check categories enabled
cppcheck --enable=all program.c

Defensive Coding Practices

// Checklist of defensive patterns. These are fragments, not a complete
// program: ptr, value, index, array, a, b, result, bit, tmp, arr and i
// stand for variables in the reader's own code.

// 1. Initialize everything
int x = 0;
char buffer[100] = {0};

// 2. Check pointers before dereference
if (ptr != NULL) {
    *ptr = value;
}

// 3. Validate array indices
//    (if index has a signed type, also require index >= 0)
if (index < array_size) {
    array[index] = value;
}

// 4. Use safe integer operations
//    INT_MAX and INT_MIN come from <limits.h>, not <stdint.h>. The guard must
//    not overflow itself, so the bound is chosen by the sign of b:
//    INT_MAX - b is only safe for b >= 0, INT_MIN - b only for b < 0.
#include <limits.h>
#include <stdint.h>
if ((b >= 0) ? (a <= INT_MAX - b) : (a >= INT_MIN - b)) {
    result = a + b;
}

// 5. Use unsigned for bit operations
uint32_t flags = 1U << bit;

// 6. Avoid clever code
// BAD: arr[i++] = arr[i++]
// GOOD: 
tmp = arr[i]; i++; arr[i] = tmp; i++;

// 7. Enable all warnings
// gcc -Wall -Wextra -Werror -pedantic

// 8. Use static analysis and sanitizers in CI

Exercises

1

UB Hunter

Find all undefined behavior in a provided code sample (at least 5 instances).
2

Safe Integer Library

Implement safe add, subtract, multiply, divide functions that detect overflow.
3

Sanitizer Setup

Set up a project with all sanitizers enabled in debug mode and run your data structures code through them.
4

Aliasing Exploration

Write code that behaves differently with -O0 vs -O2 due to strict aliasing, then fix it.

Next Up

System Calls & POSIX

Interface with the operating system