Skip to main content

Documentation Index

Fetch the complete documentation index at: https://resources.devweekends.com/llms.txt

Use this file to discover all available pages before exploring further.

Project: Key-Value Store

Build a production-quality key-value store with persistence, memory management, and basic indexing. This project teaches file I/O, data structures, and systems design. This is one of the most instructive systems projects you can build because it sits at the intersection of nearly every systems programming topic: hash tables for fast lookups, file I/O for persistence, binary serialization for the on-disk format, write-ahead logging for crash safety, and reader-writer locks for concurrent access. Redis, LevelDB, and RocksDB all started from fundamentally the same building blocks you will implement here.

Features

1

In-Memory Storage

Hash table for fast O(1) lookups
2

Persistence

Write-ahead log and snapshots
3

Variable-Size Values

Store arbitrary binary data
4

Concurrent Access

Thread-safe operations

Data Structures

The architecture mirrors what real databases use at a simplified scale. Think of it as a library system: the hash table is the card catalog (fast lookup by title), each bucket’s linked list handles collisions (multiple books filed under the same catalog number), the write-ahead log is the librarian’s notebook (recording every checkout and return before updating the card catalog, so nothing is lost if the power goes out), and the snapshot is a complete photocopy of the catalog at a point in time.
// kvstore.h
#ifndef KVSTORE_H
#define KVSTORE_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>

#define MAX_KEY_SIZE 256
#define INITIAL_CAPACITY 1024
// When the hash table is more than 75% full, collisions become frequent enough
// to degrade O(1) lookups toward O(n). Doubling the capacity at 0.75 load
// keeps average chain length below 1, maintaining fast lookups.
#define LOAD_FACTOR 0.75

typedef struct Entry {
    char *key;              // Heap-allocated copy of the key (owned by this entry)
    void *value;            // Heap-allocated copy of the value (arbitrary binary data)
    size_t value_size;      // Size of value in bytes (needed for binary-safe storage)
    struct Entry *next;     // Next entry in this bucket's collision chain
    uint64_t version;       // Monotonically increasing version for optimistic locking
    time_t created_at;      // First insertion timestamp
    time_t updated_at;      // Last modification timestamp
} Entry;

typedef struct {
    Entry **buckets;
    size_t capacity;
    size_t size;
    pthread_rwlock_t lock;
    
    // Persistence
    FILE *wal;           // Write-ahead log
    char *data_dir;
    uint64_t version;    // Global version counter
} KVStore;

// Core operations
KVStore *kv_create(const char *data_dir);
void kv_destroy(KVStore *store);

int kv_put(KVStore *store, const char *key, const void *value, size_t size);
int kv_get(KVStore *store, const char *key, void **value, size_t *size);
int kv_delete(KVStore *store, const char *key);
bool kv_exists(KVStore *store, const char *key);

// Persistence
int kv_save(KVStore *store);
int kv_load(KVStore *store);

// Iteration
typedef void (*kv_callback)(const char *key, const void *value, size_t size, void *ctx);
void kv_foreach(KVStore *store, kv_callback cb, void *ctx);

// Stats
typedef struct {
    size_t count;
    size_t memory_used;
    size_t bucket_count;
    double load_factor;
} KVStats;

KVStats kv_stats(KVStore *store);

#endif

Hash Table Implementation

// hashtable.c
#include "kvstore.h"

// FNV-1a hash -- a fast, well-distributed hash function used widely in practice.
// The algorithm: start with a large prime "offset basis", then for each byte,
// XOR the byte in and multiply by a "prime". The XOR-then-multiply order
// (FNV-1a vs FNV-1) gives slightly better avalanche properties, meaning
// small changes in input produce large changes in output -- exactly what you
// want for hash table distribution.
static uint64_t hash_key(const char *key) {
    uint64_t hash = 0xcbf29ce484222325ULL;  // FNV offset basis
    while (*key) {
        hash ^= (uint8_t)*key++;             // XOR in one byte of key
        hash *= 0x100000001b3ULL;            // Multiply by FNV prime
    }
    return hash;
}

static Entry *entry_create(const char *key, const void *value, size_t size) {
    Entry *entry = calloc(1, sizeof(Entry));
    if (!entry) return NULL;
    
    entry->key = strdup(key);
    entry->value = malloc(size);
    
    if (!entry->key || !entry->value) {
        free(entry->key);
        free(entry->value);
        free(entry);
        return NULL;
    }
    
    memcpy(entry->value, value, size);
    entry->value_size = size;
    entry->created_at = time(NULL);
    entry->updated_at = entry->created_at;
    
    return entry;
}

static void entry_destroy(Entry *entry) {
    if (entry) {
        free(entry->key);
        free(entry->value);
        free(entry);
    }
}

static int resize_if_needed(KVStore *store) {
    double load = (double)store->size / store->capacity;
    if (load < LOAD_FACTOR) return 0;
    
    size_t new_capacity = store->capacity * 2;
    Entry **new_buckets = calloc(new_capacity, sizeof(Entry*));
    if (!new_buckets) return -1;
    
    // Rehash all entries
    for (size_t i = 0; i < store->capacity; i++) {
        Entry *entry = store->buckets[i];
        while (entry) {
            Entry *next = entry->next;
            size_t new_index = hash_key(entry->key) % new_capacity;
            entry->next = new_buckets[new_index];
            new_buckets[new_index] = entry;
            entry = next;
        }
    }
    
    free(store->buckets);
    store->buckets = new_buckets;
    store->capacity = new_capacity;
    
    return 0;
}

KVStore *kv_create(const char *data_dir) {
    KVStore *store = calloc(1, sizeof(KVStore));
    if (!store) return NULL;
    
    store->buckets = calloc(INITIAL_CAPACITY, sizeof(Entry*));
    if (!store->buckets) {
        free(store);
        return NULL;
    }
    
    store->capacity = INITIAL_CAPACITY;
    store->data_dir = data_dir ? strdup(data_dir) : NULL;
    
    pthread_rwlock_init(&store->lock, NULL);
    
    // Open WAL if data directory specified
    if (data_dir) {
        char wal_path[512];
        snprintf(wal_path, sizeof(wal_path), "%s/wal.log", data_dir);
        store->wal = fopen(wal_path, "a+b");
    }
    
    return store;
}

void kv_destroy(KVStore *store) {
    if (!store) return;
    
    pthread_rwlock_wrlock(&store->lock);
    
    for (size_t i = 0; i < store->capacity; i++) {
        Entry *entry = store->buckets[i];
        while (entry) {
            Entry *next = entry->next;
            entry_destroy(entry);
            entry = next;
        }
    }
    
    free(store->buckets);
    free(store->data_dir);
    if (store->wal) fclose(store->wal);
    
    pthread_rwlock_unlock(&store->lock);
    pthread_rwlock_destroy(&store->lock);
    
    free(store);
}

Core Operations

// operations.c
#include "kvstore.h"

int kv_put(KVStore *store, const char *key, const void *value, size_t size) {
    if (!store || !key || !value || size == 0) return -1;
    if (strlen(key) >= MAX_KEY_SIZE) return -1;
    
    pthread_rwlock_wrlock(&store->lock);
    
    // Check if we need to resize
    resize_if_needed(store);
    
    size_t index = hash_key(key) % store->capacity;
    
    // Look for existing entry
    Entry *entry = store->buckets[index];
    while (entry) {
        if (strcmp(entry->key, key) == 0) {
            // Update existing
            void *new_value = malloc(size);
            if (!new_value) {
                pthread_rwlock_unlock(&store->lock);
                return -1;
            }
            
            memcpy(new_value, value, size);
            free(entry->value);
            entry->value = new_value;
            entry->value_size = size;
            entry->updated_at = time(NULL);
            entry->version++;
            
            // Write to WAL
            if (store->wal) {
                // WAL entry format: op(1) | key_len(2) | key | value_len(4) | value
                uint8_t op = 'P';  // Put
                uint16_t key_len = strlen(key);
                uint32_t val_len = size;
                
                fwrite(&op, 1, 1, store->wal);
                fwrite(&key_len, sizeof(key_len), 1, store->wal);
                fwrite(key, 1, key_len, store->wal);
                fwrite(&val_len, sizeof(val_len), 1, store->wal);
                fwrite(value, 1, size, store->wal);
                fflush(store->wal);
            }
            
            store->version++;
            pthread_rwlock_unlock(&store->lock);
            return 0;
        }
        entry = entry->next;
    }
    
    // Create new entry
    Entry *new_entry = entry_create(key, value, size);
    if (!new_entry) {
        pthread_rwlock_unlock(&store->lock);
        return -1;
    }
    
    new_entry->version = store->version++;
    new_entry->next = store->buckets[index];
    store->buckets[index] = new_entry;
    store->size++;
    
    // Write to WAL
    if (store->wal) {
        uint8_t op = 'P';
        uint16_t key_len = strlen(key);
        uint32_t val_len = size;
        
        fwrite(&op, 1, 1, store->wal);
        fwrite(&key_len, sizeof(key_len), 1, store->wal);
        fwrite(key, 1, key_len, store->wal);
        fwrite(&val_len, sizeof(val_len), 1, store->wal);
        fwrite(value, 1, size, store->wal);
        fflush(store->wal);
    }
    
    pthread_rwlock_unlock(&store->lock);
    return 0;
}

int kv_get(KVStore *store, const char *key, void **value, size_t *size) {
    if (!store || !key || !value || !size) return -1;
    
    pthread_rwlock_rdlock(&store->lock);
    
    size_t index = hash_key(key) % store->capacity;
    Entry *entry = store->buckets[index];
    
    while (entry) {
        if (strcmp(entry->key, key) == 0) {
            *value = malloc(entry->value_size);
            if (!*value) {
                pthread_rwlock_unlock(&store->lock);
                return -1;
            }
            
            memcpy(*value, entry->value, entry->value_size);
            *size = entry->value_size;
            
            pthread_rwlock_unlock(&store->lock);
            return 0;
        }
        entry = entry->next;
    }
    
    pthread_rwlock_unlock(&store->lock);
    return -1;  // Not found
}

int kv_delete(KVStore *store, const char *key) {
    if (!store || !key) return -1;
    
    pthread_rwlock_wrlock(&store->lock);
    
    size_t index = hash_key(key) % store->capacity;
    Entry *entry = store->buckets[index];
    Entry *prev = NULL;
    
    while (entry) {
        if (strcmp(entry->key, key) == 0) {
            if (prev) {
                prev->next = entry->next;
            } else {
                store->buckets[index] = entry->next;
            }
            
            // Write to WAL
            if (store->wal) {
                uint8_t op = 'D';  // Delete
                uint16_t key_len = strlen(key);
                
                fwrite(&op, 1, 1, store->wal);
                fwrite(&key_len, sizeof(key_len), 1, store->wal);
                fwrite(key, 1, key_len, store->wal);
                fflush(store->wal);
            }
            
            entry_destroy(entry);
            store->size--;
            store->version++;
            
            pthread_rwlock_unlock(&store->lock);
            return 0;
        }
        prev = entry;
        entry = entry->next;
    }
    
    pthread_rwlock_unlock(&store->lock);
    return -1;  // Not found
}

bool kv_exists(KVStore *store, const char *key) {
    if (!store || !key) return false;
    
    pthread_rwlock_rdlock(&store->lock);
    
    size_t index = hash_key(key) % store->capacity;
    Entry *entry = store->buckets[index];
    
    while (entry) {
        if (strcmp(entry->key, key) == 0) {
            pthread_rwlock_unlock(&store->lock);
            return true;
        }
        entry = entry->next;
    }
    
    pthread_rwlock_unlock(&store->lock);
    return false;
}

Persistence

The database needs two persistence mechanisms working together:
  1. Write-Ahead Log (WAL): Every mutation (put/delete) is appended to a log file before modifying the in-memory hash table. If the process crashes, the WAL can be replayed to recover all operations since the last snapshot. This is the same approach used by PostgreSQL, SQLite, and virtually every production database.
  2. Snapshots: Periodically, the entire hash table is written to a single file. After a successful snapshot, the WAL is truncated. Snapshots are written to a temporary file first, then atomically renamed — this ensures that a crash during snapshot writing does not corrupt the existing snapshot.
The combination means: snapshots provide fast recovery (load one file), and the WAL provides durability between snapshots (replay a short log).
// persistence.c
#include "kvstore.h"
#include <sys/stat.h>

// Snapshot file format (binary, little-endian):
// Header: magic(4) | version(4) | count(8)
// Entries: key_len(2) | key | value_len(4) | value | timestamp(8)

#define SNAPSHOT_MAGIC 0x4B565354  // "KVST"
#define SNAPSHOT_VERSION 1

int kv_save(KVStore *store) {
    if (!store || !store->data_dir) return -1;
    
    char snapshot_path[512];
    snprintf(snapshot_path, sizeof(snapshot_path), "%s/data.snap", store->data_dir);
    
    char temp_path[512];
    snprintf(temp_path, sizeof(temp_path), "%s/data.snap.tmp", store->data_dir);
    
    pthread_rwlock_rdlock(&store->lock);
    
    FILE *f = fopen(temp_path, "wb");
    if (!f) {
        pthread_rwlock_unlock(&store->lock);
        return -1;
    }
    
    // Write header
    uint32_t magic = SNAPSHOT_MAGIC;
    uint32_t version = SNAPSHOT_VERSION;
    uint64_t count = store->size;
    
    fwrite(&magic, sizeof(magic), 1, f);
    fwrite(&version, sizeof(version), 1, f);
    fwrite(&count, sizeof(count), 1, f);
    
    // Write entries
    for (size_t i = 0; i < store->capacity; i++) {
        Entry *entry = store->buckets[i];
        while (entry) {
            uint16_t key_len = strlen(entry->key);
            uint32_t value_len = entry->value_size;
            uint64_t timestamp = entry->updated_at;
            
            fwrite(&key_len, sizeof(key_len), 1, f);
            fwrite(entry->key, 1, key_len, f);
            fwrite(&value_len, sizeof(value_len), 1, f);
            fwrite(entry->value, 1, value_len, f);
            fwrite(&timestamp, sizeof(timestamp), 1, f);
            
            entry = entry->next;
        }
    }
    
    fclose(f);
    pthread_rwlock_unlock(&store->lock);
    
    // Atomic rename -- this is the critical safety mechanism.
    // rename() on POSIX is guaranteed to be atomic: the old snapshot is replaced
    // in a single operation. If the process crashes before rename(), we still have
    // the old snapshot intact. If it crashes during rename(), one of the two files
    // is complete. We never have a half-written snapshot on disk.
    if (rename(temp_path, snapshot_path) != 0) {
        unlink(temp_path);
        return -1;
    }
    
    // Clear WAL after successful snapshot
    if (store->wal) {
        char wal_path[512];
        snprintf(wal_path, sizeof(wal_path), "%s/wal.log", store->data_dir);
        
        fclose(store->wal);
        store->wal = fopen(wal_path, "wb");  // Truncate
    }
    
    return 0;
}

int kv_load(KVStore *store) {
    if (!store || !store->data_dir) return -1;
    
    char snapshot_path[512];
    snprintf(snapshot_path, sizeof(snapshot_path), "%s/data.snap", store->data_dir);
    
    FILE *f = fopen(snapshot_path, "rb");
    if (!f) {
        // No snapshot, try to replay WAL
        return kv_replay_wal(store);
    }
    
    // Read header
    uint32_t magic, version;
    uint64_t count;
    
    if (fread(&magic, sizeof(magic), 1, f) != 1 || magic != SNAPSHOT_MAGIC) {
        fclose(f);
        return -1;
    }
    
    fread(&version, sizeof(version), 1, f);
    fread(&count, sizeof(count), 1, f);
    
    // Read entries
    for (uint64_t i = 0; i < count; i++) {
        uint16_t key_len;
        uint32_t value_len;
        uint64_t timestamp;
        
        if (fread(&key_len, sizeof(key_len), 1, f) != 1) break;
        
        char *key = malloc(key_len + 1);
        fread(key, 1, key_len, f);
        key[key_len] = '\0';
        
        fread(&value_len, sizeof(value_len), 1, f);
        void *value = malloc(value_len);
        fread(value, 1, value_len, f);
        
        fread(&timestamp, sizeof(timestamp), 1, f);
        
        kv_put(store, key, value, value_len);
        
        free(key);
        free(value);
    }
    
    fclose(f);
    
    // Replay WAL for any operations after snapshot
    kv_replay_wal(store);
    
    return 0;
}

static int kv_replay_wal(KVStore *store) {
    if (!store->wal) return 0;
    
    rewind(store->wal);
    
    while (!feof(store->wal)) {
        uint8_t op;
        if (fread(&op, 1, 1, store->wal) != 1) break;
        
        uint16_t key_len;
        fread(&key_len, sizeof(key_len), 1, store->wal);
        
        char *key = malloc(key_len + 1);
        fread(key, 1, key_len, store->wal);
        key[key_len] = '\0';
        
        if (op == 'P') {
            uint32_t value_len;
            fread(&value_len, sizeof(value_len), 1, store->wal);
            
            void *value = malloc(value_len);
            fread(value, 1, value_len, store->wal);
            
            kv_put(store, key, value, value_len);
            free(value);
        } else if (op == 'D') {
            kv_delete(store, key);
        }
        
        free(key);
    }
    
    return 0;
}

CLI Interface

// cli.c
#include "kvstore.h"
#include <readline/readline.h>
#include <readline/history.h>

void print_help(void) {
    printf("Commands:\n");
    printf("  SET <key> <value>   Store a key-value pair\n");
    printf("  GET <key>           Retrieve a value\n");
    printf("  DEL <key>           Delete a key\n");
    printf("  EXISTS <key>        Check if key exists\n");
    printf("  KEYS                List all keys\n");
    printf("  STATS               Show statistics\n");
    printf("  SAVE                Save to disk\n");
    printf("  HELP                Show this help\n");
    printf("  QUIT                Exit\n");
}

int main(int argc, char *argv[]) {
    const char *data_dir = argc > 1 ? argv[1] : "./kvdata";
    
    // Create data directory
    mkdir(data_dir, 0755);
    
    KVStore *store = kv_create(data_dir);
    if (!store) {
        fprintf(stderr, "Failed to create store\n");
        return 1;
    }
    
    // Load existing data
    kv_load(store);
    
    printf("KVStore ready. Type HELP for commands.\n");
    
    char *line;
    while ((line = readline("kv> ")) != NULL) {
        if (strlen(line) == 0) {
            free(line);
            continue;
        }
        
        add_history(line);
        
        char cmd[32], key[MAX_KEY_SIZE], value[4096];
        
        if (sscanf(line, "%31s", cmd) != 1) {
            free(line);
            continue;
        }
        
        if (strcasecmp(cmd, "SET") == 0) {
            if (sscanf(line, "%*s %255s %4095[^\n]", key, value) == 2) {
                if (kv_put(store, key, value, strlen(value) + 1) == 0) {
                    printf("OK\n");
                } else {
                    printf("ERROR\n");
                }
            } else {
                printf("Usage: SET <key> <value>\n");
            }
        }
        else if (strcasecmp(cmd, "GET") == 0) {
            if (sscanf(line, "%*s %255s", key) == 1) {
                void *value;
                size_t size;
                if (kv_get(store, key, &value, &size) == 0) {
                    printf("%s\n", (char*)value);
                    free(value);
                } else {
                    printf("(nil)\n");
                }
            } else {
                printf("Usage: GET <key>\n");
            }
        }
        else if (strcasecmp(cmd, "DEL") == 0) {
            if (sscanf(line, "%*s %255s", key) == 1) {
                if (kv_delete(store, key) == 0) {
                    printf("OK\n");
                } else {
                    printf("NOT FOUND\n");
                }
            }
        }
        else if (strcasecmp(cmd, "EXISTS") == 0) {
            if (sscanf(line, "%*s %255s", key) == 1) {
                printf("%s\n", kv_exists(store, key) ? "1" : "0");
            }
        }
        else if (strcasecmp(cmd, "KEYS") == 0) {
            void print_key(const char *k, const void *v, size_t s, void *ctx) {
                (void)v; (void)s; (void)ctx;
                printf("%s\n", k);
            }
            kv_foreach(store, print_key, NULL);
        }
        else if (strcasecmp(cmd, "STATS") == 0) {
            KVStats stats = kv_stats(store);
            printf("Keys: %zu\n", stats.count);
            printf("Buckets: %zu\n", stats.bucket_count);
            printf("Load factor: %.2f\n", stats.load_factor);
        }
        else if (strcasecmp(cmd, "SAVE") == 0) {
            if (kv_save(store) == 0) {
                printf("OK\n");
            } else {
                printf("ERROR\n");
            }
        }
        else if (strcasecmp(cmd, "HELP") == 0) {
            print_help();
        }
        else if (strcasecmp(cmd, "QUIT") == 0) {
            free(line);
            break;
        }
        else {
            printf("Unknown command: %s\n", cmd);
        }
        
        free(line);
    }
    
    // Save before exit
    kv_save(store);
    kv_destroy(store);
    
    printf("Goodbye!\n");
    return 0;
}

Extensions

TTL/Expiration

Add automatic key expiration

Transactions

Implement MULTI/EXEC transactions

Replication

Add master-slave replication

Compression

Compress values with LZ4 or zstd

Binary Protocol

Add a Redis-like binary protocol

Cluster Mode

Shard data across nodes

Next Up

Build a Memory Allocator

Implement your own malloc