Skip to main content

Linux Kernel Module Development

Learn to write kernel modules - code that runs with full kernel privileges. This is the ultimate systems programming: no standard library, no user-space protections, direct hardware access.

Kernel vs User Space

  • Virtual memory protection
  • Standard library (glibc)
  • System call interface
  • Crashes are isolated
  • Normal C runtime

Minimal Kernel Module

// hello.c - Simplest kernel module
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("Hello World Kernel Module");
MODULE_VERSION("1.0");

// Module entry point (registered with module_init() below): logs a greeting
// at KERN_INFO level and reports success.
static int __init hello_init(void) {
    printk(KERN_INFO "Hello, Kernel World!\n");
    return 0;  // 0 = success
}

// Module exit point (registered with module_exit() below): logs a farewell
// message; there is nothing else to release in this minimal module.
static void __exit hello_exit(void) {
    printk(KERN_INFO "Goodbye, Kernel World!\n");
}

module_init(hello_init);
module_exit(hello_exit);

Building Kernel Modules

# Makefile
obj-m += hello.o

# For multi-file modules:
# mymodule-objs := file1.o file2.o
# obj-m += mymodule.o

KDIR := /lib/modules/$(shell uname -r)/build

# These targets are commands, not files; without .PHONY a file named
# "clean" or "install" in this directory would stop the rule from running.
.PHONY: all clean install remove log

# Use $(MAKE) rather than a literal "make" so that flags such as -j and -n
# are passed down to the kernel build system.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean

install:
	sudo insmod hello.ko

remove:
	sudo rmmod hello

# View kernel messages
log:
	sudo dmesg | tail -20
# Build and test (shell commands, run from the module directory)
make
# Load the module, then show the most recent kernel log lines
sudo insmod hello.ko
dmesg | tail
# Unload the module and show the kernel log again
sudo rmmod hello
dmesg | tail

Module Parameters

#include <linux/moduleparam.h>

// Backing variables for the module parameters; the initializers are the
// values used when nothing is supplied on the insmod command line.
static int count = 1;
static char *name = "default";
static int values[5];
// Passed by address to module_param_array() below and used as the loop
// bound in params_init().
static int values_count;

// Declare parameters
module_param(count, int, 0644);
MODULE_PARM_DESC(count, "Number of iterations");

module_param(name, charp, 0644);
MODULE_PARM_DESC(name, "User name");

module_param_array(values, int, &values_count, 0644);
MODULE_PARM_DESC(values, "Array of integers");

// Init function: logs the current value of every module parameter and
// always reports success.
static int __init params_init(void) {
    int idx = 0;

    printk(KERN_INFO "count = %d\n", count);
    printk(KERN_INFO "name = %s\n", name);

    // Only the first values_count entries of values[] are printed.
    while (idx < values_count) {
        printk(KERN_INFO "values[%d] = %d\n", idx, values[idx]);
        idx++;
    }

    return 0;
}

// Usage: insmod params.ko count=5 name="test" values=1,2,3

Character Device Driver

// chardev.c - Character device driver
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/uaccess.h>

// Name passed to alloc_chrdev_region(), class_create() and device_create().
#define DEVICE_NAME "mychardev"
// Size in bytes of device_buffer.
#define BUFFER_SIZE 1024

// Device number filled in by alloc_chrdev_region() in chardev_init().
static dev_t dev_num;
static struct cdev my_cdev;
static struct class *my_class;
// Holds the data from the most recent write; read back by device_read().
static char device_buffer[BUFFER_SIZE];
// Number of valid bytes currently stored in device_buffer.
static int buffer_pointer = 0;

// File operations
// .open handler: only logs the event and always succeeds.
static int device_open(struct inode *inode, struct file *file) {
    printk(KERN_INFO "Device opened\n");
    return 0;
}

// .release handler: only logs the event and always succeeds.
static int device_release(struct inode *inode, struct file *file) {
    printk(KERN_INFO "Device closed\n");
    return 0;
}

// .read handler: copies up to `count` bytes of the stored data, starting at
// *offset, into the user buffer `buf`.
//
// Returns the number of bytes copied and advances *offset by that amount,
// 0 when *offset is at or beyond the end of the stored data (EOF), or
// -EFAULT when the user buffer cannot be written.
static ssize_t device_read(struct file *file, char __user *buf,
                           size_t count, loff_t *offset) {
    size_t available;
    size_t bytes_to_read;

    // The position must be range-checked BEFORE subtracting: a later, shorter
    // write can leave *offset beyond buffer_pointer, and the negative
    // difference would wrap to a huge size_t and let the copy run past the
    // stored data (and past device_buffer itself).
    if (*offset < 0 || *offset >= buffer_pointer)
        return 0;

    available = (size_t)(buffer_pointer - *offset);
    bytes_to_read = min(count, available);
    if (bytes_to_read == 0)
        return 0;

    if (copy_to_user(buf, device_buffer + *offset, bytes_to_read))
        return -EFAULT;

    *offset += bytes_to_read;
    return bytes_to_read;
}

// .write handler: replaces the contents of device_buffer with at most
// BUFFER_SIZE - 1 bytes taken from the user buffer `buf`, NUL-terminates the
// result and records its length in buffer_pointer. *offset is not used.
//
// Returns the number of bytes stored (which may be less than `count`), or
// -EFAULT when the user buffer cannot be read.
static ssize_t device_write(struct file *file, const char __user *buf,
                            size_t count, loff_t *offset) {
    int len;

    // Clamp so that one byte always remains for the terminator.
    if (count > (size_t)(BUFFER_SIZE - 1))
        len = BUFFER_SIZE - 1;
    else
        len = count;

    if (copy_from_user(device_buffer, buf, len) != 0)
        return -EFAULT;

    buffer_pointer = len;
    device_buffer[len] = '\0';

    printk(KERN_INFO "Received %d bytes\n", len);
    return len;
}

// Operation table handed to cdev_init() in chardev_init(); wires the four
// handlers defined above to the device.
static struct file_operations fops = {
    .owner = THIS_MODULE,
    .open = device_open,
    .release = device_release,
    .read = device_read,
    .write = device_write,
};

// Module init: allocates a device number, registers the cdev, then creates
// the device class and the device node.
//
// Returns 0 on success or a negative errno. On any failure, every step that
// already succeeded is undone in reverse order before returning, so a failed
// load leaves nothing registered.
static int __init chardev_init(void) {
    struct device *dev;
    int ret;

    // Allocate device number
    ret = alloc_chrdev_region(&dev_num, 0, 1, DEVICE_NAME);
    if (ret < 0) {
        printk(KERN_ERR "Failed to allocate device number\n");
        return ret;
    }

    // Initialize cdev
    cdev_init(&my_cdev, &fops);
    my_cdev.owner = THIS_MODULE;

    ret = cdev_add(&my_cdev, dev_num, 1);
    if (ret < 0)
        goto err_unregister_region;

    // Create device class
    my_class = class_create(DEVICE_NAME);
    if (IS_ERR(my_class)) {
        ret = PTR_ERR(my_class);
        goto err_del_cdev;
    }

    // Create device node; device_create() reports failure through an
    // ERR_PTR value, so it must be checked like class_create() above.
    dev = device_create(my_class, NULL, dev_num, NULL, DEVICE_NAME);
    if (IS_ERR(dev)) {
        ret = PTR_ERR(dev);
        goto err_destroy_class;
    }

    printk(KERN_INFO "Char device registered: major=%d, minor=%d\n",
           MAJOR(dev_num), MINOR(dev_num));

    return 0;

err_destroy_class:
    class_destroy(my_class);
err_del_cdev:
    cdev_del(&my_cdev);
err_unregister_region:
    unregister_chrdev_region(dev_num, 1);
    return ret;
}

// Module exit: tears down everything chardev_init() set up, in the reverse
// order of creation (device node, class, cdev, device number).
static void __exit chardev_exit(void) {
    device_destroy(my_class, dev_num);
    class_destroy(my_class);
    cdev_del(&my_cdev);
    unregister_chrdev_region(dev_num, 1);
    printk(KERN_INFO "Char device unregistered\n");
}

module_init(chardev_init);
module_exit(chardev_exit);
MODULE_LICENSE("GPL");
Test the driver:
# Build and load
make
sudo insmod chardev.ko

# Test read/write
# The device node is normally created accessible to root only, and a plain
# "> /dev/mychardev" redirect is opened by your own (unprivileged) shell,
# so route both the write and the read through sudo.
echo "Hello Kernel" | sudo tee /dev/mychardev
sudo cat /dev/mychardev

# Unload
sudo rmmod chardev

ioctl Implementation

// ioctl for device control
#include <linux/ioctl.h>

// Define ioctl commands
// Type byte shared by all commands; device_ioctl() rejects any command whose
// _IOC_TYPE differs from it.
#define MYDEV_MAGIC 'k'
// Command numbers 1-3; GET and SET carry an int argument, RESET carries none.
#define MYDEV_GET_VALUE _IOR(MYDEV_MAGIC, 1, int)
#define MYDEV_SET_VALUE _IOW(MYDEV_MAGIC, 2, int)
#define MYDEV_RESET     _IO(MYDEV_MAGIC, 3)

// The value read, written and reset by device_ioctl().
static int device_value = 0;

static long device_ioctl(struct file *file, unsigned int cmd,
                         unsigned long arg) {
    int ret = 0;
    int tmp;
    
    // Verify command type
    if (_IOC_TYPE(cmd) != MYDEV_MAGIC)
        return -ENOTTY;
    
    switch (cmd) {
    case MYDEV_GET_VALUE:
        ret = copy_to_user((int __user *)arg, &device_value, sizeof(int));
        break;
        
    case MYDEV_SET_VALUE:
        ret = copy_from_user(&tmp, (int __user *)arg, sizeof(int));
        if (ret == 0)
            device_value = tmp;
        break;
        
    case MYDEV_RESET:
        device_value = 0;
        break;
        
    default:
        return -ENOTTY;
    }
    
    return ret ? -EFAULT : 0;
}

// Excerpt of the driver's operation table showing only the ioctl hook; the
// remaining handlers are elided here.
static struct file_operations fops = {
    .unlocked_ioctl = device_ioctl,
    // ... other operations
};
User-space program to use ioctl:
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

// Command definitions duplicated from the driver snippet; both sides must
// use identical values for the commands to be recognised.
#define MYDEV_MAGIC 'k'
#define MYDEV_GET_VALUE _IOR(MYDEV_MAGIC, 1, int)
#define MYDEV_SET_VALUE _IOW(MYDEV_MAGIC, 2, int)
#define MYDEV_RESET     _IO(MYDEV_MAGIC, 3)

// Exercises the driver's ioctl interface: sets the value to 42, reads it
// back and prints it, then resets it.
//
// Returns 0 when every call succeeded and 1 otherwise; the failing call is
// reported on stderr via perror().
int main(void) {
    int value = 42;
    int status = 1;
    int fd = open("/dev/mychardev", O_RDWR);

    // Without this check a missing or inaccessible device would make every
    // ioctl below fail with EBADF and print a meaningless value.
    if (fd < 0) {
        perror("open /dev/mychardev");
        return 1;
    }

    // Set value
    if (ioctl(fd, MYDEV_SET_VALUE, &value) < 0) {
        perror("ioctl MYDEV_SET_VALUE");
        goto out;
    }

    // Get value
    if (ioctl(fd, MYDEV_GET_VALUE, &value) < 0) {
        perror("ioctl MYDEV_GET_VALUE");
        goto out;
    }
    printf("Value: %d\n", value);

    // Reset
    if (ioctl(fd, MYDEV_RESET) < 0) {
        perror("ioctl MYDEV_RESET");
        goto out;
    }

    status = 0;

out:
    close(fd);
    return status;
}

Kernel Memory Allocation

#include <linux/slab.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>

// Demonstrates the main kernel allocators. Every allocation is checked for
// failure and released with the free routine that matches how it was
// obtained.
void memory_examples(void) {
    void *ptr;
    struct page *page;

    // kmalloc - physically contiguous allocation, best for small objects
    // (the upper limit is KMALLOC_MAX_SIZE and depends on the kernel
    // configuration)
    ptr = kmalloc(1024, GFP_KERNEL);
    if (ptr) {
        // Use memory
        kfree(ptr);
    }

    // kzalloc - zeroed allocation
    ptr = kzalloc(1024, GFP_KERNEL);
    if (ptr) {
        kfree(ptr);
    }

    // vmalloc - large non-contiguous allocations
    ptr = vmalloc(1024 * 1024);  // 1MB
    if (ptr) {
        vfree(ptr);
    }

    // Get free pages - for DMA or large buffers
    page = alloc_pages(GFP_KERNEL, 2);  // order 2: 2^2 = 4 pages
    if (page) {
        ptr = page_address(page);  // kernel virtual address of the pages
        // Use memory through ptr

        // Release through the struct page that alloc_pages() returned,
        // not through an address derived from it.
        __free_pages(page, 2);
    }
}

// GFP flags:
// GFP_KERNEL  - Normal allocation, may sleep
// GFP_ATOMIC  - Interrupt context, never sleeps
// GFP_DMA     - DMA-capable memory

// Slab cache for frequent allocations of same-size objects
// Cache handle created and destroyed by slab_example().
static struct kmem_cache *my_cache;

// Example object type; its size is the object size given to
// kmem_cache_create() in slab_example().
struct my_object {
    int data[16];
    struct list_head list;
};

// Demonstrates the slab-cache life cycle: create a cache, allocate one
// object from it, free the object, destroy the cache.
//
// Both the cache creation and the object allocation can fail, so each result
// is checked before it is used.
void slab_example(void) {
    struct my_object *obj;

    // Create cache
    my_cache = kmem_cache_create("my_objects",
                                  sizeof(struct my_object),
                                  0,  // Alignment
                                  0,  // Flags
                                  NULL);  // Constructor
    if (!my_cache)
        return;  // Nothing was created, so there is nothing to clean up

    // Allocate object
    obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
    if (obj) {
        // Use object

        // Free object
        kmem_cache_free(my_cache, obj);
    }

    // Destroy cache
    kmem_cache_destroy(my_cache);
    my_cache = NULL;  // Do not leave a dangling handle behind
}

Synchronization Primitives

#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/semaphore.h>
#include <linux/rwlock.h>
#include <linux/atomic.h>

// Spinlock - for short critical sections, interrupt context OK
static DEFINE_SPINLOCK(my_spinlock);

// Takes my_spinlock with local interrupts disabled, then restores the
// previous interrupt state on unlock.
void spinlock_example(void) {
    // The saved interrupt state belongs to this particular lock/unlock pair,
    // so it must live on the caller's stack. A shared file-scope variable
    // would be overwritten by another CPU taking the lock at the same time.
    unsigned long flags;

    spin_lock_irqsave(&my_spinlock, flags);
    // Critical section - very short!
    spin_unlock_irqrestore(&my_spinlock, flags);
}

// Mutex - for longer critical sections, sleepable
static DEFINE_MUTEX(my_mutex);

// Acquires my_mutex, runs the critical section, then releases the mutex.
void mutex_example(void) {
    mutex_lock(&my_mutex);
    // Critical section - can sleep
    mutex_unlock(&my_mutex);
}

// Read-write lock
static DEFINE_RWLOCK(my_rwlock);

// Shows the two ways of taking my_rwlock, one after the other: first as a
// reader, then as a writer.
void rwlock_example(void) {
    // Multiple readers allowed
    read_lock(&my_rwlock);
    // Read data
    read_unlock(&my_rwlock);
    
    // Exclusive write access
    write_lock(&my_rwlock);
    // Modify data
    write_unlock(&my_rwlock);
}

// Atomic operations
static atomic_t counter = ATOMIC_INIT(0);

// Runs through the basic atomic_t operations on `counter`: increment,
// decrement, add, read, and compare-and-exchange.
void atomic_example(void) {
    int val;

    atomic_inc(&counter);
    atomic_dec(&counter);
    atomic_add(5, &counter);
    val = atomic_read(&counter);

    // Compare-and-exchange: store 1 only if the current value is 0; the
    // call returns the value that was there before.
    if (atomic_cmpxchg(&counter, 0, 1) == 0) {
        // Successfully changed from 0 to 1
    }
}

// Completion - waiting for events
static DECLARE_COMPLETION(my_completion);

// Shows both sides of a completion in one place. NOTE(review): as written,
// the wait comes before the complete() in the same function, so the two
// halves are presumably meant to be placed in different threads, as the
// "Waiter thread" / "Signaler thread" comments indicate.
void completion_example(void) {
    // Waiter thread
    wait_for_completion(&my_completion);
    
    // Signaler thread
    complete(&my_completion);  // Wake one waiter
    // complete_all(&my_completion);  // Wake all waiters
}

Kernel Threads

#include <linux/kthread.h>
#include <linux/delay.h>

// Handle returned by kthread_run() in thread_init(); used by thread_exit().
static struct task_struct *my_thread;
// NOTE(review): not referenced by any code shown here; the loop below relies
// on kthread_should_stop() instead.
static int stop_thread = 0;

// Thread body: logs an iteration counter roughly once per second until
// kthread_should_stop() reports true, then logs and returns 0.
static int thread_function(void *data) {
    int count = 0;
    
    printk(KERN_INFO "Thread started\n");
    
    while (!kthread_should_stop()) {
        printk(KERN_INFO "Thread iteration %d\n", count++);
        
        // Sleep for 1 second
        msleep(1000);
        
        // Alternative taking whole seconds. NOTE(review): ssleep() is not an
        // interruptible sleep; msleep_interruptible() is the interruptible
        // variant - confirm against <linux/delay.h>.
        // ssleep(1);
    }
    
    printk(KERN_INFO "Thread stopping\n");
    return 0;
}

// Module init: creates and starts the kernel thread running
// thread_function(). Returns 0 on success, or the error encoded in the
// pointer returned by kthread_run() on failure.
static int __init thread_init(void) {
    my_thread = kthread_run(thread_function, NULL, "my_kthread");
    if (!IS_ERR(my_thread))
        return 0;

    printk(KERN_ERR "Failed to create thread\n");
    return PTR_ERR(my_thread);
}

// Module exit: asks the kernel thread to stop, if one was recorded in
// my_thread.
static void __exit thread_exit(void) {
    if (!my_thread)
        return;

    kthread_stop(my_thread);
}

Work Queues

#include <linux/workqueue.h>

// Declare work structure
// Work item initialised and queued by workqueue_init().
static struct work_struct my_work;

// Work function
// Callback attached to my_work via INIT_WORK(); it only logs a message.
static void work_handler(struct work_struct *work) {
    printk(KERN_INFO "Work handler executed\n");
    // Do deferred work here
}

// Init function: binds work_handler() to my_work, queues it, and returns 0.
static int __init workqueue_init(void) {
    // Initialize work
    INIT_WORK(&my_work, work_handler);
    
    // Schedule work on system workqueue
    schedule_work(&my_work);
    
    return 0;
}

// Delayed work
// Delayed work item initialised and queued by delayed_init().
static struct delayed_work my_delayed_work;

// Callback attached to my_delayed_work; it only logs a message.
static void delayed_work_handler(struct work_struct *work) {
    printk(KERN_INFO "Delayed work executed\n");
}

// Init function: binds delayed_work_handler() to my_delayed_work, queues it
// with a delay of 5 * HZ, and returns 0.
static int __init delayed_init(void) {
    INIT_DELAYED_WORK(&my_delayed_work, delayed_work_handler);
    
    // Execute after 5 seconds
    schedule_delayed_work(&my_delayed_work, 5 * HZ);
    
    return 0;
}

// Exit function: cancels both work items with the _sync variants before the
// module goes away.
static void __exit workqueue_exit(void) {
    cancel_work_sync(&my_work);
    cancel_delayed_work_sync(&my_delayed_work);
}

Interrupt Handlers

#include <linux/interrupt.h>

static int irq_number = 10;  // Example IRQ
static irqreturn_t my_irq_handler(int irq, void *dev_id);

// Tasklet for bottom-half processing
static DECLARE_TASKLET(my_tasklet, my_tasklet_function);

static void my_tasklet_function(struct tasklet_struct *t) {
    // Deferred interrupt processing
    printk(KERN_INFO "Tasklet executed\n");
}

// Top half: schedules my_tasklet and reports the interrupt as handled.
// NOTE(review): irq_init() registers this handler with IRQF_SHARED, yet it
// returns IRQ_HANDLED unconditionally; a handler on a shared line would
// normally check whether its own device raised the interrupt - confirm once
// real hardware access is added.
static irqreturn_t my_irq_handler(int irq, void *dev_id) {
    // Top-half: quick processing only
    
    // Acknowledge hardware interrupt
    // ...
    
    // Schedule bottom-half
    tasklet_schedule(&my_tasklet);
    
    return IRQ_HANDLED;
}

// Init function: registers my_irq_handler() on irq_number as a shared
// interrupt. The handler's own address is used as the dev_id cookie, and
// irq_exit() passes the same cookie to free_irq().
//
// Returns 0 on success or the error code from request_irq().
static int __init irq_init(void) {
    int err = request_irq(irq_number, my_irq_handler, IRQF_SHARED,
                          "my_device", (void *)my_irq_handler);

    if (err)
        printk(KERN_ERR "Failed to register IRQ\n");

    return err;
}

// Exit function: releases the interrupt line, then kills the tasklet.
//
// The order matters: my_irq_handler() schedules my_tasklet, so the handler
// must be unregistered first. Killing the tasklet while the handler is still
// installed would let a late interrupt schedule it again, and it could then
// run after the module has been unloaded.
static void __exit irq_exit(void) {
    free_irq(irq_number, (void *)my_irq_handler);
    tasklet_kill(&my_tasklet);
}

Procfs Interface

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

// Entry returned by proc_create() in proc_init(); removed in proc_exit().
static struct proc_dir_entry *proc_entry;
// Value printed by my_show() and updated by my_write().
static int counter = 0;

// seq_file show callback: emits the current counter as one text line.
static int my_show(struct seq_file *m, void *v) {
    seq_printf(m, "Counter: %d\n", counter);
    return 0;
}

// Open callback: sets the file up via single_open() with my_show() as the
// show routine and no private data.
static int my_open(struct inode *inode, struct file *file) {
    return single_open(file, my_show, NULL);
}

// Write callback: parses the text written by the user as a base-10 integer
// and stores it in `counter`.
//
// Returns `count` on success, -EINVAL when the input is longer than 15 bytes
// or does not parse, and -EFAULT when the user buffer cannot be read.
static ssize_t my_write(struct file *file, const char __user *buf,
                        size_t count, loff_t *off) {
    char text[16];

    // One byte of the 16-byte buffer is reserved for the terminating NUL.
    if (count >= sizeof(text))
        return -EINVAL;

    if (copy_from_user(text, buf, count) != 0)
        return -EFAULT;
    text[count] = '\0';

    if (kstrtoint(text, 10, &counter) != 0)
        return -EINVAL;

    return count;
}

// Operation table passed to proc_create() in proc_init(): reads go through
// the seq_file helpers set up by my_open(), writes go to my_write().
static const struct proc_ops my_proc_ops = {
    .proc_open = my_open,
    .proc_read = seq_read,
    .proc_write = my_write,
    .proc_release = single_release,
};

// Init function: creates the "my_counter" proc entry with mode 0666 and
// my_proc_ops as its operations.
//
// Returns 0 on success, or -ENOMEM when proc_create() returns NULL.
static int __init proc_init(void) {
    proc_entry = proc_create("my_counter", 0666, NULL, &my_proc_ops);
    if (!proc_entry)
        return -ENOMEM;

    return 0;
}

// Exit function: removes the proc entry created by proc_init().
static void __exit proc_exit(void) {
    proc_remove(proc_entry);
}

Debugging Techniques

// Printk log levels, most severe first (call printk from inside a function)
printk(KERN_EMERG   "Emergency\n");   // System unusable
printk(KERN_ALERT   "Alert\n");       // Immediate action needed
printk(KERN_CRIT    "Critical\n");    // Critical conditions
printk(KERN_ERR     "Error\n");       // Error conditions
printk(KERN_WARNING "Warning\n");     // Warning conditions
printk(KERN_NOTICE  "Notice\n");      // Normal but significant
printk(KERN_INFO    "Info\n");        // Informational
printk(KERN_DEBUG   "Debug\n");       // Debug messages

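// Debug messages: pr_debug() is compiled in when DEBUG is defined before the
// includes (kernels built with CONFIG_DYNAMIC_DEBUG can also toggle it at runtime)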
#define DEBUG
#include <linux/printk.h>

pr_debug("Debug message\n");  // Only if DEBUG defined

// Kernel debugging options
// Enable in kernel config:
// CONFIG_DEBUG_KERNEL=y
// CONFIG_DEBUG_INFO=y
// CONFIG_KGDB=y

// Assert-like functionality
// These macros are already provided by the kernel (<linux/bug.h>); use them,
// never redefine them - an empty #define would silently disable every check.
//   WARN_ON(condition);  // Logs a warning with a stack trace if true, then continues
//   BUG_ON(condition);   // Crashes kernel if true!

// Memory debugging
// CONFIG_DEBUG_SLAB=y
// CONFIG_KASAN=y (Kernel Address Sanitizer)

Best Practices

  1. Never sleep with spinlocks held
  2. Always check return values - especially for allocations
  3. Use appropriate GFP flags - GFP_ATOMIC in interrupt context
  4. Keep interrupt handlers short - defer work to tasklets/workqueues
  5. Validate all user input - use copy_from_user, check sizes
  6. Release all resources on exit - memory, IRQs, device numbers
  7. Test thoroughly - kernel bugs crash the system
  8. Follow kernel coding style - scripts/checkpatch.pl

Resources


Next Up

Course Recap

Review everything you’ve learned