Documentation Index
Fetch the complete documentation index at: https://resources.devweekends.com/llms.txt
Use this file to discover all available pages before exploring further.
Linux Kernel Module Development
Learn to write kernel modules — code that runs with full kernel privileges. This is the ultimate systems programming: no standard library, no user-space protections, direct hardware access. There is no safety net here. In user space, a null pointer dereference gives you a segfault and the OS kills your process — annoying but recoverable. In kernel space, the same bug triggers a kernel panic (the Linux equivalent of a Blue Screen of Death), taking down the entire machine and every process running on it. You cannot call printf, malloc, or any other libc function. You use printk instead of printf, kmalloc instead of malloc, and a completely different set of rules for memory management, error handling, and concurrency.
Kernel vs User Space
- User Space
- Kernel Space
- Virtual memory protection
- Standard library (glibc)
- System call interface
- Crashes are isolated
- Normal C runtime
- Direct hardware access
- No standard library
- Special kernel APIs
- Bugs crash the system
- Different runtime rules
Minimal Kernel Module
// hello.c - Simplest kernel module
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("Hello World Kernel Module");
MODULE_VERSION("1.0");
/*
 * Module entry point, registered through module_init().
 * Logs a greeting and reports success to the loader by returning 0.
 */
static int __init hello_init(void)
{
    printk(KERN_INFO "Hello, Kernel World!\n");

    return 0;
}
/*
 * Module teardown hook, registered through module_exit().
 * Only logs a farewell message; there is nothing to release.
 */
static void __exit hello_exit(void)
{
    printk(KERN_INFO "Goodbye, Kernel World!\n");
}
module_init(hello_init);
module_exit(hello_exit);
Building Kernel Modules
# Makefile
obj-m += hello.o

# For multi-file modules:
# mymodule-objs := file1.o file2.o
# obj-m += mymodule.o

# Build tree of the currently running kernel
KDIR := /lib/modules/$(shell uname -r)/build

# These targets are commands, not files; never treat them as up to date
.PHONY: all clean install remove log

# Use $(MAKE) rather than a literal "make" so flags such as -j and -n
# propagate to the kernel build. Recipe lines must start with a TAB.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean

install:
	sudo insmod hello.ko

remove:
	sudo rmmod hello

# View kernel messages
log:
	sudo dmesg | tail -20
# Build and test
make
sudo insmod hello.ko
dmesg | tail
sudo rmmod hello
dmesg | tail
Module Parameters
#include <linux/moduleparam.h>
static int count = 1;
static char *name = "default";
static int values[5];
static int values_count;
// Declare parameters
module_param(count, int, 0644);
MODULE_PARM_DESC(count, "Number of iterations");
module_param(name, charp, 0644);
MODULE_PARM_DESC(name, "User name");
module_param_array(values, int, &values_count, 0644);
MODULE_PARM_DESC(values, "Array of integers");
/*
 * Print every module parameter as it was set at load time.
 * Always returns 0.
 */
static int __init params_init(void)
{
    int idx = 0;

    printk(KERN_INFO "count = %d\n", count);
    printk(KERN_INFO "name = %s\n", name);

    /* values_count is the element counter passed to module_param_array() */
    while (idx < values_count) {
        printk(KERN_INFO "values[%d] = %d\n", idx, values[idx]);
        idx++;
    }

    return 0;
}
// Usage: insmod params.ko count=5 name="test" values=1,2,3
Character Device Driver
// chardev.c - Character device driver
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/uaccess.h>
#define DEVICE_NAME "mychardev"
#define BUFFER_SIZE 1024
static dev_t dev_num;
static struct cdev my_cdev;
static struct class *my_class;
static char device_buffer[BUFFER_SIZE];
static int buffer_pointer = 0;
// File operations
/*
 * open() handler: no per-file state is set up, the call is only logged.
 * Always returns 0.
 */
static int device_open(struct inode *inode, struct file *file)
{
    printk(KERN_INFO "Device opened\n");

    return 0;
}
/*
 * release() handler: nothing to free, the call is only logged.
 * Always returns 0.
 */
static int device_release(struct inode *inode, struct file *file)
{
    printk(KERN_INFO "Device closed\n");

    return 0;
}
// __user annotation marks pointers that come from user space. You MUST use
// copy_to_user/copy_from_user to access them. Directly dereferencing a __user
// pointer can crash the kernel (if the user passes an invalid address) and is
// a security vulnerability (kernel reads/writes to arbitrary addresses).
/*
 * device_read - copy buffered data out to user space.
 * @file:   open file (unused)
 * @buf:    user-space destination; only accessed through copy_to_user()
 * @count:  maximum number of bytes the caller wants
 * @offset: file position; advanced by the number of bytes returned
 *
 * Returns the number of bytes copied, 0 when the position is at or beyond
 * the stored data, or -EFAULT if the user buffer cannot be written.
 */
static ssize_t device_read(struct file *file, char __user *buf,
                           size_t count, loff_t *offset)
{
    size_t available;
    size_t bytes_to_read;

    /*
     * Reject positions outside the valid data before doing any arithmetic.
     * Otherwise an offset past buffer_pointer makes the difference negative,
     * which wraps to a huge size_t, so min() would pick count and the copy
     * would run past device_buffer (kernel memory disclosure).
     */
    if (*offset < 0 || *offset >= buffer_pointer)
        return 0;

    available = buffer_pointer - *offset;
    bytes_to_read = min(available, count);
    if (bytes_to_read == 0)
        return 0;

    if (copy_to_user(buf, device_buffer + *offset, bytes_to_read))
        return -EFAULT;

    *offset += bytes_to_read;
    return bytes_to_read;
}
/*
 * device_write - replace the buffer contents with data from user space.
 * @file:   open file (unused)
 * @buf:    user-space source; only accessed through copy_from_user()
 * @count:  number of bytes offered by the caller
 * @offset: file position (ignored; every write starts at the buffer start)
 *
 * At most BUFFER_SIZE - 1 bytes are accepted so the data can always be
 * NUL-terminated. Returns the number of bytes stored or -EFAULT.
 */
static ssize_t device_write(struct file *file, const char __user *buf,
                            size_t count, loff_t *offset)
{
    const size_t capacity = (size_t)(BUFFER_SIZE - 1);
    int bytes_to_write;

    /* Clamp the request to what fits, leaving room for the terminator */
    bytes_to_write = (count < capacity) ? count : capacity;

    if (copy_from_user(device_buffer, buf, bytes_to_write) != 0)
        return -EFAULT;

    device_buffer[bytes_to_write] = '\0';
    buffer_pointer = bytes_to_write;

    printk(KERN_INFO "Received %d bytes\n", bytes_to_write);

    return bytes_to_write;
}
static struct file_operations fops = {
.owner = THIS_MODULE,
.open = device_open,
.release = device_release,
.read = device_read,
.write = device_write,
};
/*
 * chardev_init - register the character device and create its /dev node.
 *
 * Steps: reserve a device number, add the cdev, create the device class,
 * then create the device node. Each failure unwinds everything acquired so
 * far, in reverse order, through the labels at the end of the function.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init chardev_init(void)
{
    struct device *dev;
    int ret;

    /* Allocate device number */
    ret = alloc_chrdev_region(&dev_num, 0, 1, DEVICE_NAME);
    if (ret < 0) {
        printk(KERN_ERR "Failed to allocate device number\n");
        return ret;
    }

    /* Initialize and register the cdev */
    cdev_init(&my_cdev, &fops);
    my_cdev.owner = THIS_MODULE;
    ret = cdev_add(&my_cdev, dev_num, 1);
    if (ret < 0)
        goto err_unregister;

    /* Create device class */
    my_class = class_create(DEVICE_NAME);
    if (IS_ERR(my_class)) {
        ret = PTR_ERR(my_class);
        goto err_cdev;
    }

    /*
     * Create device node. device_create() reports failure with an ERR_PTR;
     * ignoring it would let the module load without a usable /dev entry.
     */
    dev = device_create(my_class, NULL, dev_num, NULL, DEVICE_NAME);
    if (IS_ERR(dev)) {
        ret = PTR_ERR(dev);
        goto err_class;
    }

    printk(KERN_INFO "Char device registered: major=%d, minor=%d\n",
           MAJOR(dev_num), MINOR(dev_num));
    return 0;

err_class:
    class_destroy(my_class);
err_cdev:
    cdev_del(&my_cdev);
err_unregister:
    unregister_chrdev_region(dev_num, 1);
    return ret;
}
/*
 * chardev_exit - undo chardev_init() in reverse order:
 * device node, class, cdev, and finally the device number.
 */
static void __exit chardev_exit(void)
{
    /* Remove the /dev node before the class it belongs to */
    device_destroy(my_class, dev_num);
    class_destroy(my_class);

    /* Unregister the cdev, then give back the device number */
    cdev_del(&my_cdev);
    unregister_chrdev_region(dev_num, 1);

    printk(KERN_INFO "Char device unregistered\n");
}
module_init(chardev_init);
module_exit(chardev_exit);
MODULE_LICENSE("GPL");
# Build and load
make
sudo insmod chardev.ko
# Test read/write
echo "Hello Kernel" > /dev/mychardev
cat /dev/mychardev
# Unload
sudo rmmod chardev
ioctl Implementation
// ioctl for device control
#include <linux/ioctl.h>
// Define ioctl commands
#define MYDEV_MAGIC 'k'
#define MYDEV_GET_VALUE _IOR(MYDEV_MAGIC, 1, int)
#define MYDEV_SET_VALUE _IOW(MYDEV_MAGIC, 2, int)
#define MYDEV_RESET _IO(MYDEV_MAGIC, 3)
static int device_value = 0;
static long device_ioctl(struct file *file, unsigned int cmd,
unsigned long arg) {
int ret = 0;
int tmp;
// Verify command type
if (_IOC_TYPE(cmd) != MYDEV_MAGIC)
return -ENOTTY;
switch (cmd) {
case MYDEV_GET_VALUE:
ret = copy_to_user((int __user *)arg, &device_value, sizeof(int));
break;
case MYDEV_SET_VALUE:
ret = copy_from_user(&tmp, (int __user *)arg, sizeof(int));
if (ret == 0)
device_value = tmp;
break;
case MYDEV_RESET:
device_value = 0;
break;
default:
return -ENOTTY;
}
return ret ? -EFAULT : 0;
}
// Fragment: shows the one field that hooks device_ioctl into the driver's
// file_operations table. The remaining handlers are elided by the
// placeholder comment below.
static struct file_operations fops = {
.unlocked_ioctl = device_ioctl,
// ... other operations
};
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#define MYDEV_MAGIC 'k'
#define MYDEV_GET_VALUE _IOR(MYDEV_MAGIC, 1, int)
#define MYDEV_SET_VALUE _IOW(MYDEV_MAGIC, 2, int)
#define MYDEV_RESET _IO(MYDEV_MAGIC, 3)
/*
 * User-space test client for the ioctl interface of /dev/mychardev.
 * Sets the device value to 42, reads it back, prints it, then resets it.
 *
 * Returns 0 when every call succeeds and 1 on the first failure. Checking
 * each call matters: without it a failed open() or ioctl() would go
 * unnoticed and the program would print a value the driver never returned.
 */
int main(void)
{
    int status = 1;
    int value = 42;
    int fd = open("/dev/mychardev", O_RDWR);

    if (fd < 0) {
        perror("open /dev/mychardev");
        return 1;
    }

    /* Set value */
    if (ioctl(fd, MYDEV_SET_VALUE, &value) < 0) {
        perror("ioctl MYDEV_SET_VALUE");
        goto out;
    }

    /* Get value */
    if (ioctl(fd, MYDEV_GET_VALUE, &value) < 0) {
        perror("ioctl MYDEV_GET_VALUE");
        goto out;
    }
    printf("Value: %d\n", value);

    /* Reset */
    if (ioctl(fd, MYDEV_RESET) < 0) {
        perror("ioctl MYDEV_RESET");
        goto out;
    }

    status = 0;
out:
    close(fd);
    return status;
}
Kernel Memory Allocation
#include <linux/slab.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
/*
 * Walk through the main kernel allocators. Every allocation is released
 * immediately; the function exists only to show matching alloc/free pairs.
 */
void memory_examples(void)
{
    struct page *pages;
    void *buf;

    /*
     * kmalloc: physically contiguous memory from the slab allocator, fast
     * for common small sizes. Suited to small objects, DMA buffers, or
     * anything that needs a physical address. The upper bound is often
     * quoted as ~128KB but really depends on the kernel configuration.
     *
     * PITFALL: unlike user-space malloc, a large request can fail even with
     * plenty of free RAM, because the pages must be CONTIGUOUS.
     */
    buf = kmalloc(1024, GFP_KERNEL);
    kfree(buf);                 /* kfree(NULL) is a no-op */

    /*
     * kzalloc: kmalloc plus zero-fill (like calloc). Prefer it for structs;
     * uninitialized kernel memory can leak sensitive data to user space.
     */
    buf = kzalloc(1024, GFP_KERNEL);
    kfree(buf);

    /*
     * vmalloc: virtually contiguous, physically scattered. Use for large
     * buffers that do not need physical contiguity. Slower than kmalloc
     * (page-table setup), unusable for DMA, not for interrupt context.
     */
    buf = vmalloc(1024 * 1024); /* 1MB */
    vfree(buf);                 /* vfree(NULL) is a no-op */

    /* Raw page allocation for DMA or large buffers: order 2 = 2^2 = 4 pages */
    pages = alloc_pages(GFP_KERNEL, 2);
    if (pages)
        free_pages((unsigned long)page_address(pages), 2);
}
// GFP flags -- choosing the wrong one is a common source of kernel bugs:
// GFP_KERNEL - Normal allocation, may sleep. Use in process context (syscalls,
// workqueues). NEVER use in interrupt handlers or with spinlocks held.
// GFP_ATOMIC - Never sleeps, uses emergency memory reserves. Use in interrupt
// context or when holding spinlocks. More likely to fail than GFP_KERNEL.
// GFP_DMA - Memory from the DMA zone (first 16MB on x86). Required for
// legacy ISA DMA devices. Rarely needed on modern hardware.
// Slab cache for frequent allocations of same-size objects
static struct kmem_cache *my_cache;
// Fixed-size object type served from my_cache by slab_example().
struct my_object {
int data[16];           // payload: 16 ints
struct list_head list;  // list linkage; not used anywhere in this snippet
};
/*
 * Demonstrate the life cycle of a dedicated slab cache: create the cache,
 * allocate one object, free it, destroy the cache.
 *
 * Both kmem_cache_create() and kmem_cache_alloc() return NULL on failure.
 * Each result is checked so a failed allocation ends the demo early instead
 * of dereferencing NULL inside the allocator.
 */
void slab_example(void)
{
    struct my_object *obj;

    /* Create cache */
    my_cache = kmem_cache_create("my_objects",
                                 sizeof(struct my_object),
                                 0,      /* Alignment */
                                 0,      /* Flags */
                                 NULL);  /* Constructor */
    if (!my_cache)
        return;

    /* Allocate object, and free it again only if the allocation worked */
    obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
    if (obj)
        kmem_cache_free(my_cache, obj);

    /* Destroy cache and drop the now-stale pointer */
    kmem_cache_destroy(my_cache);
    my_cache = NULL;
}
Synchronization Primitives
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/semaphore.h>
#include <linux/rwlock.h>
#include <linux/atomic.h>
// Spinlock - the CPU literally spins in a tight loop waiting for the lock.
// Use ONLY for very short critical sections (a few lines, no function calls
// that might sleep). Unlike user-space mutexes, spinlocks disable preemption
// and (with irqsave) interrupts -- calling kmalloc(GFP_KERNEL), schedule(),
// or copy_from_user() while holding a spinlock will deadlock or crash.
static DEFINE_SPINLOCK(my_spinlock);
/*
 * Take and release my_spinlock with local interrupts disabled.
 *
 * The saved interrupt state must live in a local variable. A file-scope
 * "flags" would be shared by every caller and CPU, so one caller could
 * overwrite the state another caller is about to restore.
 */
void spinlock_example(void)
{
    unsigned long flags;

    /* Save interrupt state + disable IRQs + lock */
    spin_lock_irqsave(&my_spinlock, flags);

    /* Critical section - VERY short, no sleeping, no user-space access */

    /* Unlock and restore the interrupt state saved above */
    spin_unlock_irqrestore(&my_spinlock, flags);
}
// Mutex - sleepable lock for longer critical sections.
// Can ONLY be used in process context (not interrupt handlers, not softirqs).
// The calling thread sleeps if the lock is held, freeing the CPU for other work.
static DEFINE_MUTEX(my_mutex);
/*
 * Take and release my_mutex around a critical section that may sleep.
 */
void mutex_example(void)
{
    mutex_lock(&my_mutex);

    /*
     * Critical section: sleeping calls such as kmalloc(GFP_KERNEL),
     * copy_from_user() or msleep() are allowed here.
     */

    mutex_unlock(&my_mutex);
}
// Read-write lock
static DEFINE_RWLOCK(my_rwlock);
/*
 * Show both modes of my_rwlock: shared for readers, exclusive for a writer.
 */
void rwlock_example(void)
{
    /* Reader side: several readers may hold the lock at once */
    read_lock(&my_rwlock);
    /* ... read data ... */
    read_unlock(&my_rwlock);

    /* Writer side: exclusive access */
    write_lock(&my_rwlock);
    /* ... modify data ... */
    write_unlock(&my_rwlock);
}
// Atomic operations
static atomic_t counter = ATOMIC_INIT(0);
/*
 * Exercise the basic atomic_t operations on the shared counter:
 * increment, decrement, add, read and compare-and-exchange.
 */
void atomic_example(void)
{
    int val;
    int previous;

    atomic_inc(&counter);
    atomic_dec(&counter);
    atomic_add(5, &counter);

    val = atomic_read(&counter);

    /* Compare-and-exchange: store 1 only if the counter currently holds 0 */
    previous = atomic_cmpxchg(&counter, 0, 1);
    if (previous == 0) {
        /* Successfully changed from 0 to 1 */
    }
}
// Completion - waiting for events
static DECLARE_COMPLETION(my_completion);
// Shows the two halves of the completion API side by side.
// NOTE(review): the halves are labelled for different threads. Run
// sequentially as written, this function blocks in wait_for_completion()
// until some other context signals my_completion, so the complete() call
// below cannot be what wakes this caller. Split the halves before reuse.
void completion_example(void) {
// Waiter thread
wait_for_completion(&my_completion);
// Signaler thread
complete(&my_completion); // Wake one waiter
// complete_all(&my_completion); // Wake all waiters
}
Kernel Threads
#include <linux/kthread.h>
#include <linux/delay.h>
static struct task_struct *my_thread;
static int stop_thread = 0;
/*
 * Body of the kernel thread: log one line per second until asked to stop.
 * @data: opaque argument from the thread creator (unused)
 *
 * Returns 0 as the thread's exit code.
 */
static int thread_function(void *data)
{
    int iteration = 0;

    printk(KERN_INFO "Thread started\n");

    for (;;) {
        /* Becomes true once kthread_stop() has been called on this thread */
        if (kthread_should_stop())
            break;

        printk(KERN_INFO "Thread iteration %d\n", iteration);
        iteration++;

        /*
         * Sleep for 1 second. ssleep(1) is the same delay expressed in
         * seconds; msleep_interruptible(1000) is the variant that a signal
         * can cut short.
         */
        msleep(1000);
    }

    printk(KERN_INFO "Thread stopping\n");

    return 0;
}
/*
 * Create and start the kernel thread.
 *
 * kthread_run() reports failure with an ERR_PTR, not NULL. The result is
 * kept in a local and published to my_thread only on success, so my_thread
 * is either NULL or a valid task and the NULL check in thread_exit() can be
 * trusted.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init thread_init(void)
{
    struct task_struct *task;

    task = kthread_run(thread_function, NULL, "my_kthread");
    if (IS_ERR(task)) {
        printk(KERN_ERR "Failed to create thread\n");
        return PTR_ERR(task);
    }

    my_thread = task;
    return 0;
}
/*
 * Ask the kernel thread to stop and wait for it to exit.
 * Does nothing when no thread pointer was recorded.
 */
static void __exit thread_exit(void)
{
    if (!my_thread)
        return;

    kthread_stop(my_thread);
}
Work Queues
#include <linux/workqueue.h>
// Declare work structure
static struct work_struct my_work;
// Work function
/*
 * Callback installed on my_work by INIT_WORK().
 * @work: the work item being executed (unused)
 */
static void work_handler(struct work_struct *work)
{
    printk(KERN_INFO "Work handler executed\n");

    /* Deferred work goes here */
}
/*
 * Bind work_handler to my_work and queue it on the system workqueue.
 * Always returns 0.
 */
static int __init workqueue_init(void)
{
    /* Attach the handler to the work item */
    INIT_WORK(&my_work, work_handler);

    /* Hand the item to the system workqueue */
    schedule_work(&my_work);

    return 0;
}
// Delayed work
static struct delayed_work my_delayed_work;
/*
 * Callback installed on my_delayed_work by INIT_DELAYED_WORK().
 * @work: the work item being executed (unused)
 */
static void delayed_work_handler(struct work_struct *work)
{
    printk(KERN_INFO "Delayed work executed\n");
}
/*
 * Set up my_delayed_work and schedule it to run 5 seconds from now.
 * Always returns 0.
 */
static int __init delayed_init(void)
{
    INIT_DELAYED_WORK(&my_delayed_work, delayed_work_handler);

    /* The delay is given in jiffies; HZ jiffies make one second */
    schedule_delayed_work(&my_delayed_work, 5 * HZ);

    return 0;
}
/*
 * Cancel both work items synchronously so neither handler is pending or
 * running when this function returns.
 */
static void __exit workqueue_exit(void)
{
    /* Immediate work item */
    cancel_work_sync(&my_work);

    /* Delayed work item */
    cancel_delayed_work_sync(&my_delayed_work);
}
Interrupt Handlers
#include <linux/interrupt.h>
static int irq_number = 10; // Example IRQ
static irqreturn_t my_irq_handler(int irq, void *dev_id);
// Tasklet for bottom-half processing
static DECLARE_TASKLET(my_tasklet, my_tasklet_function);
static void my_tasklet_function(struct tasklet_struct *t) {
// Deferred interrupt processing
printk(KERN_INFO "Tasklet executed\n");
}
/*
 * Top half of the interrupt handler.
 * @irq:    interrupt number (unused)
 * @dev_id: cookie supplied to request_irq() (unused)
 *
 * Does only the minimum and defers the rest to my_tasklet.
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
    /*
     * Quick processing only: a real driver would acknowledge the hardware
     * interrupt at this point.
     */

    /* Queue the bottom half */
    tasklet_schedule(&my_tasklet);

    return IRQ_HANDLED;
}
/*
 * Register my_irq_handler on irq_number as a shared interrupt.
 *
 * The handler's own address doubles as the dev_id cookie; irq_exit() passes
 * the same cookie to free_irq().
 *
 * Returns 0 on success or the error code from request_irq().
 */
static int __init irq_init(void)
{
    int ret = request_irq(irq_number, my_irq_handler, IRQF_SHARED,
                          "my_device", (void *)my_irq_handler);

    if (ret)
        printk(KERN_ERR "Failed to register IRQ\n");

    /* ret is 0 on the success path */
    return ret;
}
/*
 * Tear down interrupt handling.
 *
 * Order matters. free_irq() goes first: once it returns, my_irq_handler()
 * can no longer run and therefore can no longer call tasklet_schedule().
 * Only then does tasklet_kill() wait out a pending or running tasklet.
 * In the opposite order, an interrupt arriving after tasklet_kill() could
 * schedule the tasklet again and have it run after the module is gone.
 */
static void __exit irq_exit(void)
{
    free_irq(irq_number, (void *)my_irq_handler);
    tasklet_kill(&my_tasklet);
}
Procfs Interface
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
static struct proc_dir_entry *proc_entry;
static int counter = 0;
/*
 * seq_file show callback: emit the current counter value as one line.
 * @m: seq_file to write into
 * @v: iterator cookie (unused)
 *
 * Always returns 0.
 */
static int my_show(struct seq_file *m, void *v)
{
    seq_printf(m, "Counter: %d\n", counter);

    return 0;
}
/*
 * open() handler for the proc entry: binds my_show to the file through
 * single_open() and returns its result.
 */
static int my_open(struct inode *inode, struct file *file)
{
    int rc = single_open(file, my_show, NULL);

    return rc;
}
/*
 * my_write - parse a decimal integer written to the proc entry into counter.
 * @file:  open file (unused)
 * @buf:   user-space source; only accessed through copy_from_user()
 * @count: number of bytes written by the caller
 * @off:   file position (unused)
 *
 * Returns @count on success, -EINVAL when the input is too long or is not a
 * valid integer, or -EFAULT when the user buffer cannot be read.
 */
static ssize_t my_write(struct file *file, const char __user *buf,
                        size_t count, loff_t *off)
{
    char kbuf[16];

    /* Keep one byte free for the terminator added below */
    if (count >= sizeof(kbuf))
        return -EINVAL;

    if (copy_from_user(kbuf, buf, count) != 0)
        return -EFAULT;
    kbuf[count] = '\0';

    if (kstrtoint(kbuf, 10, &counter) != 0)
        return -EINVAL;

    return count;
}
/*
 * Operations for the proc entry: reads go through the seq_file helpers
 * paired with my_open(), writes go to my_write().
 */
static const struct proc_ops my_proc_ops = {
    .proc_open    = my_open,
    .proc_release = single_release,   /* counterpart of single_open() */
    .proc_read    = seq_read,
    .proc_write   = my_write,
};
/*
 * Create the "my_counter" proc entry with mode 0666 and no parent given.
 * Returns 0 on success or -ENOMEM when the entry cannot be created.
 */
static int __init proc_init(void)
{
    proc_entry = proc_create("my_counter", 0666, NULL, &my_proc_ops);
    if (!proc_entry)
        return -ENOMEM;

    return 0;
}
/*
 * Remove the proc entry created by proc_init().
 */
static void __exit proc_exit(void)
{
    proc_remove(proc_entry);
}
Debugging Techniques
// Printk levels
printk(KERN_EMERG "Emergency\n"); // System unusable
printk(KERN_ALERT "Alert\n"); // Immediate action needed
printk(KERN_CRIT "Critical\n"); // Critical conditions
printk(KERN_ERR "Error\n"); // Error conditions
printk(KERN_WARNING "Warning\n"); // Warning conditions
printk(KERN_NOTICE "Notice\n"); // Normal but significant
printk(KERN_INFO "Info\n"); // Informational
printk(KERN_DEBUG "Debug\n"); // Debug messages
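// pr_debug - compiled in when DEBUG is defined before the include; with CONFIG_DYNAMIC_DEBUG it can instead be switched on at runtime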
#define DEBUG
#include <linux/printk.h>
pr_debug("Debug message\n"); // Only if DEBUG defined
// Kernel debugging options
// Enable in kernel config:
// CONFIG_DEBUG_KERNEL=y
// CONFIG_DEBUG_INFO=y
// CONFIG_KGDB=y
// Assert-like functionality
// NOTE(review): these two lines only name the macros and expand to nothing.
// The kernel already provides WARN_ON() and BUG_ON() (presumably through
// <linux/bug.h> - confirm), so a real module should use those and must not
// redefine them like this.
#define WARN_ON(condition)
#define BUG_ON(condition) // Crashes kernel if true!
// Memory debugging
// CONFIG_DEBUG_SLAB=y
// CONFIG_KASAN=y (Kernel Address Sanitizer)
Best Practices
Kernel Development Guidelines
Kernel Development Guidelines
- Never sleep with spinlocks held
- Always check return values - especially for allocations
- Use appropriate GFP flags - GFP_ATOMIC in interrupt context
- Keep interrupt handlers short - defer work to tasklets/workqueues
- Validate all user input - use copy_from_user, check sizes
- Release all resources on exit - memory, IRQs, device numbers
- Test thoroughly - kernel bugs crash the system
- Follow kernel coding style - scripts/checkpatch.pl
Resources
LWN.net
In-depth kernel articles
Kernel Docs
Official documentation
Bootlin
Free training materials
LKML
Kernel mailing list archive
Next Up
Course Recap
Review everything you’ve learned