Chapter 1: Project Setup & Init
In this chapter, we’ll set up our project and implement the first Git command: init. By the end, you’ll understand exactly what happens when you run git init.
Prerequisites: Basic JavaScript knowledge, Node.js installed
Time: 1-2 hours
Outcome: A working mygit init command
What You’ll Learn
How Git’s .git directory is structured
What each file and folder in .git means
How to build a CLI tool in Node.js
Content-addressable storage concepts
Understanding Git’s Directory Structure
When you run git init, Git creates a .git directory with this structure:
.git/
├── HEAD # Points to current branch (ref: refs/heads/master)
├── config # Repository-specific configuration
├── description # Used by GitWeb (we'll skip this)
├── objects/ # Object database (blobs, trees, commits)
│ ├── info/
│ └── pack/
├── refs/ # Branch and tag pointers
│ ├── heads/ # Branch refs (e.g., refs/heads/master)
│ └── tags/ # Tag refs
├── hooks/ # Scripts triggered by Git events
└── info/
└── exclude # Local gitignore (not committed)
The most important parts are HEAD, objects/, and refs/. Everything else is optional for a minimal implementation.
Project Setup
1. Initialize Your Project
mkdir mygit
cd mygit
npm init -y
2. Create the Project Structure
mygit/
├── src/
│ ├── commands/ # Individual command implementations
│ │ └── init.js
│ ├── utils/ # Shared utilities
│ │ └── paths.js
│ └── mygit.js # Main CLI entry point
├── package.json
└── README.md
3. Set Up package.json
{
"name" : "mygit" ,
"version" : "1.0.0" ,
"description" : "A Git implementation for learning" ,
"main" : "src/mygit.js" ,
"bin" : {
"mygit" : "./src/mygit.js"
},
"scripts" : {
"test" : "node test/test.js"
},
"keywords" : [ "git" , "vcs" , "learning" ],
"license" : "MIT"
}
Implementation
Step 1: Create the CLI Entry Point
#!/usr/bin/env node
/**
* mygit - A Git implementation for learning
*
* Usage: mygit <command> [options]
*/
// Registry of sub-commands, keyed by the name typed on the command line.
// main() looks a command up here and calls its `execute(args)` function.
const initCommand = require('./commands/init');

const commands = {
  init: initCommand,
  // We'll add more commands later
};
/**
 * CLI entry point: select the sub-command named by the first command-line
 * argument and run it with the remaining arguments.
 *
 * Exits with status 1 when no command is given (after printing usage), when
 * the command is unknown, or when the command throws; in the last case the
 * exception's message is printed as `error: <message>` on stderr.
 */
function main() {
  const args = process.argv.slice(2);

  if (args.length === 0) {
    console.log('usage: mygit <command> [<args>]');
    console.log('\nAvailable commands:');
    console.log(' init Initialize a new repository');
    process.exit(1);
  }

  const [command, ...commandArgs] = args;

  // Own-property check: a plain `commands[command]` lookup also matches keys
  // inherited from Object.prototype ("constructor", "toString", ...), which
  // would slip past this guard and fail later with a confusing
  // "execute is not a function" error.
  if (!Object.prototype.hasOwnProperty.call(commands, command)) {
    console.error(`mygit: '${command}' is not a mygit command.`);
    process.exit(1);
  }

  try {
    commands[command].execute(commandArgs);
  } catch (error) {
    console.error(`error: ${error.message}`);
    process.exit(1);
  }
}

main();
Step 2: Create Path Utilities
const path = require ( 'path' );
const fs = require ( 'fs' );
/**
 * Find the .git directory by walking up from a starting directory.
 *
 * @param {string} [startDir=process.cwd()] - Directory to start from. A
 *   relative path is resolved against the current working directory.
 * @returns {string|null} Absolute path of the nearest `.git` directory found
 *   in `startDir` or one of its ancestors (the filesystem root included), or
 *   `null` when none exists.
 */
function findGitDir(startDir = process.cwd()) {
  // Resolve first: for a relative path, path.dirname() bottoms out at '.',
  // which never equals a filesystem root, so the walk would never terminate.
  let currentDir = path.resolve(startDir);

  for (;;) {
    const gitDir = path.join(currentDir, '.git');
    if (fs.existsSync(gitDir) && fs.statSync(gitDir).isDirectory()) {
      return gitDir;
    }

    const parentDir = path.dirname(currentDir);
    if (parentDir === currentDir) {
      // dirname() of the root is the root itself: every level, including the
      // root, has now been checked.
      return null;
    }
    currentDir = parentDir;
  }
}
/**
 * Return the repository root for a given .git directory.
 *
 * @param {string} gitDir - Path to a `.git` directory.
 * @returns {string} The directory that contains `gitDir`.
 */
function getRepoRoot(gitDir) {
  const repoRoot = path.dirname(gitDir);
  return repoRoot;
}
/**
 * Locate the enclosing .git directory or fail.
 *
 * @returns {string} The path returned by findGitDir() for the current
 *   working directory.
 * @throws {Error} When findGitDir() finds no .git directory.
 */
function requireGitDir() {
  const located = findGitDir();
  if (located) {
    return located;
  }
  throw new Error('not a git repository (or any of the parent directories): .git');
}
module . exports = {
findGitDir ,
getRepoRoot ,
requireGitDir
};
Step 3: Implement the Init Command
const fs = require ( 'fs' );
const path = require ( 'path' );
/**
 * Initialize a new Git repository.
 *
 * Creates `<directory>/.git` with:
 * - HEAD: symbolic ref pointing to refs/heads/master
 * - config: minimal [core] section
 * - description: placeholder text (used by GitWeb)
 * - info/exclude: local ignore file
 * - objects/ (with info/ and pack/): object database
 * - refs/heads/ and refs/tags/: branch and tag refs
 *
 * If `.git` already exists, nothing is written and a "Reinitialized" notice
 * is printed instead.
 *
 * @param {string[]} args - Command arguments; `args[0]` is the target
 *   directory (defaults to the current directory).
 * @returns {void}
 */
function execute(args) {
  const directory = args[0] || '.';
  const repoPath = path.resolve(directory);
  const gitDir = path.join(repoPath, '.git');

  // Check if already a repository
  if (fs.existsSync(gitDir)) {
    console.log(`Reinitialized existing Git repository in ${gitDir}`);
    return;
  }

  // Create the directory structure
  const directories = [
    gitDir,
    path.join(gitDir, 'objects'),
    path.join(gitDir, 'objects', 'info'),
    path.join(gitDir, 'objects', 'pack'),
    path.join(gitDir, 'refs'),
    path.join(gitDir, 'refs', 'heads'),
    path.join(gitDir, 'refs', 'tags'),
    path.join(gitDir, 'info'),
  ];
  for (const dir of directories) {
    fs.mkdirSync(dir, { recursive: true });
  }

  // File contents are written exactly: no padding around the newline and no
  // space after the tab in config entries, so the files match Git's format.
  const files = [
    // HEAD is a "symbolic reference" (symref) to the master branch.
    ['HEAD', 'ref: refs/heads/master\n'],
    [
      'config',
      [
        '[core]',
        '\trepositoryformatversion = 0',
        '\tfilemode = false',
        '\tbare = false',
        '',
      ].join('\n'),
    ],
    // Used by GitWeb.
    ['description', 'Unnamed repository; edit this file to name the repository.\n'],
    // Local gitignore (not committed).
    [
      path.join('info', 'exclude'),
      '# git ls-files --others --exclude-from=.git/info/exclude\n',
    ],
  ];
  for (const [relativePath, content] of files) {
    fs.writeFileSync(path.join(gitDir, relativePath), content);
  }

  console.log(`Initialized empty Git repository in ${gitDir}`);
}
module . exports = { execute };
Testing Your Implementation
Make It Executable
# Make the script executable (on Unix)
chmod +x src/mygit.js
# Link it globally for testing
npm link
Test It!
# Create a test directory
mkdir test-repo
cd test-repo
# Initialize with your implementation
mygit init
# Verify the structure
ls -la .git/
# Should show: HEAD, config, description, info/, objects/, refs/
Compare with Real Git
# Go back up so the new repository sits next to test-repo (not inside it)
cd ..
# Create another directory
mkdir git-repo
cd git-repo
git init
# Compare the structures
# (expect some differences: real Git also creates hooks/, for example)
diff -r ../test-repo/.git .git
Deep Dive: Understanding HEAD
The HEAD file is crucial to Git. Let’s understand it. In a freshly initialized repository it contains a single line:
ref: refs/heads/master
This is a symbolic reference (symref). It says “I’m pointing to whatever refs/heads/master contains.”
What happens when you commit?
Git creates a new commit object
Reads HEAD to find current branch: refs/heads/master
Updates refs/heads/master to point to new commit
HEAD still points to refs/heads/master (unchanged)
Detached HEAD: HEAD can also contain a raw commit SHA (e.g. abc123def456...) instead of a symbolic ref ("ref: refs/heads/...").
This means you’re not on any branch!
A branch is just a file in refs/heads/ containing a commit SHA: $ cat .git/refs/heads/master
abc123def456789...
That’s it! Branches are just pointers to commits.
Deep Dive: The Objects Directory
The objects/ directory is Git’s content-addressable storage:
objects/
├── ab/ # First 2 characters of SHA
│ └── cdef123456... # Remaining 38 characters
├── pack/ # Packed objects (for efficiency)
└── info/ # Additional info
Why split the hash?
A directory with millions of files is slow. By using the first 2 hex characters of the hash as a subdirectory name, Git spreads objects across at most 256 subdirectories (00–ff), so each one holds only a small fraction of the files.
We’ll implement the object storage in the next chapter!
Exercises
Exercise 1: Add --bare flag
Implement mygit init --bare which creates a bare repository (no working directory): // Bare repos don't have:
// - A working directory
// - Checked-out files or an index (HEAD still exists and still points to a branch)
// - The .git folder IS the repository (not inside another folder)
// Hint: Check for --bare in args, then:
// 1. Don't create .git subdirectory, use current directory
// 2. Set bare = true in config
Solution outline: const isBare = args . includes ( '--bare' );
const repoDir = isBare ? repoPath : path . join ( repoPath , '.git' );
// ... rest of implementation
Exercise 2: Add --initial-branch flag
Implement mygit init --initial-branch=main to set a custom default branch: // Many projects and hosting services now use 'main' instead of 'master' (real Git makes the default configurable via init.defaultBranch)
// Parse the --initial-branch=NAME argument
// Hint: Update HEAD content:
// ref: refs/heads/main
Exercise 3: Validate directory
Add validation to check if the target directory is writable: // Before creating .git, check:
// 1. Parent directory exists (or can be created)
// 2. We have write permissions
// 3. Not trying to init inside another .git
// Use: fs.accessSync(dir, fs.constants.W_OK)
Key Takeaways
Simple Structure: Git’s .git directory is surprisingly simple: just files and folders
HEAD is King: HEAD always tells you where you are: which branch or which commit
Branches are Files: A branch is just a file containing 40 hex characters (a SHA-1 hash)
Content-Addressable: The objects/ directory stores everything by its content hash
What’s Next?
In Chapter 2: Object Model, we’ll implement Git’s object storage:
Create and store blob objects (file content)
Implement hash-object and cat-file commands
Understand SHA-1 hashing and zlib compression
Next: Object Model Learn how Git stores files as content-addressed blobs