c24ac5d763
that they can be different. Clean up mkfs, simplifying specifying fs parameters, remove some redundancy between fs and mkfs, and fix disk layout bugs. Call blocks in the file system blocks instead of sectors. Passes usertests for different block sizes.
653 lines
15 KiB
C
653 lines
15 KiB
C
// File system implementation. Five layers:
|
|
// + Blocks: allocator for raw disk blocks.
|
|
// + Log: crash recovery for multi-step updates.
|
|
// + Files: inode allocator, reading, writing, metadata.
|
|
// + Directories: inode with special contents (list of other inodes!)
|
|
// + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
|
|
//
|
|
// This file contains the low-level file system manipulation
|
|
// routines. The (higher-level) system call implementations
|
|
// are in sysfile.c.
|
|
|
|
#include "types.h"
|
|
#include "defs.h"
|
|
#include "param.h"
|
|
#include "stat.h"
|
|
#include "mmu.h"
|
|
#include "proc.h"
|
|
#include "spinlock.h"
|
|
#include "fs.h"
|
|
#include "buf.h"
|
|
#include "file.h"
|
|
|
|
// Smaller of a and b. Each argument appears twice in the expansion,
// so it may be evaluated twice; pass only side-effect-free expressions.
#define min(a, b) ((a) < (b) ? (a) : (b))
|
|
// Forward declaration: iput() calls itrunc() before its definition below.
static void itrunc(struct inode*);
|
|
|
|
// Read the super block.
|
|
void
|
|
readsb(int dev, struct superblock *sb)
|
|
{
|
|
struct buf *bp;
|
|
|
|
bp = bread(dev, 1);
|
|
memmove(sb, bp->data, sizeof(*sb));
|
|
brelse(bp);
|
|
}
|
|
|
|
// Zero a block.
|
|
static void
|
|
bzero(int dev, int bno)
|
|
{
|
|
struct buf *bp;
|
|
|
|
bp = bread(dev, bno);
|
|
memset(bp->data, 0, BSIZE);
|
|
log_write(bp);
|
|
brelse(bp);
|
|
}
|
|
|
|
// Blocks.
|
|
|
|
// Allocate a zeroed disk block.
|
|
static uint
|
|
balloc(uint dev)
|
|
{
|
|
int b, bi, m;
|
|
struct buf *bp;
|
|
struct superblock sb;
|
|
|
|
bp = 0;
|
|
readsb(dev, &sb);
|
|
for(b = 0; b < sb.size; b += BPB){
|
|
bp = bread(dev, BBLOCK(b, sb.ninodes));
|
|
for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
|
|
m = 1 << (bi % 8);
|
|
if((bp->data[bi/8] & m) == 0){ // Is block free?
|
|
bp->data[bi/8] |= m; // Mark block in use.
|
|
log_write(bp);
|
|
brelse(bp);
|
|
bzero(dev, b + bi);
|
|
return b + bi;
|
|
}
|
|
}
|
|
brelse(bp);
|
|
}
|
|
panic("balloc: out of blocks");
|
|
}
|
|
|
|
// Free a disk block.
|
|
static void
|
|
bfree(int dev, uint b)
|
|
{
|
|
struct buf *bp;
|
|
struct superblock sb;
|
|
int bi, m;
|
|
|
|
readsb(dev, &sb);
|
|
bp = bread(dev, BBLOCK(b, sb.ninodes));
|
|
bi = b % BPB;
|
|
m = 1 << (bi % 8);
|
|
if((bp->data[bi/8] & m) == 0)
|
|
panic("freeing free block");
|
|
bp->data[bi/8] &= ~m;
|
|
log_write(bp);
|
|
brelse(bp);
|
|
}
|
|
|
|
// Inodes.
|
|
//
|
|
// An inode describes a single unnamed file.
|
|
// The inode disk structure holds metadata: the file's type,
|
|
// its size, the number of links referring to it, and the
|
|
// list of blocks holding the file's content.
|
|
//
|
|
// The inodes are laid out sequentially on disk immediately after
|
|
// the superblock. Each inode has a number, indicating its
|
|
// position on the disk.
|
|
//
|
|
// The kernel keeps a cache of in-use inodes in memory
|
|
// to provide a place for synchronizing access
|
|
// to inodes used by multiple processes. The cached
|
|
// inodes include book-keeping information that is
|
|
// not stored on disk: ip->ref and ip->flags.
|
|
//
|
|
// An inode and its in-memory representative go through a
|
|
// sequence of states before they can be used by the
|
|
// rest of the file system code.
|
|
//
|
|
// * Allocation: an inode is allocated if its type (on disk)
|
|
// is non-zero. ialloc() allocates, iput() frees if
|
|
// the link count has fallen to zero.
|
|
//
|
|
// * Referencing in cache: an entry in the inode cache
|
|
// is free if ip->ref is zero. Otherwise ip->ref tracks
|
|
// the number of in-memory pointers to the entry (open
|
|
// files and current directories). iget() to find or
|
|
// create a cache entry and increment its ref, iput()
|
|
// to decrement ref.
|
|
//
|
|
// * Valid: the information (type, size, &c) in an inode
|
|
// cache entry is only correct when the I_VALID bit
|
|
// is set in ip->flags. ilock() reads the inode from
|
|
// the disk and sets I_VALID, while iput() clears
|
|
// I_VALID if ip->ref has fallen to zero.
|
|
//
|
|
// * Locked: file system code may only examine and modify
|
|
// the information in an inode and its content if it
|
|
// has first locked the inode. The I_BUSY flag indicates
|
|
// that the inode is locked. ilock() sets I_BUSY,
|
|
// while iunlock clears it.
|
|
//
|
|
// Thus a typical sequence is:
|
|
// ip = iget(dev, inum)
|
|
// ilock(ip)
|
|
// ... examine and modify ip->xxx ...
|
|
// iunlock(ip)
|
|
// iput(ip)
|
|
//
|
|
// ilock() is separate from iget() so that system calls can
|
|
// get a long-term reference to an inode (as for an open file)
|
|
// and only lock it for short periods (e.g., in read()).
|
|
// The separation also helps avoid deadlock and races during
|
|
// pathname lookup. iget() increments ip->ref so that the inode
|
|
// stays cached and pointers to it remain valid.
|
|
//
|
|
// Many internal file system functions expect the caller to
|
|
// have locked the inodes involved; this lets callers create
|
|
// multi-step atomic operations.
|
|
|
|
struct {
|
|
struct spinlock lock;
|
|
struct inode inode[NINODE];
|
|
} icache;
|
|
|
|
void
|
|
iinit(void)
|
|
{
|
|
initlock(&icache.lock, "icache");
|
|
}
|
|
|
|
// Forward declaration: ialloc() below returns iget()'s result.
static struct inode* iget(uint dev, uint inum);
|
|
|
|
//PAGEBREAK!
|
|
// Allocate a new inode with the given type on device dev.
|
|
// A free inode has a type of zero.
|
|
struct inode*
|
|
ialloc(uint dev, short type)
|
|
{
|
|
int inum;
|
|
struct buf *bp;
|
|
struct dinode *dip;
|
|
struct superblock sb;
|
|
|
|
readsb(dev, &sb);
|
|
|
|
for(inum = 1; inum < sb.ninodes; inum++){
|
|
bp = bread(dev, IBLOCK(inum));
|
|
dip = (struct dinode*)bp->data + inum%IPB;
|
|
if(dip->type == 0){ // a free inode
|
|
memset(dip, 0, sizeof(*dip));
|
|
dip->type = type;
|
|
log_write(bp); // mark it allocated on the disk
|
|
brelse(bp);
|
|
return iget(dev, inum);
|
|
}
|
|
brelse(bp);
|
|
}
|
|
panic("ialloc: no inodes");
|
|
}
|
|
|
|
// Copy a modified in-memory inode to disk.
|
|
void
|
|
iupdate(struct inode *ip)
|
|
{
|
|
struct buf *bp;
|
|
struct dinode *dip;
|
|
|
|
bp = bread(ip->dev, IBLOCK(ip->inum));
|
|
dip = (struct dinode*)bp->data + ip->inum%IPB;
|
|
dip->type = ip->type;
|
|
dip->major = ip->major;
|
|
dip->minor = ip->minor;
|
|
dip->nlink = ip->nlink;
|
|
dip->size = ip->size;
|
|
memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
|
|
log_write(bp);
|
|
brelse(bp);
|
|
}
|
|
|
|
// Find the inode with number inum on device dev
|
|
// and return the in-memory copy. Does not lock
|
|
// the inode and does not read it from disk.
|
|
static struct inode*
|
|
iget(uint dev, uint inum)
|
|
{
|
|
struct inode *ip, *empty;
|
|
|
|
acquire(&icache.lock);
|
|
|
|
// Is the inode already cached?
|
|
empty = 0;
|
|
for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
|
|
if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
|
|
ip->ref++;
|
|
release(&icache.lock);
|
|
return ip;
|
|
}
|
|
if(empty == 0 && ip->ref == 0) // Remember empty slot.
|
|
empty = ip;
|
|
}
|
|
|
|
// Recycle an inode cache entry.
|
|
if(empty == 0)
|
|
panic("iget: no inodes");
|
|
|
|
ip = empty;
|
|
ip->dev = dev;
|
|
ip->inum = inum;
|
|
ip->ref = 1;
|
|
ip->flags = 0;
|
|
release(&icache.lock);
|
|
|
|
return ip;
|
|
}
|
|
|
|
// Increment reference count for ip.
|
|
// Returns ip to enable ip = idup(ip1) idiom.
|
|
struct inode*
|
|
idup(struct inode *ip)
|
|
{
|
|
acquire(&icache.lock);
|
|
ip->ref++;
|
|
release(&icache.lock);
|
|
return ip;
|
|
}
|
|
|
|
// Lock the given inode.
|
|
// Reads the inode from disk if necessary.
|
|
void
|
|
ilock(struct inode *ip)
|
|
{
|
|
struct buf *bp;
|
|
struct dinode *dip;
|
|
|
|
if(ip == 0 || ip->ref < 1)
|
|
panic("ilock");
|
|
|
|
acquire(&icache.lock);
|
|
while(ip->flags & I_BUSY)
|
|
sleep(ip, &icache.lock);
|
|
ip->flags |= I_BUSY;
|
|
release(&icache.lock);
|
|
|
|
if(!(ip->flags & I_VALID)){
|
|
bp = bread(ip->dev, IBLOCK(ip->inum));
|
|
dip = (struct dinode*)bp->data + ip->inum%IPB;
|
|
ip->type = dip->type;
|
|
ip->major = dip->major;
|
|
ip->minor = dip->minor;
|
|
ip->nlink = dip->nlink;
|
|
ip->size = dip->size;
|
|
memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
|
|
brelse(bp);
|
|
ip->flags |= I_VALID;
|
|
if(ip->type == 0)
|
|
panic("ilock: no type");
|
|
}
|
|
}
|
|
|
|
// Unlock the given inode.
|
|
void
|
|
iunlock(struct inode *ip)
|
|
{
|
|
if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
|
|
panic("iunlock");
|
|
|
|
acquire(&icache.lock);
|
|
ip->flags &= ~I_BUSY;
|
|
wakeup(ip);
|
|
release(&icache.lock);
|
|
}
|
|
|
|
// Drop a reference to an in-memory inode.
|
|
// If that was the last reference, the inode cache entry can
|
|
// be recycled.
|
|
// If that was the last reference and the inode has no links
|
|
// to it, free the inode (and its content) on disk.
|
|
// All calls to iput() must be inside a transaction in
|
|
// case it has to free the inode.
|
|
void
|
|
iput(struct inode *ip)
|
|
{
|
|
acquire(&icache.lock);
|
|
if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
|
|
// inode has no links and no other references: truncate and free.
|
|
if(ip->flags & I_BUSY)
|
|
panic("iput busy");
|
|
ip->flags |= I_BUSY;
|
|
release(&icache.lock);
|
|
itrunc(ip);
|
|
ip->type = 0;
|
|
iupdate(ip);
|
|
acquire(&icache.lock);
|
|
ip->flags = 0;
|
|
wakeup(ip);
|
|
}
|
|
ip->ref--;
|
|
release(&icache.lock);
|
|
}
|
|
|
|
// Convenience wrapper: iunlock(ip) followed by iput(ip).
void
iunlockput(struct inode *ip)
{
  iunlock(ip);
  iput(ip);
}
|
|
|
|
//PAGEBREAK!
|
|
// Inode content
|
|
//
|
|
// The content (data) associated with each inode is stored
|
|
// in blocks on the disk. The first NDIRECT block numbers
|
|
// are listed in ip->addrs[]. The next NINDIRECT blocks are
|
|
// listed in block ip->addrs[NDIRECT].
|
|
|
|
// Return the disk block address of the nth block in inode ip.
|
|
// If there is no such block, bmap allocates one.
|
|
static uint
|
|
bmap(struct inode *ip, uint bn)
|
|
{
|
|
uint addr, *a;
|
|
struct buf *bp;
|
|
|
|
if(bn < NDIRECT){
|
|
if((addr = ip->addrs[bn]) == 0)
|
|
ip->addrs[bn] = addr = balloc(ip->dev);
|
|
return addr;
|
|
}
|
|
bn -= NDIRECT;
|
|
|
|
if(bn < NINDIRECT){
|
|
// Load indirect block, allocating if necessary.
|
|
if((addr = ip->addrs[NDIRECT]) == 0)
|
|
ip->addrs[NDIRECT] = addr = balloc(ip->dev);
|
|
bp = bread(ip->dev, addr);
|
|
a = (uint*)bp->data;
|
|
if((addr = a[bn]) == 0){
|
|
a[bn] = addr = balloc(ip->dev);
|
|
log_write(bp);
|
|
}
|
|
brelse(bp);
|
|
return addr;
|
|
}
|
|
|
|
panic("bmap: out of range");
|
|
}
|
|
|
|
// Truncate inode (discard contents).
|
|
// Only called when the inode has no links
|
|
// to it (no directory entries referring to it)
|
|
// and has no in-memory reference to it (is
|
|
// not an open file or current directory).
|
|
static void
|
|
itrunc(struct inode *ip)
|
|
{
|
|
int i, j;
|
|
struct buf *bp;
|
|
uint *a;
|
|
|
|
for(i = 0; i < NDIRECT; i++){
|
|
if(ip->addrs[i]){
|
|
bfree(ip->dev, ip->addrs[i]);
|
|
ip->addrs[i] = 0;
|
|
}
|
|
}
|
|
|
|
if(ip->addrs[NDIRECT]){
|
|
bp = bread(ip->dev, ip->addrs[NDIRECT]);
|
|
a = (uint*)bp->data;
|
|
for(j = 0; j < NINDIRECT; j++){
|
|
if(a[j])
|
|
bfree(ip->dev, a[j]);
|
|
}
|
|
brelse(bp);
|
|
bfree(ip->dev, ip->addrs[NDIRECT]);
|
|
ip->addrs[NDIRECT] = 0;
|
|
}
|
|
|
|
ip->size = 0;
|
|
iupdate(ip);
|
|
}
|
|
|
|
// Copy stat information from inode.
|
|
void
|
|
stati(struct inode *ip, struct stat *st)
|
|
{
|
|
st->dev = ip->dev;
|
|
st->ino = ip->inum;
|
|
st->type = ip->type;
|
|
st->nlink = ip->nlink;
|
|
st->size = ip->size;
|
|
}
|
|
|
|
//PAGEBREAK!
|
|
// Read data from inode.
|
|
int
|
|
readi(struct inode *ip, char *dst, uint off, uint n)
|
|
{
|
|
uint tot, m;
|
|
struct buf *bp;
|
|
|
|
if(ip->type == T_DEV){
|
|
if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
|
|
return -1;
|
|
return devsw[ip->major].read(ip, dst, n);
|
|
}
|
|
|
|
if(off > ip->size || off + n < off)
|
|
return -1;
|
|
if(off + n > ip->size)
|
|
n = ip->size - off;
|
|
|
|
for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
|
|
bp = bread(ip->dev, bmap(ip, off/BSIZE));
|
|
m = min(n - tot, BSIZE - off%BSIZE);
|
|
memmove(dst, bp->data + off%BSIZE, m);
|
|
brelse(bp);
|
|
}
|
|
return n;
|
|
}
|
|
|
|
// PAGEBREAK!
|
|
// Write data to inode.
|
|
int
|
|
writei(struct inode *ip, char *src, uint off, uint n)
|
|
{
|
|
uint tot, m;
|
|
struct buf *bp;
|
|
|
|
if(ip->type == T_DEV){
|
|
if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
|
|
return -1;
|
|
return devsw[ip->major].write(ip, src, n);
|
|
}
|
|
|
|
if(off > ip->size || off + n < off)
|
|
return -1;
|
|
if(off + n > MAXFILE*BSIZE)
|
|
return -1;
|
|
|
|
for(tot=0; tot<n; tot+=m, off+=m, src+=m){
|
|
bp = bread(ip->dev, bmap(ip, off/BSIZE));
|
|
m = min(n - tot, BSIZE - off%BSIZE);
|
|
memmove(bp->data + off%BSIZE, src, m);
|
|
log_write(bp);
|
|
brelse(bp);
|
|
}
|
|
|
|
if(n > 0 && off > ip->size){
|
|
ip->size = off;
|
|
iupdate(ip);
|
|
}
|
|
return n;
|
|
}
|
|
|
|
//PAGEBREAK!
|
|
// Directories
|
|
|
|
int
|
|
namecmp(const char *s, const char *t)
|
|
{
|
|
return strncmp(s, t, DIRSIZ);
|
|
}
|
|
|
|
// Look for a directory entry in a directory.
|
|
// If found, set *poff to byte offset of entry.
|
|
struct inode*
|
|
dirlookup(struct inode *dp, char *name, uint *poff)
|
|
{
|
|
uint off, inum;
|
|
struct dirent de;
|
|
|
|
if(dp->type != T_DIR)
|
|
panic("dirlookup not DIR");
|
|
|
|
for(off = 0; off < dp->size; off += sizeof(de)){
|
|
if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
|
|
panic("dirlink read");
|
|
if(de.inum == 0)
|
|
continue;
|
|
if(namecmp(name, de.name) == 0){
|
|
// entry matches path element
|
|
if(poff)
|
|
*poff = off;
|
|
inum = de.inum;
|
|
return iget(dp->dev, inum);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Write a new directory entry (name, inum) into the directory dp.
|
|
int
|
|
dirlink(struct inode *dp, char *name, uint inum)
|
|
{
|
|
int off;
|
|
struct dirent de;
|
|
struct inode *ip;
|
|
|
|
// Check that name is not present.
|
|
if((ip = dirlookup(dp, name, 0)) != 0){
|
|
iput(ip);
|
|
return -1;
|
|
}
|
|
|
|
// Look for an empty dirent.
|
|
for(off = 0; off < dp->size; off += sizeof(de)){
|
|
if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
|
|
panic("dirlink read");
|
|
if(de.inum == 0)
|
|
break;
|
|
}
|
|
|
|
strncpy(de.name, name, DIRSIZ);
|
|
de.inum = inum;
|
|
if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
|
|
panic("dirlink");
|
|
|
|
return 0;
|
|
}
|
|
|
|
//PAGEBREAK!
|
|
// Paths
|
|
|
|
// Copy the next path element from path into name.
|
|
// Return a pointer to the element following the copied one.
|
|
// The returned path has no leading slashes,
|
|
// so the caller can check *path=='\0' to see if the name is the last one.
|
|
// If no name to remove, return 0.
|
|
//
|
|
// Examples:
|
|
// skipelem("a/bb/c", name) = "bb/c", setting name = "a"
|
|
// skipelem("///a//bb", name) = "bb", setting name = "a"
|
|
// skipelem("a", name) = "", setting name = "a"
|
|
// skipelem("", name) = skipelem("////", name) = 0
|
|
//
|
|
static char*
|
|
skipelem(char *path, char *name)
|
|
{
|
|
char *s;
|
|
int len;
|
|
|
|
while(*path == '/')
|
|
path++;
|
|
if(*path == 0)
|
|
return 0;
|
|
s = path;
|
|
while(*path != '/' && *path != 0)
|
|
path++;
|
|
len = path - s;
|
|
if(len >= DIRSIZ)
|
|
memmove(name, s, DIRSIZ);
|
|
else {
|
|
memmove(name, s, len);
|
|
name[len] = 0;
|
|
}
|
|
while(*path == '/')
|
|
path++;
|
|
return path;
|
|
}
|
|
|
|
// Look up and return the inode for a path name.
|
|
// If parent != 0, return the inode for the parent and copy the final
|
|
// path element into name, which must have room for DIRSIZ bytes.
|
|
// Must be called inside a transaction since it calls iput().
|
|
static struct inode*
|
|
namex(char *path, int nameiparent, char *name)
|
|
{
|
|
struct inode *ip, *next;
|
|
|
|
if(*path == '/')
|
|
ip = iget(ROOTDEV, ROOTINO);
|
|
else
|
|
ip = idup(proc->cwd);
|
|
|
|
while((path = skipelem(path, name)) != 0){
|
|
ilock(ip);
|
|
if(ip->type != T_DIR){
|
|
iunlockput(ip);
|
|
return 0;
|
|
}
|
|
if(nameiparent && *path == '\0'){
|
|
// Stop one level early.
|
|
iunlock(ip);
|
|
return ip;
|
|
}
|
|
if((next = dirlookup(ip, name, 0)) == 0){
|
|
iunlockput(ip);
|
|
return 0;
|
|
}
|
|
iunlockput(ip);
|
|
ip = next;
|
|
}
|
|
if(nameiparent){
|
|
iput(ip);
|
|
return 0;
|
|
}
|
|
return ip;
|
|
}
|
|
|
|
struct inode*
|
|
namei(char *path)
|
|
{
|
|
char name[DIRSIZ];
|
|
return namex(path, 0, name);
|
|
}
|
|
|
|
// Resolve path to the inode of its parent directory via namex() and
// copy the final path element into name (room for DIRSIZ bytes is
// required). Returns 0 if the lookup fails.
struct inode*
nameiparent(char *path, char *name)
{
  struct inode *parent = namex(path, 1, name);

  return parent;
}
|