From 71453f72f285a17ccf0520b9dbdafdc701ff2f4a Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Wed, 27 Aug 2014 17:15:30 -0400 Subject: [PATCH 1/4] a start at concurrent FS system calls --- defs.h | 4 +-- exec.c | 8 +++--- file.c | 8 +++--- log.c | 82 +++++++++++++++++++++++++++++++++++++++---------------- proc.c | 4 +-- sysfile.c | 50 ++++++++++++++++----------------- 6 files changed, 95 insertions(+), 61 deletions(-) diff --git a/defs.h b/defs.h index 23b1019..560b19a 100644 --- a/defs.h +++ b/defs.h @@ -81,8 +81,8 @@ void microdelay(int); // log.c void initlog(void); void log_write(struct buf*); -void begin_trans(); -void commit_trans(); +void begin_op(); +void end_op(); // mp.c extern int ismp; diff --git a/exec.c b/exec.c index 7eaef5b..8dbbdb6 100644 --- a/exec.c +++ b/exec.c @@ -18,9 +18,9 @@ exec(char *path, char **argv) struct proghdr ph; pde_t *pgdir, *oldpgdir; - begin_trans(); + begin_op(); if((ip = namei(path)) == 0){ - commit_trans(); + end_op(); return -1; } ilock(ip); @@ -50,7 +50,7 @@ exec(char *path, char **argv) goto bad; } iunlockput(ip); - commit_trans(); + end_op(); ip = 0; // Allocate two pages at the next page boundary. 
@@ -101,7 +101,7 @@ exec(char *path, char **argv) freevm(pgdir); if(ip){ iunlockput(ip); - commit_trans(); + end_op(); } return -1; } diff --git a/file.c b/file.c index 53c5af2..98cad1e 100644 --- a/file.c +++ b/file.c @@ -72,9 +72,9 @@ fileclose(struct file *f) if(ff.type == FD_PIPE) pipeclose(ff.pipe, ff.writable); else if(ff.type == FD_INODE){ - begin_trans(); + begin_op(); iput(ff.ip); - commit_trans(); + end_op(); } } @@ -136,12 +136,12 @@ filewrite(struct file *f, char *addr, int n) if(n1 > max) n1 = max; - begin_trans(); + begin_op(); ilock(f->ip); if ((r = writei(f->ip, addr + i, f->off, n1)) > 0) f->off += r; iunlock(f->ip); - commit_trans(); + end_op(); if(r < 0) break; diff --git a/log.c b/log.c index 95cc4d5..159df98 100644 --- a/log.c +++ b/log.c @@ -5,18 +5,22 @@ #include "fs.h" #include "buf.h" -// Simple logging. Each file system system call -// should be surrounded with begin_trans() and commit_trans() calls. +// Simple logging that allows concurrent FS system calls. // -// The log holds at most one transaction at a time. Commit forces -// the log (with commit record) to disk, then installs the affected -// blocks to disk, then erases the log. begin_trans() ensures that -// only one system call can be in a transaction; others must wait. -// -// Allowing only one transaction at a time means that the file -// system code doesn't have to worry about the possibility of -// one transaction reading a block that another one has modified, -// for example an i-node block. +// A log transaction contains the updates of *multiple* FS system +// calls. The logging systems only commits when there are +// no FS system calls active. Thus there is never +// any reasoning required about whether a commit might +// write an uncommitted system call's updates to disk. +// +// A system call should call begin_op()/end_op() to mark +// its start and end. Usually begin_op() just increments +// the count of in-progress FS system calls and returns. 
+// But if it thinks the log is close to running out, it +// blocks this system call, and causes the system to wait +// until end_op() indicates there are no executing FS +// system calls, at which point the last end_op() commits +// all the system calls' writes. // // The log is a physical re-do log containing disk blocks. // The on-disk log format: @@ -38,13 +42,15 @@ struct log { struct spinlock lock; int start; int size; - int busy; // a transaction is active + int outstanding; // how many FS sys calls are executing. + int committing; // in commit(), please wait. int dev; struct logheader lh; }; struct log log; static void recover_from_log(void); +static void commit(); void initlog(void) @@ -117,19 +123,52 @@ recover_from_log(void) write_head(); // clear the log } +// an FS system call should call begin_op() when it starts. void -begin_trans(void) +begin_op(void) { acquire(&log.lock); - while (log.busy) { - sleep(&log, &log.lock); + while(1){ + if(log.committing){ + sleep(&log, &log.lock); + } else { + // XXX wait (for a commit) if log is longish. + // need to reserve to avoid over-commit of log space. + log.outstanding += 1; + release(&log.lock); + break; + } } - log.busy = 1; - release(&log.lock); } +// an FS system call should call end_op() after it finishes. +// can't write the disk &c while holding locks, thus do_commit. 
void -commit_trans(void) +end_op(void) +{ + int do_commit = 0; + + acquire(&log.lock); + log.outstanding -= 1; + if(log.committing) + panic("log.committing"); + if(log.outstanding == 0){ + do_commit = 1; + log.committing = 1; + } + release(&log.lock); + + if(do_commit){ + commit(); + acquire(&log.lock); + log.committing = 0; + wakeup(&log); + release(&log.lock); + } +} + +static void +commit() { if (log.lh.n > 0) { write_head(); // Write header to disk -- the real commit @@ -137,11 +176,6 @@ commit_trans(void) log.lh.n = 0; write_head(); // Erase the transaction from the log } - - acquire(&log.lock); - log.busy = 0; - wakeup(&log); - release(&log.lock); } // Caller has modified b->data and is done with the buffer. @@ -159,7 +193,7 @@ log_write(struct buf *b) if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) panic("too big a transaction"); - if (!log.busy) + if (log.outstanding < 1) panic("write outside of trans"); for (i = 0; i < log.lh.n; i++) { diff --git a/proc.c b/proc.c index db0e9c7..a642f5a 100644 --- a/proc.c +++ b/proc.c @@ -186,9 +186,9 @@ exit(void) } } - begin_trans(); + begin_op(); iput(proc->cwd); - commit_trans(); + end_op(); proc->cwd = 0; acquire(&ptable.lock); diff --git a/sysfile.c b/sysfile.c index 095fca7..2209f6e 100644 --- a/sysfile.c +++ b/sysfile.c @@ -121,16 +121,16 @@ sys_link(void) if(argstr(0, &old) < 0 || argstr(1, &new) < 0) return -1; - begin_trans(); + begin_op(); if((ip = namei(old)) == 0){ - commit_trans(); + end_op(); return -1; } ilock(ip); if(ip->type == T_DIR){ iunlockput(ip); - commit_trans(); + end_op(); return -1; } @@ -148,7 +148,7 @@ sys_link(void) iunlockput(dp); iput(ip); - commit_trans(); + end_op(); return 0; @@ -157,7 +157,7 @@ bad: ip->nlink--; iupdate(ip); iunlockput(ip); - commit_trans(); + end_op(); return -1; } @@ -189,9 +189,9 @@ sys_unlink(void) if(argstr(0, &path) < 0) return -1; - begin_trans(); + begin_op(); if((dp = nameiparent(path, name)) == 0){ - commit_trans(); + end_op(); return -1; } @@ -225,13 
+225,13 @@ sys_unlink(void) iupdate(ip); iunlockput(ip); - commit_trans(); + end_op(); return 0; bad: iunlockput(dp); - commit_trans(); + end_op(); return -1; } @@ -291,23 +291,23 @@ sys_open(void) if(argstr(0, &path) < 0 || argint(1, &omode) < 0) return -1; - begin_trans(); + begin_op(); if(omode & O_CREATE){ ip = create(path, T_FILE, 0, 0); if(ip == 0){ - commit_trans(); + end_op(); return -1; } } else { if((ip = namei(path)) == 0){ - commit_trans(); + end_op(); return -1; } ilock(ip); if(ip->type == T_DIR && omode != O_RDONLY){ iunlockput(ip); - commit_trans(); + end_op(); return -1; } } @@ -316,11 +316,11 @@ sys_open(void) if(f) fileclose(f); iunlockput(ip); - commit_trans(); + end_op(); return -1; } iunlock(ip); - commit_trans(); + end_op(); f->type = FD_INODE; f->ip = ip; @@ -336,13 +336,13 @@ sys_mkdir(void) char *path; struct inode *ip; - begin_trans(); + begin_op(); if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){ - commit_trans(); + end_op(); return -1; } iunlockput(ip); - commit_trans(); + end_op(); return 0; } @@ -354,16 +354,16 @@ sys_mknod(void) int len; int major, minor; - begin_trans(); + begin_op(); if((len=argstr(0, &path)) < 0 || argint(1, &major) < 0 || argint(2, &minor) < 0 || (ip = create(path, T_DEV, major, minor)) == 0){ - commit_trans(); + end_op(); return -1; } iunlockput(ip); - commit_trans(); + end_op(); return 0; } @@ -373,20 +373,20 @@ sys_chdir(void) char *path; struct inode *ip; - begin_trans(); + begin_op(); if(argstr(0, &path) < 0 || (ip = namei(path)) == 0){ - commit_trans(); + end_op(); return -1; } ilock(ip); if(ip->type != T_DIR){ iunlockput(ip); - commit_trans(); + end_op(); return -1; } iunlock(ip); iput(proc->cwd); - commit_trans(); + end_op(); proc->cwd = ip; return 0; } From 48aa917403de1599a02924e429a9f43ea31e9cc1 Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Thu, 28 Aug 2014 05:57:47 -0400 Subject: [PATCH 2/4] i think this is a working concurrent logging scheme --- bio.c | 2 + log.c | 21 +++++- 
mkfs.c | 2 +- param.h | 5 +- usertests.c | 186 ++++++++++++++++++++++++++-------------------------- 5 files changed, 117 insertions(+), 99 deletions(-) diff --git a/bio.c b/bio.c index de1d0f2..d2ebc4b 100644 --- a/bio.c +++ b/bio.c @@ -80,6 +80,8 @@ bget(uint dev, uint sector) } // Not cached; recycle some non-busy and clean buffer. + // "clean" because B_DIRTY and !B_BUSY means log.c + // hasn't yet committed the changes to the buffer. for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ if((b->flags & B_BUSY) == 0 && (b->flags & B_DIRTY) == 0){ b->dev = dev; diff --git a/log.c b/log.c index 159df98..9d42ea8 100644 --- a/log.c +++ b/log.c @@ -52,6 +52,10 @@ struct log log; static void recover_from_log(void); static void commit(); +// statistics, delete eventually XXX. +static int maxsize; +static int maxoutstanding; + void initlog(void) { @@ -131,10 +135,15 @@ begin_op(void) while(1){ if(log.committing){ sleep(&log, &log.lock); + } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){ + // this op might exhaust log space; wait for commit. + sleep(&log, &log.lock); } else { - // XXX wait (for a commit) if log is longish. - // need to reserve to avoid over-commit of log space. log.outstanding += 1; + if(log.outstanding > maxoutstanding){ + maxoutstanding = log.outstanding; + cprintf("%d outstanding\n", log.outstanding); + } release(&log.lock); break; } @@ -155,6 +164,9 @@ end_op(void) if(log.outstanding == 0){ do_commit = 1; log.committing = 1; + } else { + // begin_op() may be waiting for log space. + wakeup(&log); } release(&log.lock); @@ -208,6 +220,11 @@ log_write(struct buf *b) if (i == log.lh.n) log.lh.n++; b->flags |= B_DIRTY; // XXX prevent eviction + + if(log.lh.n > maxsize){ + maxsize = log.lh.n; + cprintf("log size %d/%d\n", log.lh.n, LOGSIZE); + } } //PAGEBREAK! 
diff --git a/mkfs.c b/mkfs.c index 4b0e329..c168377 100644 --- a/mkfs.c +++ b/mkfs.c @@ -13,7 +13,7 @@ #define static_assert(a, b) do { switch (0) case 0: case (a): ; } while (0) -int nblocks = 985; +int nblocks = (995-LOGSIZE); int nlog = LOGSIZE; int ninodes = 200; int size = 1024; diff --git a/param.h b/param.h index b6f6f46..bdac60c 100644 --- a/param.h +++ b/param.h @@ -3,10 +3,11 @@ #define NCPU 8 // maximum number of CPUs #define NOFILE 16 // open files per process #define NFILE 100 // open files per system -#define NBUF 10 // size of disk block cache #define NINODE 50 // maximum number of active i-nodes #define NDEV 10 // maximum major device number #define ROOTDEV 1 // device number of file system root disk #define MAXARG 32 // max exec arguments -#define LOGSIZE 10 // max data sectors in on-disk log +#define MAXOPBLOCKS 10 // max # of blocks any FS op writes +#define LOGSIZE (MAXOPBLOCKS*3) // max data sectors in on-disk log +#define NBUF (MAXOPBLOCKS*3) // size of disk block cache (>= LOGSIZE) diff --git a/usertests.c b/usertests.c index 5a78c7c..22a7bfb 100644 --- a/usertests.c +++ b/usertests.c @@ -512,51 +512,56 @@ sharedfd(void) } } -// two processes write two different files at the same +// four processes write different files at the same // time, to test block allocation. void -twofiles(void) +fourfiles(void) { - int fd, pid, i, j, n, total; + int fd, pid, i, j, n, total, pi; + char *names[] = { "f0", "f1", "f2", "f3" }; char *fname; - printf(1, "twofiles test\n"); + printf(1, "fourfiles test\n"); - unlink("f1"); - unlink("f2"); + for(pi = 0; pi < 4; pi++){ + fname = names[pi]; + unlink(fname); - pid = fork(); - if(pid < 0){ - printf(1, "fork failed\n"); - exit(); - } + pid = fork(); + if(pid < 0){ + printf(1, "fork failed\n"); + exit(); + } - fname = pid ? 
"f1" : "f2"; - fd = open(fname, O_CREATE | O_RDWR); - if(fd < 0){ - printf(1, "create failed\n"); - exit(); - } - - memset(buf, pid?'p':'c', 512); - for(i = 0; i < 12; i++){ - if((n = write(fd, buf, 500)) != 500){ - printf(1, "write failed %d\n", n); + if(pid == 0){ + fd = open(fname, O_CREATE | O_RDWR); + if(fd < 0){ + printf(1, "create failed\n"); + exit(); + } + + memset(buf, '0'+pi, 512); + for(i = 0; i < 12; i++){ + if((n = write(fd, buf, 500)) != 500){ + printf(1, "write failed %d\n", n); + exit(); + } + } exit(); } } - close(fd); - if(pid) + + for(pi = 0; pi < 4; pi++){ wait(); - else - exit(); + } for(i = 0; i < 2; i++){ - fd = open(i?"f1":"f2", 0); + fname = names[i]; + fd = open(fname, 0); total = 0; while((n = read(fd, buf, sizeof(buf))) > 0){ for(j = 0; j < n; j++){ - if(buf[j] != (i?'p':'c')){ + if(buf[j] != '0'+i){ printf(1, "wrong char\n"); exit(); } @@ -568,87 +573,80 @@ twofiles(void) printf(1, "wrong length %d\n", total); exit(); } + unlink(fname); } - unlink("f1"); - unlink("f2"); - - printf(1, "twofiles ok\n"); + printf(1, "fourfiles ok\n"); } -// two processes create and delete different files in same directory +// four processes create and delete different files in same directory void createdelete(void) { enum { N = 20 }; - int pid, i, fd; + int pid, i, fd, pi; char name[32]; printf(1, "createdelete test\n"); - pid = fork(); - if(pid < 0){ - printf(1, "fork failed\n"); - exit(); - } - name[0] = pid ? 
'p' : 'c'; - name[2] = '\0'; - for(i = 0; i < N; i++){ - name[1] = '0' + i; - fd = open(name, O_CREATE | O_RDWR); - if(fd < 0){ - printf(1, "create failed\n"); + for(pi = 0; pi < 4; pi++){ + pid = fork(); + if(pid < 0){ + printf(1, "fork failed\n"); exit(); } - close(fd); - if(i > 0 && (i % 2 ) == 0){ - name[1] = '0' + (i / 2); - if(unlink(name) < 0){ - printf(1, "unlink failed\n"); + + if(pid == 0){ + name[0] = 'p' + pi; + name[2] = '\0'; + for(i = 0; i < N; i++){ + name[1] = '0' + i; + fd = open(name, O_CREATE | O_RDWR); + if(fd < 0){ + printf(1, "create failed\n"); + exit(); + } + close(fd); + if(i > 0 && (i % 2 ) == 0){ + name[1] = '0' + (i / 2); + if(unlink(name) < 0){ + printf(1, "unlink failed\n"); + exit(); + } + } + } + exit(); + } + } + + for(pi = 0; pi < 4; pi++){ + wait(); + } + + name[0] = name[1] = name[2] = 0; + for(i = 0; i < N; i++){ + for(pi = 0; pi < 4; pi++){ + name[0] = 'p' + pi; + name[1] = '0' + i; + fd = open(name, 0); + if((i == 0 || i >= N/2) && fd < 0){ + printf(1, "oops createdelete %s didn't exist\n", name); + exit(); + } else if((i >= 1 && i < N/2) && fd >= 0){ + printf(1, "oops createdelete %s did exist\n", name); exit(); } + if(fd >= 0) + close(fd); } } - if(pid==0) - exit(); - else - wait(); for(i = 0; i < N; i++){ - name[0] = 'p'; - name[1] = '0' + i; - fd = open(name, 0); - if((i == 0 || i >= N/2) && fd < 0){ - printf(1, "oops createdelete %s didn't exist\n", name); - exit(); - } else if((i >= 1 && i < N/2) && fd >= 0){ - printf(1, "oops createdelete %s did exist\n", name); - exit(); + for(pi = 0; pi < 4; pi++){ + name[0] = 'p' + pi; + name[1] = '0' + i; + unlink(name); } - if(fd >= 0) - close(fd); - - name[0] = 'c'; - name[1] = '0' + i; - fd = open(name, 0); - if((i == 0 || i >= N/2) && fd < 0){ - printf(1, "oops createdelete %s didn't exist\n", name); - exit(); - } else if((i >= 1 && i < N/2) && fd >= 0){ - printf(1, "oops createdelete %s did exist\n", name); - exit(); - } - if(fd >= 0) - close(fd); - } - - for(i = 0; i < N; 
i++){ - name[0] = 'p'; - name[1] = '0' + i; - unlink(name); - name[0] = 'c'; - unlink(name); } printf(1, "createdelete ok\n"); @@ -1716,6 +1714,12 @@ main(int argc, char *argv[]) } close(open("usertests.ran", O_CREATE)); + createdelete(); + linkunlink(); + concreate(); + fourfiles(); + sharedfd(); + bigargtest(); bigwrite(); bigargtest(); @@ -1741,18 +1745,12 @@ main(int argc, char *argv[]) fourteen(); bigfile(); subdir(); - concreate(); - linkunlink(); linktest(); unlinkread(); - createdelete(); - twofiles(); - sharedfd(); dirfile(); iref(); forktest(); bigdir(); // slow - exectest(); exit(); From 11183588dc69085b834e013564fc48526e23ef70 Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Thu, 28 Aug 2014 06:27:01 -0400 Subject: [PATCH 3/4] nits --- log.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/log.c b/log.c index 9d42ea8..6900b8d 100644 --- a/log.c +++ b/log.c @@ -7,8 +7,8 @@ // Simple logging that allows concurrent FS system calls. // -// A log transaction contains the updates of *multiple* FS system -// calls. The logging systems only commits when there are +// A log transaction contains the updates of multiple FS system +// calls. The logging system only commits when there are // no FS system calls active. Thus there is never // any reasoning required about whether a commit might // write an uncommitted system call's updates to disk. @@ -17,10 +17,7 @@ // its start and end. Usually begin_op() just increments // the count of in-progress FS system calls and returns. // But if it thinks the log is close to running out, it -// blocks this system call, and causes the system to wait -// until end_op() indicates there are no executing FS -// system calls, at which point the last end_op() commits -// all the system calls' writes. +// sleeps until the last outstanding end_op() commits. // // The log is a physical re-do log containing disk blocks. 
// The on-disk log format: @@ -52,10 +49,6 @@ struct log log; static void recover_from_log(void); static void commit(); -// statistics, delete eventually XXX. -static int maxsize; -static int maxoutstanding; - void initlog(void) { @@ -127,7 +120,7 @@ recover_from_log(void) write_head(); // clear the log } -// an FS system call should call begin_op() when it starts. +// called at the start of each FS system call. void begin_op(void) { @@ -140,18 +133,14 @@ begin_op(void) sleep(&log, &log.lock); } else { log.outstanding += 1; - if(log.outstanding > maxoutstanding){ - maxoutstanding = log.outstanding; - cprintf("%d outstanding\n", log.outstanding); - } release(&log.lock); break; } } } -// an FS system call should call end_op() after it finishes. -// can't write the disk &c while holding locks, thus do_commit. +// called at the end of each FS system call. +// commits if this was the last outstanding operation. void end_op(void) { @@ -171,6 +160,8 @@ end_op(void) release(&log.lock); if(do_commit){ + // call commit w/o holding locks, since not allowed + // to sleep with locks. commit(); acquire(&log.lock); log.committing = 0; @@ -209,7 +200,7 @@ log_write(struct buf *b) panic("write outside of trans"); for (i = 0; i < log.lh.n; i++) { - if (log.lh.sector[i] == b->sector) // log absorbtion? + if (log.lh.sector[i] == b->sector) // log absorbtion break; } log.lh.sector[i] = b->sector; @@ -219,12 +210,7 @@ log_write(struct buf *b) brelse(lbuf); if (i == log.lh.n) log.lh.n++; - b->flags |= B_DIRTY; // XXX prevent eviction - - if(log.lh.n > maxsize){ - maxsize = log.lh.n; - cprintf("log size %d/%d\n", log.lh.n, LOGSIZE); - } + b->flags |= B_DIRTY; // prevent eviction } //PAGEBREAK! 
From 2b2c1971fc2c3cc6fa1f2f3e7b507a3a35a0cbb8 Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Thu, 28 Aug 2014 16:06:12 -0400 Subject: [PATCH 4/4] write log blocks from cache only at end of transaction --- log.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/log.c b/log.c index 6900b8d..0abe1fe 100644 --- a/log.c +++ b/log.c @@ -170,10 +170,27 @@ end_op(void) } } +// Copy modified blocks from cache to log. +static void +write_log(void) +{ + int tail; + + for (tail = 0; tail < log.lh.n; tail++) { + struct buf *to = bread(log.dev, log.start+tail+1); // log block + struct buf *from = bread(log.dev, log.lh.sector[tail]); // cache block + memmove(to->data, from->data, BSIZE); + bwrite(to); // write the log + brelse(from); + brelse(to); + } +} + static void commit() { if (log.lh.n > 0) { + write_log(); // Write modified blocks from cache to log write_head(); // Write header to disk -- the real commit install_trans(); // Now install writes to home locations log.lh.n = 0; @@ -182,8 +199,9 @@ commit() } // Caller has modified b->data and is done with the buffer. -// Append the block to the log and record the block number, -// but don't write the log header (which would commit the write). +// Record the block number and pin in the cache with B_DIRTY. +// commit()/write_log() will do the disk write. +// // log_write() replaces bwrite(); a typical use is: // bp = bread(...) 
// modify bp->data[] @@ -197,17 +215,13 @@ log_write(struct buf *b) if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) panic("too big a transaction"); if (log.outstanding < 1) - panic("write outside of trans"); + panic("log_write outside of trans"); for (i = 0; i < log.lh.n; i++) { if (log.lh.sector[i] == b->sector) // log absorbtion break; } log.lh.sector[i] = b->sector; - struct buf *lbuf = bread(b->dev, log.start+i+1); - memmove(lbuf->data, b->data, BSIZE); - bwrite(lbuf); - brelse(lbuf); if (i == log.lh.n) log.lh.n++; b->flags |= B_DIRTY; // prevent eviction