From 530431045237d7ccbbc0bb65ed83309845c19893 Mon Sep 17 00:00:00 2001 From: Frans Kaashoek Date: Mon, 29 Jul 2019 17:33:16 -0400 Subject: [PATCH 1/6] Remove B_DIRTY Use refcnt to pin blocks into the cache Replace flags/B_VALID with a boolean field valid Use info[id].status to signal completion of disk interrupt Pass a read/write flag to virtio_disk_rw --- kernel/bio.c | 20 +++++++------------- kernel/buf.h | 4 +--- kernel/defs.h | 2 +- kernel/log.c | 8 +++++--- kernel/virtio_disk.c | 12 +++++------- 5 files changed, 19 insertions(+), 27 deletions(-) diff --git a/kernel/bio.c b/kernel/bio.c index a08884b..7455c06 100644 --- a/kernel/bio.c +++ b/kernel/bio.c @@ -12,11 +12,7 @@ // * Do not use the buffer after calling brelse. // * Only one process at a time can use a buffer, // so do not keep them longer than necessary. -// -// The implementation uses two state flags internally: -// * B_VALID: the buffer data has been read from the disk. -// * B_DIRTY: the buffer data has been modified -// and needs to be written to disk. + #include "types.h" #include "param.h" @@ -76,13 +72,11 @@ bget(uint dev, uint blockno) } // Not cached; recycle an unused buffer. - // Even if refcnt==0, B_DIRTY indicates a buffer is in use - // because log.c has modified it but not yet committed it. for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ - if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) { + if(b->refcnt == 0) { b->dev = dev; b->blockno = blockno; - b->flags = 0; + b->valid = 0; b->refcnt = 1; release(&bcache.lock); acquiresleep(&b->lock); @@ -99,8 +93,9 @@ bread(uint dev, uint blockno) struct buf *b; b = bget(dev, blockno); - if((b->flags & B_VALID) == 0) { - virtio_disk_rw(b); + if(!b->valid) { + virtio_disk_rw(b, 0); + b->valid = 1; } return b; } @@ -111,8 +106,7 @@ bwrite(struct buf *b) { if(!holdingsleep(&b->lock)) panic("bwrite"); - b->flags |= B_DIRTY; - virtio_disk_rw(b); + virtio_disk_rw(b, 1); } // Release a locked buffer. diff --git a/kernel/buf.h b/kernel/buf.h index 3266495..ae9d264 100644 --- a/kernel/buf.h +++ b/kernel/buf.h @@ -1,5 +1,5 @@ struct buf { - int flags; + int valid; // has data been read from disk? uint dev; uint blockno; struct sleeplock lock; @@ -9,6 +9,4 @@ struct buf { struct buf *qnext; // disk queue uchar data[BSIZE]; }; -#define B_VALID 0x2 // buffer has been read from disk -#define B_DIRTY 0x4 // buffer needs to be written to disk diff --git a/kernel/defs.h b/kernel/defs.h index de4acfd..8421082 100644 --- a/kernel/defs.h +++ b/kernel/defs.h @@ -177,7 +177,7 @@ void plic_complete(int); // virtio_disk.c void virtio_disk_init(void); -void virtio_disk_rw(struct buf *); +void virtio_disk_rw(struct buf *, int); void virtio_disk_intr(); // number of elements in fixed-size array diff --git a/kernel/log.c b/kernel/log.c index c8f7e62..5aea267 100644 --- a/kernel/log.c +++ b/kernel/log.c @@ -77,6 +77,7 @@ install_trans(void) struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst bwrite(dbuf); // write dst to disk + dbuf->refcnt--; // unpin buffer from cache brelse(lbuf); brelse(dbuf); } @@ -203,7 +204,7 @@ commit() } // Caller has modified b->data and is done with the buffer. -// Record the block number and pin in the cache with B_DIRTY. +// Record the block number and pin in the cache by increasing refcnt. // commit()/write_log() will do the disk write. // // log_write() replaces bwrite(); a typical use is: @@ -227,9 +228,10 @@ log_write(struct buf *b) break; } log.lh.block[i] = b->blockno; - if (i == log.lh.n) + if (i == log.lh.n) { // Add new block to log? + b->refcnt++; // Pin block in cache log.lh.n++; - b->flags |= B_DIRTY; // prevent eviction + } release(&log.lock); } diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c index 3ace5ef..df4473d 100644 --- a/kernel/virtio_disk.c +++ b/kernel/virtio_disk.c @@ -164,7 +164,7 @@ alloc3_desc(int *idx) } void -virtio_disk_rw(struct buf *b) +virtio_disk_rw(struct buf *b, int write) { uint64 sector = b->blockno * (BSIZE / 512); @@ -192,7 +192,7 @@ virtio_disk_rw(struct buf *b) uint64 sector; } buf0; - if(b->flags & B_DIRTY) + if(write) buf0.type = VIRTIO_BLK_T_OUT; // write the disk else buf0.type = VIRTIO_BLK_T_IN; // read the disk @@ -208,7 +208,7 @@ virtio_disk_rw(struct buf *b) desc[idx[1]].addr = (uint64) b->data; desc[idx[1]].len = BSIZE; - if(b->flags & B_DIRTY) + if(write) desc[idx[1]].flags = 0; // device reads b->data else desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data @@ -235,7 +235,7 @@ virtio_disk_rw(struct buf *b) *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number // Wait for virtio_disk_intr() to say request has finished. - while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){ + while(info[idx[0]].status == 0) { sleep(b, &vdisk_lock); } @@ -253,9 +253,7 @@ virtio_disk_intr() if(info[id].status != 0) panic("virtio_disk_intr status"); - info[id].b->flags |= B_VALID; - info[id].b->flags &= ~B_DIRTY; - + info[id].status = 1; wakeup(info[id].b); info[id].b = 0; From f1bb53c690051994f5a2c43ee900f9e335bd019c Mon Sep 17 00:00:00 2001 From: Frans Kaashoek Date: Tue, 30 Jul 2019 08:13:03 -0400 Subject: [PATCH 2/6] The driver should free descriptors, not interrupt handler. This avoids handler freeing descriptors before driver sees that the request has completed. --- kernel/virtio_disk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c index df4473d..9ebe695 100644 --- a/kernel/virtio_disk.c +++ b/kernel/virtio_disk.c @@ -239,6 +239,9 @@ virtio_disk_rw(struct buf *b, int write) sleep(b, &vdisk_lock); } + info[idx[0]].b = 0; + free_chain(idx[0]); + release(&vdisk_lock); } @@ -256,9 +259,6 @@ virtio_disk_intr() info[id].status = 1; wakeup(info[id].b); - info[id].b = 0; - free_chain(id); - used_idx = (used_idx + 1) % NUM; } From f37a3e396454268074f48517e3773f099846d0e3 Mon Sep 17 00:00:00 2001 From: Frans Kaashoek Date: Tue, 30 Jul 2019 08:54:43 -0400 Subject: [PATCH 3/6] Make pin/unpin explicit --- kernel/bio.c | 16 ++++++++++++++++ kernel/defs.h | 2 ++ kernel/log.c | 4 ++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/kernel/bio.c b/kernel/bio.c index 7455c06..a1074f2 100644 --- a/kernel/bio.c +++ b/kernel/bio.c @@ -133,3 +133,19 @@ brelse(struct buf *b) release(&bcache.lock); } + +void +bpin(struct buf *b) { + acquire(&bcache.lock); + b->refcnt++; + release(&bcache.lock); +} + +void +bunpin(struct buf *b) { + acquire(&bcache.lock); + b->refcnt--; + release(&bcache.lock); +} + + diff --git a/kernel/defs.h b/kernel/defs.h index 8421082..2689bed 100644 --- a/kernel/defs.h +++ b/kernel/defs.h @@ -14,6 +14,8 @@ void binit(void); struct buf* bread(uint, uint); void brelse(struct buf*); void bwrite(struct buf*); +void bpin(struct buf*); +void bunpin(struct buf*); // console.c void consoleinit(void); diff --git a/kernel/log.c b/kernel/log.c index 5aea267..59984db 100644 --- a/kernel/log.c +++ b/kernel/log.c @@ -77,7 +77,7 @@ install_trans(void) struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst bwrite(dbuf); // write dst to disk - dbuf->refcnt--; // unpin buffer from cache + bunpin(dbuf); brelse(lbuf); brelse(dbuf); } @@ -229,7 +229,7 @@ log_write(struct buf *b) } log.lh.block[i] = b->blockno; if (i == log.lh.n) { // Add new block to log? - b->refcnt++; // Pin block in cache + bpin(b); log.lh.n++; } release(&log.lock); From 87183da13d80555fde253823108be12667246079 Mon Sep 17 00:00:00 2001 From: Frans Kaashoek Date: Tue, 30 Jul 2019 10:01:22 -0400 Subject: [PATCH 4/6] An easier version of bcache assignment --- labs/lock.html | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/labs/lock.html b/labs/lock.html index b43a51b..a93eb3a 100644 --- a/labs/lock.html +++ b/labs/lock.html @@ -82,7 +82,7 @@ workloads.

Run usertests to see if you don't break anything. -

Lock-free bcache lookup

+

More scalabale bcache lookup

Several processes reading different files repeatedly will @@ -116,15 +116,19 @@ against, including:

A challenge is testing whether you code is still correct. One way to do is to artificially delay certain operations - using sleepticks. + using sleepticks. test1 trashes the buffer cache + and exercises more code paths.

Here are some hints:

    -
  • Use an atomic increment instruction for incrementing and - decrementing b->ref (see __sync_fetch_and_add() and - related primitives) -
  • Don't walk the bcache.head list to find a buffer +
  • Use a simple design: i.e., don't design a lock-free implementation. +
  • Use a simple hash table with locks per bucket
- + +

Check that your implementation has less contention + on test0 + +

Make sure your implementation passes bcachetest and usertests. + From 848d1906e81992c78bee9e9ce5a5d38e107265cc Mon Sep 17 00:00:00 2001 From: Frans Kaashoek Date: Tue, 30 Jul 2019 12:53:19 -0400 Subject: [PATCH 5/6] Track in buf if disk "owns" buffer --- kernel/buf.h | 1 + kernel/virtio_disk.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/buf.h b/kernel/buf.h index ae9d264..4a3a39d 100644 --- a/kernel/buf.h +++ b/kernel/buf.h @@ -1,5 +1,6 @@ struct buf { int valid; // has data been read from disk? + int disk; // does disk "own" buf? uint dev; uint blockno; struct sleeplock lock; diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c index 9ebe695..14c718d 100644 --- a/kernel/virtio_disk.c +++ b/kernel/virtio_disk.c @@ -222,6 +222,7 @@ virtio_disk_rw(struct buf *b, int write) desc[idx[2]].next = 0; // record struct buf for virtio_disk_intr(). + b->disk = 1; info[idx[0]].b = b; // avail[0] is flags @@ -235,7 +236,7 @@ virtio_disk_rw(struct buf *b, int write) *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number // Wait for virtio_disk_intr() to say request has finished. - while(info[idx[0]].status == 0) { + while(b->disk == 1) { sleep(b, &vdisk_lock); } @@ -255,8 +256,8 @@ virtio_disk_intr() if(info[id].status != 0) panic("virtio_disk_intr status"); - - info[id].status = 1; + + info[id].b->disk = 0; // disk is done with buf wakeup(info[id].b); used_idx = (used_idx + 1) % NUM; From 9c4f62e8e3e7f114c6f82a75579a815e6329d767 Mon Sep 17 00:00:00 2001 From: Frans Kaashoek Date: Tue, 30 Jul 2019 13:07:17 -0400 Subject: [PATCH 6/6] x --- labs/lock.html | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/labs/lock.html b/labs/lock.html index a93eb3a..707d6c4 100644 --- a/labs/lock.html +++ b/labs/lock.html @@ -98,7 +98,7 @@ workloads. and run test0 from bcachetest and you will see "!"s.

Modify bget so that a lookup for a buffer that is in the - bcache doesn't need to acquire bcache.lock. This more + bcache doesn't need to acquire bcache.lock. This is more tricky than the kalloc assignment, because bcache buffers are truly shared among processes. You must maintain the invariant that a buffer is only once in memory. @@ -121,8 +121,13 @@ against, including:

Here are some hints:

    +
  • Read the description of buffer cache in the xv6 book (Section 7.2).
  • Use a simple design: i.e., don't design a lock-free implementation. -
  • Use a simple hash table with locks per bucket +
  • Use a simple hash table with locks per bucket. +
  • Searching in hash table for a buffer and allocating an entry + for that buffer when the buffer is not found must be atomic. +
  • It is fine to acquire bcache.lock in brelse + to update the LRU/MRU list.

Check that your implementation has less contention @@ -130,5 +135,14 @@ against, including:

Make sure your implementation passes bcachetest and usertests. +

Optional: +

    +
  • make the buffer cache more scalable (e.g., avoid taking + out bcache.lock on brelse). +
  • make lookup lock-free (Hint: use gcc's __sync_* + functions.) How do you convince yourself that your implementation is correct? +
+ +