From 792d60e91224bc15ce41aaff2560a82b63fdf06f Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Sun, 4 Oct 2020 09:21:03 -0400 Subject: [PATCH] avoid deadlock by disk intr acking interrupt first, then processing ring --- kernel/virtio_disk.c | 51 +++++++++++++++++++++++++++++--------------- user/usertests.c | 7 +++--- 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c index 06e0645..1dbd6ed 100644 --- a/kernel/virtio_disk.c +++ b/kernel/virtio_disk.c @@ -130,10 +130,13 @@ static void free_desc(int i) { if(i >= NUM) - panic("virtio_disk_intr 1"); + panic("free_desc 1"); if(disk.free[i]) - panic("virtio_disk_intr 2"); + panic("free_desc 2"); disk.desc[i].addr = 0; + disk.desc[i].len = 0; + disk.desc[i].flags = 0; + disk.desc[i].next = 0; disk.free[i] = 1; wakeup(&disk.free[0]); } @@ -143,9 +146,11 @@ static void free_chain(int i) { while(1){ + int flag = disk.desc[i].flags; + int nxt = disk.desc[i].next; free_desc(i); - if(disk.desc[i].flags & VRING_DESC_F_NEXT) - i = disk.desc[i].next; + if(flag & VRING_DESC_F_NEXT) + i = nxt; else break; } @@ -184,7 +189,7 @@ virtio_disk_rw(struct buf *b, int write) } sleep(&disk.free[0], &disk.vdisk_lock); } - + // format the three descriptors. // qemu's virtio-blk.c reads them. @@ -217,7 +222,7 @@ virtio_disk_rw(struct buf *b, int write) disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT; disk.desc[idx[1]].next = idx[2]; - disk.info[idx[0]].status = 0; + disk.info[idx[0]].status = 0xff; // device writes 0 on success disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status; disk.desc[idx[2]].len = 1; disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status @@ -227,13 +232,17 @@ virtio_disk_rw(struct buf *b, int write) b->disk = 1; disk.info[idx[0]].b = b; - // avail[0] is flags - // avail[1] tells the device how far to look in avail[2...]. - // avail[2...] are desc[] indices the device should process. + // avail[0] is flags (always zero) + // avail[1] is an index into avail[2...] telling where we'll write next + // avail[2...] is a ring of NUM indices the device should process // we only tell device the first index in our chain of descriptors. disk.avail[2 + (disk.avail[1] % NUM)] = idx[0]; + + __sync_synchronize(); + + disk.avail[1] = disk.avail[1] + 1; // not % NUM ... + __sync_synchronize(); - disk.avail[1] = disk.avail[1] + 1; *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number @@ -253,18 +262,26 @@ virtio_disk_intr() { acquire(&disk.vdisk_lock); - while((disk.used_idx % NUM) != (disk.used->id % NUM)){ - int id = disk.used->elems[disk.used_idx].id; + // this ack may race with the device writing new notifications to + // the "used" ring, in which case we may get an interrupt we don't + // need, which is harmless. + *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3; + + __sync_synchronize(); + + while(disk.used_idx != disk.used->id){ + __sync_synchronize(); + int id = disk.used->elems[disk.used_idx % NUM].id; if(disk.info[id].status != 0) panic("virtio_disk_intr status"); - - disk.info[id].b->disk = 0; // disk is done with buf - wakeup(disk.info[id].b); - disk.used_idx = (disk.used_idx + 1) % NUM; + struct buf *b = disk.info[id].b; + b->disk = 0; // disk is done with buf + wakeup(b); + + disk.used_idx += 1; } - *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3; release(&disk.vdisk_lock); } diff --git a/user/usertests.c b/user/usertests.c index 720e3cf..ec6630d 100644 --- a/user/usertests.c +++ b/user/usertests.c @@ -1734,6 +1734,7 @@ void manywrites(char *s) { int nchildren = 4; + int howmany = 30; // increase to look for deadlock for(int ci = 0; ci < nchildren; ci++){ int pid = fork(); @@ -1749,7 +1750,7 @@ manywrites(char *s) name[2] = '\0'; unlink(name); - for(int iters = 0; iters < 500000; iters++){ + for(int iters = 0; iters < howmany; iters++){ for(int i = 0; i < ci+1; i++){ int fd = open(name, O_CREATE | O_RDWR); if(fd < 0){ @@ -1765,8 +1766,6 @@ manywrites(char *s) close(fd); } unlink(name); - if((iters % 50) == ci) - write(1, ".", 1); } unlink(name); @@ -2737,7 +2736,7 @@ main(int argc, char *argv[]) void (*f)(char *); char *s; } tests[] = { - // {manywrites, "manywrites"}, + {manywrites, "manywrites"}, {execout, "execout"}, {copyin, "copyin"}, {copyout, "copyout"},