avoid deadlock by disk intr acking interrupt first, then processing ring
This commit is contained in:
parent
e3672e018a
commit
792d60e912
2 changed files with 37 additions and 21 deletions
|
@ -130,10 +130,13 @@ static void
|
||||||
free_desc(int i)
|
free_desc(int i)
|
||||||
{
|
{
|
||||||
if(i >= NUM)
|
if(i >= NUM)
|
||||||
panic("virtio_disk_intr 1");
|
panic("free_desc 1");
|
||||||
if(disk.free[i])
|
if(disk.free[i])
|
||||||
panic("virtio_disk_intr 2");
|
panic("free_desc 2");
|
||||||
disk.desc[i].addr = 0;
|
disk.desc[i].addr = 0;
|
||||||
|
disk.desc[i].len = 0;
|
||||||
|
disk.desc[i].flags = 0;
|
||||||
|
disk.desc[i].next = 0;
|
||||||
disk.free[i] = 1;
|
disk.free[i] = 1;
|
||||||
wakeup(&disk.free[0]);
|
wakeup(&disk.free[0]);
|
||||||
}
|
}
|
||||||
|
@ -143,9 +146,11 @@ static void
|
||||||
free_chain(int i)
|
free_chain(int i)
|
||||||
{
|
{
|
||||||
while(1){
|
while(1){
|
||||||
|
int flag = disk.desc[i].flags;
|
||||||
|
int nxt = disk.desc[i].next;
|
||||||
free_desc(i);
|
free_desc(i);
|
||||||
if(disk.desc[i].flags & VRING_DESC_F_NEXT)
|
if(flag & VRING_DESC_F_NEXT)
|
||||||
i = disk.desc[i].next;
|
i = nxt;
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -184,7 +189,7 @@ virtio_disk_rw(struct buf *b, int write)
|
||||||
}
|
}
|
||||||
sleep(&disk.free[0], &disk.vdisk_lock);
|
sleep(&disk.free[0], &disk.vdisk_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
// format the three descriptors.
|
// format the three descriptors.
|
||||||
// qemu's virtio-blk.c reads them.
|
// qemu's virtio-blk.c reads them.
|
||||||
|
|
||||||
|
@ -217,7 +222,7 @@ virtio_disk_rw(struct buf *b, int write)
|
||||||
disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
|
disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
|
||||||
disk.desc[idx[1]].next = idx[2];
|
disk.desc[idx[1]].next = idx[2];
|
||||||
|
|
||||||
disk.info[idx[0]].status = 0;
|
disk.info[idx[0]].status = 0xff; // device writes 0 on success
|
||||||
disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;
|
disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;
|
||||||
disk.desc[idx[2]].len = 1;
|
disk.desc[idx[2]].len = 1;
|
||||||
disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
|
disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
|
||||||
|
@ -227,13 +232,17 @@ virtio_disk_rw(struct buf *b, int write)
|
||||||
b->disk = 1;
|
b->disk = 1;
|
||||||
disk.info[idx[0]].b = b;
|
disk.info[idx[0]].b = b;
|
||||||
|
|
||||||
// avail[0] is flags
|
// avail[0] is flags (always zero)
|
||||||
// avail[1] tells the device how far to look in avail[2...].
|
// avail[1] is an index into avail[2...] telling where we'll write next
|
||||||
// avail[2...] are desc[] indices the device should process.
|
// avail[2...] is a ring of NUM indices the device should process
|
||||||
// we only tell device the first index in our chain of descriptors.
|
// we only tell device the first index in our chain of descriptors.
|
||||||
disk.avail[2 + (disk.avail[1] % NUM)] = idx[0];
|
disk.avail[2 + (disk.avail[1] % NUM)] = idx[0];
|
||||||
|
|
||||||
|
__sync_synchronize();
|
||||||
|
|
||||||
|
disk.avail[1] = disk.avail[1] + 1; // not % NUM ...
|
||||||
|
|
||||||
__sync_synchronize();
|
__sync_synchronize();
|
||||||
disk.avail[1] = disk.avail[1] + 1;
|
|
||||||
|
|
||||||
*R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
|
*R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
|
||||||
|
|
||||||
|
@ -253,18 +262,26 @@ virtio_disk_intr()
|
||||||
{
|
{
|
||||||
acquire(&disk.vdisk_lock);
|
acquire(&disk.vdisk_lock);
|
||||||
|
|
||||||
while((disk.used_idx % NUM) != (disk.used->id % NUM)){
|
// this ack may race with the device writing new notifications to
|
||||||
int id = disk.used->elems[disk.used_idx].id;
|
// the "used" ring, in which case we may get an interrupt we don't
|
||||||
|
// need, which is harmless.
|
||||||
|
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
|
||||||
|
|
||||||
|
__sync_synchronize();
|
||||||
|
|
||||||
|
while(disk.used_idx != disk.used->id){
|
||||||
|
__sync_synchronize();
|
||||||
|
int id = disk.used->elems[disk.used_idx % NUM].id;
|
||||||
|
|
||||||
if(disk.info[id].status != 0)
|
if(disk.info[id].status != 0)
|
||||||
panic("virtio_disk_intr status");
|
panic("virtio_disk_intr status");
|
||||||
|
|
||||||
disk.info[id].b->disk = 0; // disk is done with buf
|
|
||||||
wakeup(disk.info[id].b);
|
|
||||||
|
|
||||||
disk.used_idx = (disk.used_idx + 1) % NUM;
|
struct buf *b = disk.info[id].b;
|
||||||
|
b->disk = 0; // disk is done with buf
|
||||||
|
wakeup(b);
|
||||||
|
|
||||||
|
disk.used_idx += 1;
|
||||||
}
|
}
|
||||||
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
|
|
||||||
|
|
||||||
release(&disk.vdisk_lock);
|
release(&disk.vdisk_lock);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1734,6 +1734,7 @@ void
|
||||||
manywrites(char *s)
|
manywrites(char *s)
|
||||||
{
|
{
|
||||||
int nchildren = 4;
|
int nchildren = 4;
|
||||||
|
int howmany = 30; // increase to look for deadlock
|
||||||
|
|
||||||
for(int ci = 0; ci < nchildren; ci++){
|
for(int ci = 0; ci < nchildren; ci++){
|
||||||
int pid = fork();
|
int pid = fork();
|
||||||
|
@ -1749,7 +1750,7 @@ manywrites(char *s)
|
||||||
name[2] = '\0';
|
name[2] = '\0';
|
||||||
unlink(name);
|
unlink(name);
|
||||||
|
|
||||||
for(int iters = 0; iters < 500000; iters++){
|
for(int iters = 0; iters < howmany; iters++){
|
||||||
for(int i = 0; i < ci+1; i++){
|
for(int i = 0; i < ci+1; i++){
|
||||||
int fd = open(name, O_CREATE | O_RDWR);
|
int fd = open(name, O_CREATE | O_RDWR);
|
||||||
if(fd < 0){
|
if(fd < 0){
|
||||||
|
@ -1765,8 +1766,6 @@ manywrites(char *s)
|
||||||
close(fd);
|
close(fd);
|
||||||
}
|
}
|
||||||
unlink(name);
|
unlink(name);
|
||||||
if((iters % 50) == ci)
|
|
||||||
write(1, ".", 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unlink(name);
|
unlink(name);
|
||||||
|
@ -2737,7 +2736,7 @@ main(int argc, char *argv[])
|
||||||
void (*f)(char *);
|
void (*f)(char *);
|
||||||
char *s;
|
char *s;
|
||||||
} tests[] = {
|
} tests[] = {
|
||||||
// {manywrites, "manywrites"},
|
{manywrites, "manywrites"},
|
||||||
{execout, "execout"},
|
{execout, "execout"},
|
||||||
{copyin, "copyin"},
|
{copyin, "copyin"},
|
||||||
{copyout, "copyout"},
|
{copyout, "copyout"},
|
||||||
|
|
Loading…
Reference in a new issue