diff --git a/kernel/proc.c b/kernel/proc.c index 0d91a59..4f6caee 100644 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -286,11 +286,11 @@ fork(void) } // Pass p's abandoned children to init. -// Caller must hold p->lock and parent->lock. +// Caller must hold p->lock. void -reparent(struct proc *p, struct proc *parent) { +reparent(struct proc *p) +{ struct proc *pp; - int child_of_init = (p->parent == initproc); for(pp = proc; pp < &proc[NPROC]; pp++){ // this code uses pp->parent without holding pp->lock. @@ -302,13 +302,10 @@ reparent(struct proc *p, struct proc *parent) { // because only the parent changes it, and we're the parent. acquire(&pp->lock); pp->parent = initproc; - if(pp->state == ZOMBIE) { - if(!child_of_init) - acquire(&initproc->lock); - wakeup1(initproc); - if(!child_of_init) - release(&initproc->lock); - } + // we should wake up init here, but that would require + // initproc->lock, which would be a deadlock, since we hold + // the lock on one of init's children (pp). this is why + // exit() always wakes init (before acquiring any locks). release(&pp->lock); } } @@ -339,20 +336,38 @@ exit(int status) end_op(); p->cwd = 0; - acquire(&p->parent->lock); - + // we might re-parent a child to init. we can't be precise + // about waking up init, since we can't acquire its lock once + // we've acquired any other proc lock. so wake up init whether + // that's necessary or not. init may miss this wakeup, + // that that seems harmless. + acquire(&initproc->lock); + wakeup1(initproc); + release(&initproc->lock); + + // grab a copy of p->parent, to ensure that we unlock the + // same parent we locked. in case our parent gives us away + // to init while we're waiting for the parent lock. + acquire(&p->lock); + struct proc *original_parent = p->parent; + release(&p->lock); + + // we need the parent's lock in order to wake it up from wait(). + // the parent-then-child rule says we have to lock it first. + acquire(&original_parent->lock); + acquire(&p->lock); // Give any children to init. - reparent(p, p->parent); + reparent(p); // Parent might be sleeping in wait(). - wakeup1(p->parent); + wakeup1(original_parent); p->xstate = status; p->state = ZOMBIE; - release(&p->parent->lock); + release(&original_parent->lock); // Jump into the scheduler, never to return. sched(); @@ -564,6 +579,8 @@ wakeup(void *chan) static void wakeup1(struct proc *p) { + if(!holding(&p->lock)) + panic("wakeup1"); if(p->chan == p && p->state == SLEEPING) { p->state = RUNNABLE; } diff --git a/user/usertests.c b/user/usertests.c index fe3ae49..db9f680 100644 --- a/user/usertests.c +++ b/user/usertests.c @@ -598,6 +598,31 @@ forkforkfork(char *s) sleep(10); // one second } +// regression test. does reparent() violate the parent-then-child +// locking order when giving away a child to init, so that exit() +// deadlocks against init's wait()? also used to trigger a "panic: +// release" due to exit() releasing a different p->parent->lock than +// it acquired. +void +reparent2(char *s) +{ + for(int i = 0; i < 800; i++){ + int pid1 = fork(); + if(pid1 < 0){ + printf("fork failed\n"); + exit(1); + } + if(pid1 == 0){ + fork(); + fork(); + exit(0); + } + wait(0); + } + + exit(0); +} + // allocate all mem, free it, and allocate again void mem(char *s) @@ -1937,9 +1962,9 @@ stacktest(char *s) exit(xstatus); } -// copyin(), copyout(), and copyinstr() used to cast the virtual page -// address to uint, which (with certain wild system call arguments) -// resulted in a kernel page faults. +// regression test. copyin(), copyout(), and copyinstr() used to cast +// the virtual page address to uint, which (with certain wild system +// call arguments) resulted in a kernel page faults. void pgbug(char *s) { @@ -1952,9 +1977,9 @@ pgbug(char *s) exit(0); } -// does the kernel panic if a process sbrk()s its size to be less than -// a page, or zero, or reduces the break by an amount too small to -// cause a page to be freed? +// regression test. does the kernel panic if a process sbrk()s its +// size to be less than a page, or zero, or reduces the break by an +// amount too small to cause a page to be freed? void sbrkbugs(char *s) { @@ -2010,13 +2035,11 @@ sbrkbugs(char *s) exit(0); } -// does write() with an invalid buffer pointer cause -// a block to be allocated for a file that is then -// not freed when the file is deleted? if the kernel -// has this bug, it will panic: balloc: out of blocks. -// assumed_free may need to be raised to be -// more than the number of free blocks. -// this test takes a long time. +// regression test. does write() with an invalid buffer pointer cause +// a block to be allocated for a file that is then not freed when the +// file is deleted? if the kernel has this bug, it will panic: balloc: +// out of blocks. assumed_free may need to be raised to be more than +// the number of free blocks. this test takes a long time. void badwrite(char *s) { @@ -2049,9 +2072,8 @@ badwrite(char *s) exit(0); } -// test whether exec() leaks memory if one of the -// arguments is invalid. the test passes if -// the kernel doesn't panic. +// regression test. test whether exec() leaks memory if one of the +// arguments is invalid. the test passes if the kernel doesn't panic. void badarg(char *s) { @@ -2102,6 +2124,7 @@ main(int argc, char *argv[]) void (*f)(char *); char *s; } tests[] = { + {reparent2, "reparent2"}, {pgbug, "pgbug" }, {sbrkbugs, "sbrkbugs" }, // {badwrite, "badwrite" },