diff --git a/Notes b/Notes
index d8f4869..16f6542 100644
--- a/Notes
+++ b/Notes
@@ -85,17 +85,44 @@ test pipe reader closes then write
 test two readers, two writers.
 test children being inherited by grandparent &c
 
-kill
-  sleep()ing for something
-  running at user level
-  running in kernel
-  ooh, the relevant CPU may never get a clock interrupt
-  should each cpu have its own clock?
-  where to check?
-    loops around sleep()
-    return from any trap
-  rules about being killed deep inside a system call
-  test above cases
+some sleep()s should be interruptible by kill()
 
 cli/sti in acquire/release should nest!
   in case you acquire two locks
+
+what would need fixing if we got rid of kernel_lock?
+  console output
+  proc_exit() needs lock on proc *array* to deallocate
+  kill() needs lock on proc *array*
+  allocator's free list
+  global fd table (really free-ness)
+  sys_close() on fd table
+  fork on proc list, also next pid
+    hold lock until public slots in proc struct initialized
+
+locks
+  init_lock
+    sequences CPU startup
+  proc_table_lock
+    also protects next_pid
+  per-fd lock *just* protects count read-modify-write
+    also maybe freeness?
+  memory allocator
+  printf
+
+wakeup needs proc_table_lock
+  so we need recursive locks?
+  or you must hold the lock to call wakeup?
+
+if locks contain proc *, they can't be used at interrupt time
+  only proc_table_lock will be used at interrupt time?
+  maybe it doesn't matter if we use curproc?
+
+in general, the table locks protect both free-ness and
+  public variables of table elements
+  in many cases you can use table elements w/o a lock
+  e.g. if you are the process, or you are using an fd
+
+why can't i get a lock in console code?
+  always triple fault
+lock code shouldn't call cprintf...
diff --git a/console.c b/console.c
index 97de59e..3745009 100644
--- a/console.c
+++ b/console.c
@@ -1,6 +1,9 @@
 #include <types.h>
 #include <x86.h>
 #include "defs.h"
+#include "spinlock.h"
+
+struct spinlock console_lock;
 
 /*
  * copy console output to parallel port, which you can tell
@@ -26,6 +29,8 @@ cons_putc(int c)
   unsigned short *crt = (unsigned short *) 0xB8000; // base of CGA memory
   int ind;
 
+  //acquire(&console_lock);
+
   lpt_putc(c);
 
   // cursor position, 16 bits, col + 80*row
@@ -56,6 +61,8 @@ cons_putc(int c)
   outb(crtport + 1, ind >> 8);
   outb(crtport, 15);
   outb(crtport + 1, ind);
+
+  //release(&console_lock);
 }
 
 void
diff --git a/defs.h b/defs.h
index 6a13ad2..c5dc4d8 100644
--- a/defs.h
+++ b/defs.h
@@ -13,7 +13,7 @@ struct proc;
 struct jmpbuf;
 void setupsegs(struct proc *);
 struct proc * newproc(void);
-void swtch(void);
+void swtch(int);
 void sleep(void *);
 void wakeup(void *);
 void scheduler(void);
@@ -55,10 +55,9 @@ void lapic_enableintr(void);
 void lapic_disableintr(void);
 
 // spinlock.c
-extern uint32_t kernel_lock;
-void acquire_spinlock(uint32_t* lock);
-void release_spinlock(uint32_t* lock);
-void release_grant_spinlock(uint32_t* lock, int cpu);
+struct spinlock;
+void acquire(struct spinlock * lock);
+void release(struct spinlock * lock);
 
 // main.c
 void load_icode(struct proc *p, uint8_t *binary, unsigned size);
@@ -77,6 +76,7 @@ struct fd * fd_alloc();
 void fd_close(struct fd *);
 int fd_read(struct fd *fd, char *addr, int n);
 int fd_write(struct fd *fd, char *addr, int n);
+void fd_reference(struct fd *fd);
 
 // ide.c
 void ide_init(void);
diff --git a/fd.c b/fd.c
index 8b59605..0f7028f 100644
--- a/fd.c
+++ b/fd.c
@@ -5,6 +5,9 @@
 #include "proc.h"
 #include "defs.h"
 #include "fd.h"
+#include "spinlock.h"
+
+struct spinlock fd_table_lock;
 
 struct fd fds[NFD];
 
@@ -22,18 +25,24 @@ fd_ualloc()
   return -1;
 }
 
+/*
+ * allocate a file descriptor structure
+ */
 struct fd *
 fd_alloc()
 {
   int i;
 
+  acquire(&fd_table_lock);
   for(i = 0; i < NFD; i++){
     if(fds[i].type == FD_CLOSED){
       fds[i].type = FD_NONE;
       fds[i].count = 1;
+      release(&fd_table_lock);
       return fds + i;
     }
   }
+  release(&fd_table_lock);
   return 0;
 }
 
@@ -69,8 +78,11 @@ fd_read(struct fd *fd, char *addr, int n)
 void
 fd_close(struct fd *fd)
 {
+  acquire(&fd_table_lock);
+
   if(fd->count < 1 || fd->type == FD_CLOSED)
     panic("fd_close");
+
   fd->count -= 1;
 
   if(fd->count == 0){
@@ -79,6 +91,19 @@ fd_close(struct fd *fd)
     } else {
       panic("fd_close");
     }
+    fd->count = 0;
     fd->type = FD_CLOSED;
   }
+  
+  release(&fd_table_lock);
+}
+
+void
+fd_reference(struct fd *fd)
+{
+  acquire(&fd_table_lock);
+  if(fd->count < 1 || fd->type == FD_CLOSED)
+    panic("fd_reference");
+  fd->count += 1;
+  release(&fd_table_lock);
 }
diff --git a/kalloc.c b/kalloc.c
index b14a69a..969b81d 100644
--- a/kalloc.c
+++ b/kalloc.c
@@ -10,6 +10,9 @@
 #include "param.h"
 #include "types.h"
 #include "defs.h"
+#include "spinlock.h"
+
+struct spinlock kalloc_lock;
 
 struct run {
   struct run *next;
@@ -54,6 +57,8 @@ kfree(char *cp, int len)
   for(i = 0; i < len; i++)
     cp[i] = 1;
 
+  acquire(&kalloc_lock);
+
   rr = &freelist;
   while(*rr){
     struct run *rend = (struct run *) ((char *)(*rr) + (*rr)->len);
@@ -63,13 +68,13 @@ kfree(char *cp, int len)
       p->len = len + (*rr)->len;
       p->next = (*rr)->next;
       *rr = p;
-      return;
+      goto out;
     }
     if(pend < *rr){
       p->len = len;
       p->next = *rr;
       *rr = p;
-      return;
+      goto out;
     }
     if(p == rend){
       (*rr)->len += len;
@@ -77,13 +82,16 @@ kfree(char *cp, int len)
         (*rr)->len += (*rr)->next->len;
         (*rr)->next = (*rr)->next->next;
       }
-      return;
+      goto out;
     }
     rr = &((*rr)->next);
   }
   p->len = len;
   p->next = 0;
   *rr = p;
+
+ out:
+  release(&kalloc_lock);
 }
 
 /*
@@ -99,20 +107,25 @@ kalloc(int n)
   if(n % PAGE)
     panic("kalloc");
 
+  acquire(&kalloc_lock);
+
   rr = &freelist;
   while(*rr){
     struct run *r = *rr;
     if(r->len == n){
       *rr = r->next;
+      release(&kalloc_lock);
       return (char *) r;
     }
     if(r->len > n){
       char *p = (char *)r + (r->len - n);
       r->len -= n;
+      release(&kalloc_lock);
       return p;
     }
     rr = &(*rr)->next;
   }
+  release(&kalloc_lock);
   return 0;
 }
 
diff --git a/main.c b/main.c
index c6051ed..9ba78a9 100644
--- a/main.c
+++ b/main.c
@@ -15,8 +15,6 @@ extern char _binary_user1_start[], _binary_user1_size[];
 extern char _binary_usertests_start[], _binary_usertests_size[];
 extern char _binary_userfs_start[], _binary_userfs_size[];
 
-char buf[512];
-
 int
 main()
 {
@@ -24,8 +22,6 @@ main()
 
   if (acpu) {
     cprintf("an application processor\n");
-    release_spinlock(&kernel_lock);
-    acquire_spinlock(&kernel_lock);
     idtinit(); // CPU's idt
     lapic_init(cpu());
     lapic_timerinit();
@@ -39,7 +35,6 @@ main()
   cprintf("\nxV6\n\n");
 
   pic_init(); // initialize PIC
-  mp_init(); // multiprocessor
   kinit(); // physical memory allocator
   tvinit(); // trap vectors
   idtinit(); // CPU's idt
@@ -61,11 +56,14 @@ main()
   p->ppid = 0;
   setupsegs(p);
 
+  mp_init(); // multiprocessor
+
   // turn on timer and enable interrupts on the local APIC
   lapic_timerinit();
   lapic_enableintr();
+
   // init disk device
-  ide_init(); 
+  //ide_init(); 
 
   // become interruptable
   sti();
@@ -74,7 +72,9 @@ main()
   
   load_icode(p, _binary_usertests_start, (unsigned) _binary_usertests_size);
   //load_icode(p, _binary_userfs_start, (unsigned) _binary_userfs_size);
+  p->state = RUNNABLE;
   cprintf("loaded userfs\n");
+
   scheduler();
 
   return 0;
diff --git a/mp.c b/mp.c
index 2b2a612..4258aba 100644
--- a/mp.c
+++ b/mp.c
@@ -391,15 +391,11 @@ mp_init()
   memmove((void *) APBOOTCODE,_binary_bootother_start, 
 	  (uint32_t) _binary_bootother_size);
 
-  acquire_spinlock(&kernel_lock);
   for(c = 0; c < ncpu; c++){
     if (cpus+c == bcpu) continue;
     cprintf ("starting processor %d\n", c);
-    release_grant_spinlock(&kernel_lock, c);
     *(unsigned *)(APBOOTCODE-4) = (unsigned) (cpus[c].mpstack) + MPSTACK; // tell it what to use for %esp
     *(unsigned *)(APBOOTCODE-8) = (unsigned)&main; // tell it where to jump to
     lapic_startap(cpus + c, (uint32_t) APBOOTCODE);
-    acquire_spinlock(&kernel_lock);
-    cprintf ("done starting processor %d\n", c);
   }
 }
diff --git a/pipe.c b/pipe.c
index 3aed4e9..3975244 100644
--- a/pipe.c
+++ b/pipe.c
@@ -5,6 +5,7 @@
 #include "proc.h"
 #include "defs.h"
 #include "fd.h"
+#include "spinlock.h"
 
 #define PIPESIZE 512
 
@@ -13,6 +14,7 @@ struct pipe {
   int writeopen; // write fd is still open
   int writep; // next index to write
   int readp;  // next index to read
+  struct spinlock lock;
   char data[PIPESIZE];
 };
 
@@ -32,6 +34,7 @@ pipe_alloc(struct fd **fd1, struct fd **fd2)
   p->writeopen = 1;
   p->writep = 0;
   p->readp = 0;
+  memset(&p->lock, 0, sizeof(p->lock));
   (*fd1)->type = FD_PIPE;
   (*fd1)->readable = 1;
   (*fd1)->writeable = 0;
@@ -74,16 +77,23 @@
 {
   int i;
 
+  acquire(&p->lock);
+
   for(i = 0; i < n; i++){
     while(((p->writep + 1) % PIPESIZE) == p->readp){
-      if(p->readopen == 0)
-        return -1;
+      if(p->readopen == 0){
+        release(&p->lock);
+        return -1;
+      }
+      release(&p->lock);
       wakeup(&p->readp);
       sleep(&p->writep);
+      acquire(&p->lock);
     }
     p->data[p->writep] = addr[i];
     p->writep = (p->writep + 1) % PIPESIZE;
   }
+  release(&p->lock);
   wakeup(&p->readp);
   return i;
 }
@@ -99,12 +107,14 @@ pipe_read(struct pipe *p, char *addr, int n)
     sleep(&p->readp);
   }
 
+  acquire(&p->lock);
   for(i = 0; i < n; i++){
     if(p->readp == p->writep)
       break;
     addr[i] = p->data[p->readp];
     p->readp = (p->readp + 1) % PIPESIZE;
   }
+  release(&p->lock);
   wakeup(&p->writep);
   return i;
 }
diff --git a/proc.c b/proc.c
index d7fc638..07a88d8 100644
--- a/proc.c
+++ b/proc.c
@@ -5,6 +5,9 @@
 #include "fd.h"
 #include "proc.h"
 #include "defs.h"
+#include "spinlock.h"
+
+struct spinlock proc_table_lock;
 
 struct proc proc[NPROC];
 struct proc *curproc[NCPU];
@@ -43,6 +46,7 @@ extern void trapret();
 /*
  * internal fork(). does not copy kernel stack; instead,
  * sets up the stack to return as if from system call.
+ * caller must set state to RUNNABLE.
  */
 struct proc *
 newproc()
@@ -51,11 +55,18 @@ newproc()
   struct proc *op;
   int fd;
 
-  for(np = &proc[1]; np < &proc[NPROC]; np++)
-    if(np->state == UNUSED)
+  acquire(&proc_table_lock);
+
+  for(np = &proc[1]; np < &proc[NPROC]; np++){
+    if(np->state == UNUSED){
+      np->state = EMBRYO;
       break;
-  if(np >= &proc[NPROC])
+    }
+  }
+  if(np >= &proc[NPROC]){
+    release(&proc_table_lock);
     return 0;
+  }
 
   // copy from proc[0] if we're bootstrapping
   op = curproc[cpu()];
@@ -64,6 +75,9 @@ newproc()
 
   np->pid = next_pid++;
   np->ppid = op->pid;
+
+  release(&proc_table_lock);
+
   np->sz = op->sz;
   np->mem = kalloc(op->sz);
   if(np->mem == 0)
@@ -72,6 +86,7 @@ newproc()
   np->kstack = kalloc(KSTACKSIZE);
   if(np->kstack == 0){
     kfree(np->mem, op->sz);
+    np->state = UNUSED;
     return 0;
   }
   setupsegs(np);
@@ -91,11 +106,9 @@ newproc()
   for(fd = 0; fd < NOFILE; fd++){
     np->fds[fd] = op->fds[fd];
     if(np->fds[fd])
-      np->fds[fd]->count += 1;
+      fd_reference(np->fds[fd]);
   }
 
-  np->state = RUNNABLE;
-
   cprintf("newproc %x\n", np);
 
   return np;
@@ -111,11 +124,20 @@ scheduler(void)
   cpus[cpu()].lastproc = &proc[0];
 
   setjmp(&cpus[cpu()].jmpbuf);
-  
+
+  op = curproc[cpu()];
+  if(op){
+    if(op->newstate <= 0 || op->newstate > ZOMBIE)
+      panic("scheduler");
+    op->state = op->newstate;
+    op->newstate = -1;
+  }
+
   // find a runnable process and switch to it
   curproc[cpu()] = 0;
   np = cpus[cpu()].lastproc + 1;
   while(1){
+    acquire(&proc_table_lock);
     for(i = 0; i < NPROC; i++){
       if(np >= &proc[NPROC])
         np = &proc[0];
@@ -123,20 +145,20 @@ scheduler(void)
         break;
       np++;
     }
-    if(i < NPROC)
+
+    if(i < NPROC){
+      np->state = RUNNING;
+      release(&proc_table_lock);
       break;
-    // cprintf("swtch %d: nothing to run %d %d\n",
-            // cpu(), proc[1].state, proc[2].state);
-    release_spinlock(&kernel_lock);
-    acquire_spinlock(&kernel_lock);
+    }
+    
+    release(&proc_table_lock);
     np = &proc[0];
   }
 
   cpus[cpu()].lastproc = np;
   curproc[cpu()] = np;
   
-  np->state = RUNNING;
-
   // h/w sets busy bit in TSS descriptor sometimes, and faults
   // if it's set in LTR. so clear tss descriptor busy bit.
   np->gdt[SEG_TSS].sd_type = STS_T32A;
@@ -155,11 +177,12 @@ scheduler(void)
 // give up the cpu by switching to the scheduler,
 // which runs on the per-cpu stack.
 void
-swtch(void)
+swtch(int newstate)
 {
   struct proc *p = curproc[cpu()];
   if(p == 0)
     panic("swtch");
+  p->newstate = newstate; // basically an argument to scheduler()
   if(setjmp(&p->jmpbuf) == 0)
     longjmp(&cpus[cpu()].jmpbuf);
 }
@@ -171,8 +194,7 @@ sleep(void *chan)
   if(p == 0)
     panic("sleep");
   p->chan = chan;
-  p->state = WAITING;
-  swtch();
+  swtch(WAITING);
 }
 
 void
@@ -180,9 +202,11 @@ wakeup(void *chan)
 {
   struct proc *p;
 
+  acquire(&proc_table_lock);
   for(p = proc; p < &proc[NPROC]; p++)
     if(p->state == WAITING && p->chan == chan)
       p->state = RUNNABLE;
+  release(&proc_table_lock);
 }
 
 // give up the CPU but stay marked as RUNNABLE
@@ -191,8 +215,7 @@ yield()
 {
   if(curproc[cpu()] == 0 || curproc[cpu()]->state != RUNNING)
     panic("yield");
-  curproc[cpu()]->state = RUNNABLE;
-  swtch();
+  swtch(RUNNABLE);
 }
 
 void
@@ -211,7 +234,7 @@ proc_exit()
     }
   }
 
-  cp->state = ZOMBIE;
+  acquire(&proc_table_lock);
 
   // wake up parent
   for(p = proc; p < &proc[NPROC]; p++)
@@ -223,6 +246,8 @@ proc_exit()
     if(p->ppid == cp->pid)
       p->pid = 1;
 
+  release(&proc_table_lock);
+
   // switch into scheduler
-  swtch();
+  swtch(ZOMBIE);
 }
diff --git a/proc.h b/proc.h
index 86ba0eb..b9ec246 100644
--- a/proc.h
+++ b/proc.h
@@ -33,11 +33,14 @@ struct jmpbuf {
   int jb_eip;
 };
 
+enum proc_state { UNUSED, EMBRYO, WAITING, RUNNABLE, RUNNING, ZOMBIE };
+
 struct proc{
   char *mem; // start of process's physical memory
   unsigned sz; // total size of mem, including kernel stack
   char *kstack; // kernel stack, separate from mem so it doesn't move
-  enum { UNUSED, RUNNABLE, WAITING, ZOMBIE, RUNNING } state;
+  enum proc_state state;
+  enum proc_state newstate; // desired state after swtch()
   int pid;
   int ppid;
   void *chan; // sleep
diff --git a/spinlock.c b/spinlock.c
index d73faff..9f840a8 100644
--- a/spinlock.c
+++ b/spinlock.c
@@ -2,51 +2,50 @@
 #include "defs.h"
 #include "x86.h"
 #include "mmu.h"
+#include "param.h"
+#include "proc.h"
+#include "spinlock.h"
 
-#define LOCK_FREE -1
 #define DEBUG 0
 
-uint32_t kernel_lock = LOCK_FREE;
-
 int getcallerpc(void *v) {
   return ((int*)v)[-1];
 }
 
-// lock = LOCK_FREE if free, else = cpu_id of owner CPU
 void
-acquire_spinlock(uint32_t* lock)
+acquire(struct spinlock * lock)
 {
-  int cpu_id = cpu();
+  struct proc * cp = curproc[cpu()];
 
   // on a real machine there would be a memory barrier here
-  if(DEBUG) cprintf("cpu%d: acquiring at %x\n", cpu_id, getcallerpc(&lock));
-  cli();
-  if (*lock == cpu_id)
-    panic("recursive lock");
-  
-  while ( cmpxchg(LOCK_FREE, cpu_id, lock) != cpu_id ) { ; }
-  if(DEBUG) cprintf("cpu%d: acquired at %x\n", cpu_id, getcallerpc(&lock));
+  if(DEBUG) cprintf("cpu%d: acquiring at %x\n", cpu(), getcallerpc(&lock));
+  if (cp && lock->p == cp && lock->locked){
+    lock->count += 1;
+  } else { 
+    cli();
+    while ( cmpxchg(0, 1, &lock->locked) != 1 ) { ; }
+    lock->locker_pc = getcallerpc(&lock);
+    lock->count = 1;
+    lock->p = cp;
+  }
+  if(DEBUG) cprintf("cpu%d: acquired at %x\n", cpu(), getcallerpc(&lock));
 }
 
 void
-release_spinlock(uint32_t* lock)
+release(struct spinlock * lock)
 {
-  int cpu_id = cpu();
-  if(DEBUG) cprintf ("cpu%d: releasing at %x\n", cpu_id, getcallerpc(&lock));
-  if (*lock != cpu_id)
-    panic("release_spinlock: releasing a lock that i don't own\n");
-  *lock = LOCK_FREE;
-  // on a real machine there would be a memory barrier here
-  sti();
-}
+  struct proc * cp = curproc[cpu()];
 
-void
-release_grant_spinlock(uint32_t* lock, int c)
-{
-  int cpu_id = cpu();
-  if(DEBUG) cprintf ("cpu%d: release_grant to %d at %x\n", cpu_id, c, getcallerpc(&lock));
-  if (*lock != cpu_id)
-    panic("release_spinlock: releasing a lock that i don't own\n");
-  *lock = c;
-}
+  if(DEBUG) cprintf ("cpu%d: releasing at %x\n", cpu(), getcallerpc(&lock));
 
+  if(lock->p != cp || lock->count < 1 || lock->locked != 1)
+    panic("release");
+
+  lock->count -= 1;
+  if(lock->count < 1){
+    lock->p = 0;
+    cmpxchg(1, 0, &lock->locked);
+    sti();
+    // on a real machine there would be a memory barrier here
+  }
+}
diff --git a/syscall.c b/syscall.c
index 4ecb31c..e93fb00 100644
--- a/syscall.c
+++ b/syscall.c
@@ -6,6 +6,7 @@
 #include "x86.h"
 #include "traps.h"
 #include "syscall.h"
+#include "spinlock.h"
 
 /*
  * User code makes a system call with INT T_SYSCALL.
@@ -18,6 +19,8 @@
  * Return value? Error indication? Errno?
  */
 
+extern struct spinlock proc_table_lock;
+
 /*
  * fetch 32 bits from a user-supplied pointer.
  * returns 1 if addr was OK, 0 if illegal.
@@ -149,6 +152,9 @@ sys_fork()
   struct proc *np;
 
   np = newproc();
+  if(np == 0)
+    return -1;
+  np->state = RUNNABLE;
   return np->pid;
 }
 
@@ -170,18 +174,21 @@ sys_wait()
 
   while(1){
     any = 0;
+    acquire(&proc_table_lock);
     for(p = proc; p < &proc[NPROC]; p++){
       if(p->state == ZOMBIE && p->ppid == cp->pid){
         kfree(p->mem, p->sz);
         kfree(p->kstack, KSTACKSIZE);
         pid = p->pid;
         p->state = UNUSED;
+        release(&proc_table_lock);
         cprintf("%x collected %x\n", cp, p);
         return pid;
       }
       if(p->state != UNUSED && p->ppid == cp->pid)
         any = 1;
     }
+    release(&proc_table_lock);
     if(any == 0){
       cprintf("%x nothing to wait for\n", cp);
       return -1;
@@ -232,14 +239,17 @@ sys_kill()
   struct proc *p;
 
   fetcharg(0, &pid);
+  acquire(&proc_table_lock);
   for(p = proc; p < &proc[NPROC]; p++){
     if(p->pid == pid && p->state != UNUSED){
       p->killed = 1;
       if(p->state == WAITING)
         p->state = RUNNABLE;
+      release(&proc_table_lock);
       return 0;
     }
   }
+  release(&proc_table_lock);
   return -1;
 }
 
diff --git a/trap.c b/trap.c
index d7739f7..862225b 100644
--- a/trap.c
+++ b/trap.c
@@ -5,6 +5,7 @@
 #include "defs.h"
 #include "x86.h"
 #include "traps.h"
+#include "syscall.h"
 
 struct Gatedesc idt[256];
 struct Pseudodesc idt_pd = { 0, sizeof(idt) - 1, (unsigned) &idt };
@@ -35,12 +36,6 @@ trap(struct Trapframe *tf)
 {
   int v = tf->tf_trapno;
 
-  if(tf->tf_cs == 0x8 && kernel_lock == cpu())
-    cprintf("cpu %d: trap %d from %x:%x with lock=%d\n",
-            cpu(), v, tf->tf_cs, tf->tf_eip, kernel_lock);
-
-  acquire_spinlock(&kernel_lock); // released in trapret in trapasm.S
-
   if(v == T_SYSCALL){
     struct proc *cp = curproc[cpu()];
     if(cp == 0)
@@ -55,7 +50,8 @@ trap(struct Trapframe *tf)
       panic("trap ret but not RUNNING");
     if(tf != cp->tf)
       panic("trap ret wrong tf");
-    if(read_esp() < (unsigned)cp->kstack || read_esp() >= (unsigned)cp->kstack + KSTACKSIZE)
+    if(read_esp() < (unsigned)cp->kstack ||
+       read_esp() >= (unsigned)cp->kstack + KSTACKSIZE)
       panic("trap ret esp wrong");
     if(cp->killed)
       proc_exit();
diff --git a/trapasm.S b/trapasm.S
index 4494642..2608328 100644
--- a/trapasm.S
+++ b/trapasm.S
@@ -22,10 +22,6 @@ alltraps:
          * expects ESP to point to a Trapframe
          */
 trapret:
-        pushl $kernel_lock
-        call release_spinlock
-        addl $0x4, %esp
-
         popal
         popl %es
         popl %ds
diff --git a/usertests.c b/usertests.c
index fa1b210..2f688ca 100644
--- a/usertests.c
+++ b/usertests.c
@@ -93,8 +93,8 @@ preempt()
 main()
 {
   puts("usertests starting\n");
-  //pipe1();
-  preempt();
+  pipe1();
+  //preempt();
 
   while(1)
     ;