ab0db651af
The x86-64 doesn't just add two levels to page tables to support 64-bit addresses, but is a different processor. For example, calling conventions, system calls, and segmentation are different from 32-bit x86. Segmentation is basically gone, but gs/fs in combination with MSRs can be used to hold a per-core pointer. In general, x86-64 is more straightforward than 32-bit x86. The port uses code from sv6 and the xv6 "rsc-amd64" branch. A summary of the changes is as follows: - Booting: switch to grub instead of xv6's bootloader (pass -kernel to qemu), because xv6's boot loader doesn't understand 64-bit ELF files. And, we don't care anymore about booting. - Makefile: use -m64 instead of -m32 flag for gcc, delete boot loader, xv6.img, bochs, and memfs. For now don't use -O2, since usertests with -O2 is bigger than MAXFILE! - Update gdb.tmpl to be for i386 or x86-64 - Console/printf: use stdarg.h and treat 64-bit addresses different from ints (32-bit) - Update elfhdr to be 64 bit - entry.S/entryother.S: add code to switch to 64-bit mode: build a simple page table in 32-bit mode before switching to 64-bit mode, share code for entering boot processor and APs, and tweak boot gdt. The boot gdt is the gdt that the kernel proper also uses. (In 64-bit mode, the gdt/segmentation and task state mostly disappear.) - exec.c: fix passing argv (64-bit now instead of 32-bit). - initcode.c: use syscall instead of int. - kernel.ld: load kernel very high, in top terabyte. 64 bits is a lot of address space! - proc.c: initial return is through new syscall path instead of trapret. - proc.h: update struct cpu to have some scratch space since syscall saves less state than int, update struct context to reflect x86-64 calling conventions. - swtch: simplify for x86-64 calling conventions. - syscall: add fetcharg to handle x86-64 calling conventions (6 arguments are passed through registers), and fetchaddr to read a 64-bit value from user space. 
- sysfile: update to handle pointers from user space (e.g., sys_exec), which are 64 bits. - trap.c: no special trap vector for sys calls, because x86-64 has a different plan for system calls. - trapasm: one plan for syscalls and one plan for traps (interrupts and exceptions). On x86-64, the kernel is responsible for switching user/kernel stacks. To do so, xv6 keeps some scratch space in the cpu structure, and uses MSR GS_KERN_BASE to point to the core's cpu structure (using swapgs). - types.h: add uint64, and change pde_t to uint64 - usertests: exit() when fork fails, which helped in tracking down one of the bugs in the switch from 32-bit to 64-bit - vectors: update to make them 64 bits - vm.c: use bootgdt in kernel too, program MSRs for syscalls and core-local state (for swapgs), walk 4 levels in walkpgdir, add DEVSPACETOP, use task segment to set kernel stack for interrupts (but simpler than in 32-bit mode), add an extra argument to freevm (size of user part of address space) to avoid checking all entries till KERNBASE (there are MANY TB before the top 1TB). - x86: update trapframe to have 64-bit entries, which is what the processor pushes on syscalls and traps. Simplify lgdt and lidt, using struct desctr, which needs the gcc directives packed and aligned. TODO: - use int32 instead of int? - simplify curproc(). xv6 has per-cpu state again, but this time it must have it. - avoid repetition in walkpgdir - fix validateint() in usertests.c - fix bugs (e.g., observed a case of entering kernel with invalid gs or proc
532 lines
11 KiB
C
532 lines
11 KiB
C
#include "types.h"
|
|
#include "defs.h"
|
|
#include "param.h"
|
|
#include "memlayout.h"
|
|
#include "mmu.h"
|
|
#include "x86.h"
|
|
#include "proc.h"
|
|
#include "spinlock.h"
|
|
#include "msr.h"
|
|
|
|
// The global process table: a fixed array of NPROC process slots together
// with the spinlock that most functions in this file hold while scanning
// the table or changing a process's state.
struct {
|
|
struct spinlock lock;  // taken via acquire(&ptable.lock) around table scans and state changes
|
|
struct proc proc[NPROC];  // the process slots; allocproc() claims entries whose state is UNUSED
|
|
} ptable;
|
|
|
|
// The first user process, recorded by userinit(). exit() hands orphaned
// children to it and panics if it ever tries to exit itself.
static struct proc *initproc;
|
|
|
|
// Next process id to hand out; allocproc() post-increments it while
// holding ptable.lock.
int nextpid = 1;
|
|
// Defined later in this file; allocproc() makes every new process's saved
// context resume here.
extern void forkret(void);
|
|
// Defined outside this file (presumably the assembly syscall-return path --
// confirm). allocproc() stores its address on the new kernel stack just
// above the saved context.
extern void sysexit(void);
|
|
|
|
// Forward declaration; the definition is near the end of this file.
static void wakeup1(void *chan);
|
|
|
|
void
|
|
pinit(void)
|
|
{
|
|
initlock(&ptable.lock, "ptable");
|
|
}
|
|
|
|
// Must be called with interrupts disabled
|
|
int
|
|
cpuid() {
|
|
return mycpu()-cpus;
|
|
}
|
|
|
|
// Must be called with interrupts disabled to avoid the caller being
|
|
// rescheduled between reading lapicid and running through the loop.
|
|
struct cpu*
|
|
mycpu(void)
|
|
{
|
|
int apicid, i;
|
|
|
|
if(readeflags()&FL_IF)
|
|
panic("mycpu called with interrupts enabled\n");
|
|
|
|
apicid = lapicid();
|
|
// APIC IDs are not guaranteed to be contiguous. Maybe we should have
|
|
// a reverse map, or reserve a register to store &cpus[i].
|
|
for (i = 0; i < ncpu; ++i) {
|
|
if (cpus[i].apicid == apicid)
|
|
return &cpus[i];
|
|
}
|
|
panic("unknown apicid\n");
|
|
}
|
|
|
|
// Disable interrupts so that we are not rescheduled
|
|
// while reading proc from the cpu structure
|
|
struct proc*
|
|
myproc(void) {
|
|
struct cpu *c;
|
|
struct proc *p;
|
|
pushcli();
|
|
c = mycpu();
|
|
p = c->proc;
|
|
popcli();
|
|
return p;
|
|
}
|
|
|
|
//PAGEBREAK: 32
|
|
// Look in the process table for an UNUSED proc.
|
|
// If found, change state to EMBRYO and initialize
|
|
// state required to run in the kernel.
|
|
// Otherwise return 0.
|
|
static struct proc*
|
|
allocproc(void)
|
|
{
|
|
struct proc *p;
|
|
char *sp;
|
|
|
|
acquire(&ptable.lock);
|
|
|
|
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++)
|
|
if(p->state == UNUSED)
|
|
goto found;
|
|
|
|
release(&ptable.lock);
|
|
return 0;
|
|
|
|
found:
|
|
p->state = EMBRYO;
|
|
p->pid = nextpid++;
|
|
|
|
release(&ptable.lock);
|
|
|
|
// Allocate kernel stack.
|
|
if((p->kstack = kalloc()) == 0){
|
|
p->state = UNUSED;
|
|
return 0;
|
|
}
|
|
sp = p->kstack + KSTACKSIZE;
|
|
|
|
// Leave room for trap frame.
|
|
sp -= sizeof *p->tf;
|
|
p->tf = (struct trapframe*)sp;
|
|
|
|
// Set up new context to start executing at forkret,
|
|
// which returns to trapret.
|
|
sp -= sizeof(uint64);
|
|
*(uint64*)sp = (uint64)sysexit;
|
|
|
|
sp -= sizeof *p->context;
|
|
p->context = (struct context*)sp;
|
|
memset(p->context, 0, sizeof *p->context);
|
|
p->context->eip = (uint64)forkret;
|
|
|
|
return p;
|
|
}
|
|
|
|
//PAGEBREAK: 32
|
|
// Set up first user process.
|
|
void
|
|
userinit(void)
|
|
{
|
|
struct proc *p;
|
|
extern char _binary_initcode_start[], _binary_initcode_size[];
|
|
|
|
p = allocproc();
|
|
|
|
initproc = p;
|
|
if((p->pgdir = setupkvm()) == 0)
|
|
panic("userinit: out of memory?");
|
|
inituvm(p->pgdir, _binary_initcode_start, (uint64)_binary_initcode_size);
|
|
p->sz = PGSIZE;
|
|
memset(p->tf, 0, sizeof(*p->tf));
|
|
p->tf->r11 = FL_IF;
|
|
p->tf->rsp = PGSIZE;
|
|
p->tf->rcx = 0; // beginning of initcode.S
|
|
|
|
safestrcpy(p->name, "initcode", sizeof(p->name));
|
|
p->cwd = namei("/");
|
|
|
|
// this assignment to p->state lets other cores
|
|
// run this process. the acquire forces the above
|
|
// writes to be visible, and the lock is also needed
|
|
// because the assignment might not be atomic.
|
|
acquire(&ptable.lock);
|
|
|
|
p->state = RUNNABLE;
|
|
|
|
release(&ptable.lock);
|
|
}
|
|
|
|
// Grow current process's memory by n bytes.
|
|
// Return 0 on success, -1 on failure.
|
|
int
|
|
growproc(int n)
|
|
{
|
|
uint sz;
|
|
struct proc *curproc = myproc();
|
|
|
|
sz = curproc->sz;
|
|
if(n > 0){
|
|
if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0)
|
|
return -1;
|
|
} else if(n < 0){
|
|
if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0)
|
|
return -1;
|
|
}
|
|
curproc->sz = sz;
|
|
switchuvm(curproc);
|
|
return 0;
|
|
}
|
|
|
|
// Create a new process copying p as the parent.
|
|
// Sets up stack to return as if from system call.
|
|
// Caller must set state of returned proc to RUNNABLE.
|
|
int
|
|
fork(void)
|
|
{
|
|
int i, pid;
|
|
struct proc *np;
|
|
struct proc *curproc = myproc();
|
|
|
|
// Allocate process.
|
|
if((np = allocproc()) == 0){
|
|
return -1;
|
|
}
|
|
|
|
// Copy process state from proc.
|
|
if((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0){
|
|
kfree(np->kstack);
|
|
np->kstack = 0;
|
|
np->state = UNUSED;
|
|
return -1;
|
|
}
|
|
np->sz = curproc->sz;
|
|
np->parent = curproc;
|
|
*np->tf = *curproc->tf;
|
|
|
|
// Clear %eax so that fork returns 0 in the child.
|
|
np->tf->rax = 0;
|
|
|
|
for(i = 0; i < NOFILE; i++)
|
|
if(curproc->ofile[i])
|
|
np->ofile[i] = filedup(curproc->ofile[i]);
|
|
np->cwd = idup(curproc->cwd);
|
|
|
|
safestrcpy(np->name, curproc->name, sizeof(curproc->name));
|
|
|
|
pid = np->pid;
|
|
|
|
acquire(&ptable.lock);
|
|
|
|
np->state = RUNNABLE;
|
|
|
|
release(&ptable.lock);
|
|
|
|
return pid;
|
|
}
|
|
|
|
// Exit the current process. Does not return.
|
|
// An exited process remains in the zombie state
|
|
// until its parent calls wait() to find out it exited.
|
|
void
|
|
exit(void)
|
|
{
|
|
struct proc *curproc = myproc();
|
|
struct proc *p;
|
|
int fd;
|
|
|
|
if(curproc == initproc)
|
|
panic("init exiting");
|
|
|
|
// Close all open files.
|
|
for(fd = 0; fd < NOFILE; fd++){
|
|
if(curproc->ofile[fd]){
|
|
fileclose(curproc->ofile[fd]);
|
|
curproc->ofile[fd] = 0;
|
|
}
|
|
}
|
|
|
|
begin_op();
|
|
iput(curproc->cwd);
|
|
end_op();
|
|
curproc->cwd = 0;
|
|
|
|
acquire(&ptable.lock);
|
|
|
|
// Parent might be sleeping in wait().
|
|
wakeup1(curproc->parent);
|
|
|
|
// Pass abandoned children to init.
|
|
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
|
|
if(p->parent == curproc){
|
|
p->parent = initproc;
|
|
if(p->state == ZOMBIE)
|
|
wakeup1(initproc);
|
|
}
|
|
}
|
|
|
|
// Jump into the scheduler, never to return.
|
|
curproc->state = ZOMBIE;
|
|
sched();
|
|
panic("zombie exit");
|
|
}
|
|
|
|
// Wait for a child process to exit and return its pid.
|
|
// Return -1 if this process has no children.
|
|
int
|
|
wait(void)
|
|
{
|
|
struct proc *p;
|
|
int havekids, pid;
|
|
struct proc *curproc = myproc();
|
|
|
|
acquire(&ptable.lock);
|
|
for(;;){
|
|
// Scan through table looking for exited children.
|
|
havekids = 0;
|
|
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
|
|
if(p->parent != curproc)
|
|
continue;
|
|
havekids = 1;
|
|
if(p->state == ZOMBIE){
|
|
// Found one.
|
|
pid = p->pid;
|
|
kfree(p->kstack);
|
|
p->kstack = 0;
|
|
freevm(p->pgdir, p->sz);
|
|
p->pid = 0;
|
|
p->parent = 0;
|
|
p->name[0] = 0;
|
|
p->killed = 0;
|
|
p->state = UNUSED;
|
|
release(&ptable.lock);
|
|
return pid;
|
|
}
|
|
}
|
|
|
|
// No point waiting if we don't have any children.
|
|
if(!havekids || curproc->killed){
|
|
release(&ptable.lock);
|
|
return -1;
|
|
}
|
|
|
|
// Wait for children to exit. (See wakeup1 call in proc_exit.)
|
|
sleep(curproc, &ptable.lock); //DOC: wait-sleep
|
|
}
|
|
}
|
|
|
|
//PAGEBREAK: 42
|
|
// Per-CPU process scheduler.
|
|
// Each CPU calls scheduler() after setting itself up.
|
|
// Scheduler never returns. It loops, doing:
|
|
// - choose a process to run
|
|
// - swtch to start running that process
|
|
// - eventually that process transfers control
|
|
// via swtch back to the scheduler.
|
|
void
|
|
scheduler(void)
|
|
{
|
|
struct proc *p;
|
|
struct cpu *c = mycpu();
|
|
c->proc = 0;
|
|
|
|
for(;;){
|
|
// Enable interrupts on this processor.
|
|
sti();
|
|
|
|
// Loop over process table looking for process to run.
|
|
acquire(&ptable.lock);
|
|
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
|
|
if(p->state != RUNNABLE)
|
|
continue;
|
|
|
|
// Switch to chosen process. It is the process's job
|
|
// to release ptable.lock and then reacquire it
|
|
// before jumping back to us.
|
|
|
|
c->proc = p;
|
|
switchuvm(p);
|
|
p->state = RUNNING;
|
|
|
|
swtch(&(c->scheduler), p->context);
|
|
switchkvm();
|
|
|
|
// Process is done running for now.
|
|
// It should have changed its p->state before coming back.
|
|
c->proc = 0;
|
|
}
|
|
release(&ptable.lock);
|
|
|
|
}
|
|
}
|
|
|
|
// Enter scheduler. Must hold only ptable.lock
|
|
// and have changed proc->state. Saves and restores
|
|
// intena because intena is a property of this
|
|
// kernel thread, not this CPU. It should
|
|
// be proc->intena and proc->ncli, but that would
|
|
// break in the few places where a lock is held but
|
|
// there's no process.
|
|
void
|
|
sched(void)
|
|
{
|
|
int intena;
|
|
struct proc *p = myproc();
|
|
|
|
if(!holding(&ptable.lock))
|
|
panic("sched ptable.lock");
|
|
if(mycpu()->ncli != 1)
|
|
panic("sched locks");
|
|
if(p->state == RUNNING)
|
|
panic("sched running");
|
|
if(readeflags()&FL_IF)
|
|
panic("sched interruptible");
|
|
intena = mycpu()->intena;
|
|
swtch(&p->context, mycpu()->scheduler);
|
|
mycpu()->intena = intena;
|
|
}
|
|
|
|
// Give up the CPU for one scheduling round.
|
|
void
|
|
yield(void)
|
|
{
|
|
acquire(&ptable.lock); //DOC: yieldlock
|
|
myproc()->state = RUNNABLE;
|
|
sched();
|
|
release(&ptable.lock);
|
|
}
|
|
|
|
// A fork child's very first scheduling by scheduler()
|
|
// will swtch here. "Return" to user space.
|
|
void
|
|
forkret(void)
|
|
{
|
|
static int first = 1;
|
|
// Still holding ptable.lock from scheduler.
|
|
release(&ptable.lock);
|
|
|
|
if (first) {
|
|
// Some initialization functions must be run in the context
|
|
// of a regular process (e.g., they call sleep), and thus cannot
|
|
// be run from main().
|
|
first = 0;
|
|
iinit(ROOTDEV);
|
|
initlog(ROOTDEV);
|
|
}
|
|
|
|
// Return to "caller", actually trapret (see allocproc).
|
|
}
|
|
|
|
// Atomically release lock and sleep on chan.
|
|
// Reacquires lock when awakened.
|
|
void
|
|
sleep(void *chan, struct spinlock *lk)
|
|
{
|
|
struct proc *p = myproc();
|
|
|
|
if(p == 0)
|
|
panic("sleep");
|
|
|
|
if(lk == 0)
|
|
panic("sleep without lk");
|
|
|
|
// Must acquire ptable.lock in order to
|
|
// change p->state and then call sched.
|
|
// Once we hold ptable.lock, we can be
|
|
// guaranteed that we won't miss any wakeup
|
|
// (wakeup runs with ptable.lock locked),
|
|
// so it's okay to release lk.
|
|
if(lk != &ptable.lock){ //DOC: sleeplock0
|
|
acquire(&ptable.lock); //DOC: sleeplock1
|
|
release(lk);
|
|
}
|
|
// Go to sleep.
|
|
p->chan = chan;
|
|
p->state = SLEEPING;
|
|
|
|
sched();
|
|
|
|
// Tidy up.
|
|
p->chan = 0;
|
|
|
|
// Reacquire original lock.
|
|
if(lk != &ptable.lock){ //DOC: sleeplock2
|
|
release(&ptable.lock);
|
|
acquire(lk);
|
|
}
|
|
}
|
|
|
|
//PAGEBREAK!
|
|
// Wake up all processes sleeping on chan.
|
|
// The ptable lock must be held.
|
|
static void
|
|
wakeup1(void *chan)
|
|
{
|
|
struct proc *p;
|
|
|
|
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++)
|
|
if(p->state == SLEEPING && p->chan == chan)
|
|
p->state = RUNNABLE;
|
|
}
|
|
|
|
// Wake up all processes sleeping on chan.
|
|
void
|
|
wakeup(void *chan)
|
|
{
|
|
acquire(&ptable.lock);
|
|
wakeup1(chan);
|
|
release(&ptable.lock);
|
|
}
|
|
|
|
// Kill the process with the given pid.
|
|
// Process won't exit until it returns
|
|
// to user space (see trap in trap.c).
|
|
int
|
|
kill(int pid)
|
|
{
|
|
struct proc *p;
|
|
|
|
acquire(&ptable.lock);
|
|
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
|
|
if(p->pid == pid){
|
|
p->killed = 1;
|
|
// Wake process from sleep if necessary.
|
|
if(p->state == SLEEPING)
|
|
p->state = RUNNABLE;
|
|
release(&ptable.lock);
|
|
return 0;
|
|
}
|
|
}
|
|
release(&ptable.lock);
|
|
return -1;
|
|
}
|
|
|
|
//PAGEBREAK: 36
|
|
// Print a process listing to console. For debugging.
|
|
// Runs when user types ^P on console.
|
|
// No lock to avoid wedging a stuck machine further.
|
|
void
|
|
procdump(void)
|
|
{
|
|
static char *states[] = {
|
|
[UNUSED] "unused",
|
|
[EMBRYO] "embryo",
|
|
[SLEEPING] "sleep ",
|
|
[RUNNABLE] "runble",
|
|
[RUNNING] "run ",
|
|
[ZOMBIE] "zombie"
|
|
};
|
|
int i;
|
|
struct proc *p;
|
|
char *state;
|
|
uint64 pc[10];
|
|
|
|
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
|
|
if(p->state == UNUSED)
|
|
continue;
|
|
if(p->state >= 0 && p->state < NELEM(states) && states[p->state])
|
|
state = states[p->state];
|
|
else
|
|
state = "???";
|
|
cprintf("%d %s %s", p->pid, state, p->name);
|
|
if(p->state == SLEEPING){
|
|
getcallerpcs((uint64*)p->context->ebp+2, pc);
|
|
for(i=0; i<10 && pc[i] != 0; i++)
|
|
cprintf(" %p", pc[i]);
|
|
}
|
|
cprintf("\n");
|
|
}
|
|
}
|