diff --git a/.gdbinit.tmpl-riscv b/.gdbinit.tmpl-riscv new file mode 100644 index 0000000..6ea36e1 --- /dev/null +++ b/.gdbinit.tmpl-riscv @@ -0,0 +1,3 @@ +set architecture riscv +target remote 127.0.0.1:1234 +symbol-file kernel diff --git a/Makefile b/Makefile index b199842..1424d7c 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,20 @@ OBJS = \ + start.o \ + console.o \ + uart.o \ + kalloc.o \ + spinlock.o \ + string.o \ + main.o \ + vm.o \ + proc.o \ + swtch.o \ + trampoline.o \ + trap.o \ + syscall.o \ + sysproc.o + +XXXOBJS = \ bio.o\ console.o\ exec.o\ @@ -28,48 +44,23 @@ OBJS = \ vectors.o\ vm.o\ -# Cross-compiling (e.g., on Mac OS X) -# TOOLPREFIX = i386-jos-elf - -# Using native tools (e.g., on X86 Linux) +# riscv64-unknown-elf- or riscv64-linux-gnu- +# perhaps in /opt/riscv/bin #TOOLPREFIX = # Try to infer the correct TOOLPREFIX if not set ifndef TOOLPREFIX -TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/dev/null 2>&1; \ - then echo 'i386-jos-elf-'; \ - elif objdump -i 2>&1 | grep 'elf32-i386' >/dev/null 2>&1; \ - then echo ''; \ +TOOLPREFIX := $(shell if riscv64-unknown-elf-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ + then echo 'riscv64-unknown-elf-'; \ + elif riscv64-linux-gnu-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ + then echo 'riscv64-linux-gnu-'; \ else echo "***" 1>&2; \ - echo "*** Error: Couldn't find an i386-*-elf version of GCC/binutils." 1>&2; \ - echo "*** Is the directory with i386-jos-elf-gcc in your PATH?" 1>&2; \ - echo "*** If your i386-*-elf toolchain is installed with a command" 1>&2; \ - echo "*** prefix other than 'i386-jos-elf-', set your TOOLPREFIX" 1>&2; \ - echo "*** environment variable to that prefix and run 'make' again." 1>&2; \ + echo "*** Error: Couldn't find an riscv64 version of GCC/binutils." 1>&2; \ echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 
1>&2; \ echo "***" 1>&2; exit 1; fi) endif -# If the makefile can't find QEMU, specify its path here -QEMU = qemu-system-x86_64 - -# Try to infer the correct QEMU -ifndef QEMU -QEMU = $(shell if which qemu > /dev/null; \ - then echo qemu; exit; \ - elif which qemu-system-i386 > /dev/null; \ - then echo qemu-system-i386; exit; \ - elif which qemu-system-x86_64 > /dev/null; \ - then echo qemu-system-x86_64; exit; \ - else \ - qemu=/Applications/Q.app/Contents/MacOS/i386-softmmu.app/Contents/MacOS/i386-softmmu; \ - if test -x $$qemu; then echo $$qemu; exit; fi; fi; \ - echo "***" 1>&2; \ - echo "*** Error: Couldn't find a working QEMU executable." 1>&2; \ - echo "*** Is the directory containing the qemu binary in your PATH" 1>&2; \ - echo "*** or have you tried setting the QEMU variable in Makefile?" 1>&2; \ - echo "***" 1>&2; exit 1) -endif +QEMU = qemu-system-riscv64 CC = $(TOOLPREFIX)gcc AS = $(TOOLPREFIX)gas @@ -77,15 +68,10 @@ LD = $(TOOLPREFIX)ld OBJCOPY = $(TOOLPREFIX)objcopy OBJDUMP = $(TOOLPREFIX)objdump -XFLAGS = -m64 -mcmodel=large -ggdb -# CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -Werror -fno-omit-frame-pointer -CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -Wall -MD -ggdb -Werror -fno-omit-frame-pointer -CFLAGS += -ffreestanding -fno-common -nostdlib $(XFLAGS) +CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -Wall -MD -ggdb -Werror -fno-omit-frame-pointer -O +CFLAGS += -mcmodel=medany +CFLAGS += -ffreestanding -fno-common -nostdlib -mno-relax CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) -ASFLAGS = -gdwarf-2 -Wa,-divide $(XFLAGS) -# FreeBSD ld wants ``elf_i386_fbsd'' -LDFLAGS += -m $(shell $(LD) -V | grep elf_x86_64 2>/dev/null | head -n 1) -LDFLAGS += -z max-page-size=4096 # Disable PIE when possible (for Ubuntu 16.10 toolchain) ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),) @@ -95,21 +81,17 @@ ifneq
($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]nopie'),) CFLAGS += -fno-pie -nopie endif -kernel: $(OBJS) entry.o entryother initcode kernel.ld - $(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother +LDFLAGS = -z max-page-size=4096 + +kernel: $(OBJS) entry.o kernel.ld + $(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) $(OBJDUMP) -S kernel > kernel.asm $(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym -entryother: entryother.S - $(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c entryother.S - $(LD) $(LDFLAGS) -N -e start -Ttext 0x7000 -o bootblockother.o entryother.o - $(OBJCOPY) -S -O binary -j .text bootblockother.o entryother - $(OBJDUMP) -S bootblockother.o > entryother.asm - initcode: initcode.S $(CC) $(CFLAGS) -nostdinc -I. -c initcode.S - $(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o - $(OBJCOPY) -S -O binary initcode.out initcode + #$(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o + #$(OBJCOPY) -S -O binary initcode.out initcode $(OBJDUMP) -S initcode.o > initcode.asm tags: $(OBJS) entryother.S _init @@ -186,19 +168,18 @@ QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \ then echo "-gdb tcp::$(GDBPORT)"; \ else echo "-s -p $(GDBPORT)"; fi) ifndef CPUS -CPUS := 2 +CPUS := 1 endif -QEMUOPTS = -kernel kernel -drive file=fs.img,index=1,media=disk,format=raw -smp $(CPUS) -m 512 $(QEMUEXTRA) -qemu: fs.img - $(QEMU) -serial mon:stdio $(QEMUOPTS) +QEMUOPTS = -machine virt -kernel kernel -m 3G -smp $(CPUS) -nographic +#QEMUOPTS += -initrd fs.img -qemu-nox: fs.img kernel - $(QEMU) -nographic $(QEMUOPTS) +qemu: kernel + $(QEMU) $(QEMUOPTS) -.gdbinit: .gdbinit.tmpl-x64 - sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@ +.gdbinit: .gdbinit.tmpl-riscv + sed "s/:1234/:$(GDBPORT)/" < $^ > $@ -qemu-gdb: fs.img kernel .gdbinit +qemu-gdb: kernel .gdbinit @echo "*** Now run 'gdb'." 
1>&2 $(QEMU) $(QEMUOPTS) -S $(QEMUGDB) diff --git a/console.c b/console.c index 9986a9c..25a621a 100644 --- a/console.c +++ b/console.c @@ -5,17 +5,14 @@ #include #include "types.h" -#include "defs.h" #include "param.h" -#include "traps.h" #include "spinlock.h" #include "sleeplock.h" #include "fs.h" #include "file.h" #include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" +#include "riscv.h" +#include "defs.h" static void consputc(int); @@ -28,6 +25,12 @@ static struct { static char digits[] = "0123456789abcdef"; +void +consoleinit(void) +{ + initlock(&cons.lock, "console"); +} + static void printint(int xx, int base, int sign) { @@ -66,7 +69,7 @@ printptr(uint64 x) { // Print to the console. only understands %d, %x, %p, %s. void -cprintf(char *fmt, ...) +printf(char *fmt, ...) { va_list ap; int i, c, locking; @@ -122,67 +125,20 @@ cprintf(char *fmt, ...) void panic(char *s) { - int i; - uint64 pcs[10]; - - cli(); - cons.locking = 0; - // use lapiccpunum so that we can call panic from mycpu() - cprintf("lapicid %d: panic: ", lapicid()); - cprintf(s); - cprintf("\n"); - getcallerpcs(&s, pcs); - for(i=0; i<10; i++) - cprintf(" %p", pcs[i]); + printf("panic: "); + printf(s); + printf("\n"); panicked = 1; // freeze other CPU for(;;) ; } -//PAGEBREAK: 50 #define BACKSPACE 0x100 -#define CRTPORT 0x3d4 -static ushort *crt = (ushort*)P2V(0xb8000); // CGA memory - -static void -cgaputc(int c) -{ - int pos; - - // Cursor position: col + 80*row. - outb(CRTPORT, 14); - pos = inb(CRTPORT+1) << 8; - outb(CRTPORT, 15); - pos |= inb(CRTPORT+1); - - if(c == '\n') - pos += 80 - pos%80; - else if(c == BACKSPACE){ - if(pos > 0) --pos; - } else - crt[pos++] = (c&0xff) | 0x0700; // black on white - - if(pos < 0 || pos > 25*80) - panic("pos under/overflow"); - - if((pos/80) >= 24){ // Scroll up. 
- memmove(crt, crt+80, sizeof(crt[0])*23*80); - pos -= 80; - memset(crt+pos, 0, sizeof(crt[0])*(24*80 - pos)); - } - - outb(CRTPORT, 14); - outb(CRTPORT+1, pos>>8); - outb(CRTPORT, 15); - outb(CRTPORT+1, pos); - crt[pos] = ' ' | 0x0700; -} void consputc(int c) { if(panicked){ - cli(); for(;;) ; } @@ -191,125 +147,4 @@ consputc(int c) uartputc('\b'); uartputc(' '); uartputc('\b'); } else uartputc(c); - cgaputc(c); } - -#define INPUT_BUF 128 -struct { - char buf[INPUT_BUF]; - uint r; // Read index - uint w; // Write index - uint e; // Edit index -} input; - -#define C(x) ((x)-'@') // Control-x - -void -consoleintr(int (*getc)(void)) -{ - int c, doprocdump = 0; - - acquire(&cons.lock); - while((c = getc()) >= 0){ - switch(c){ - case C('P'): // Process listing. - // procdump() locks cons.lock indirectly; invoke later - doprocdump = 1; - break; - case C('U'): // Kill line. - while(input.e != input.w && - input.buf[(input.e-1) % INPUT_BUF] != '\n'){ - input.e--; - consputc(BACKSPACE); - } - break; - case C('H'): case '\x7f': // Backspace - if(input.e != input.w){ - input.e--; - consputc(BACKSPACE); - } - break; - default: - if(c != 0 && input.e-input.r < INPUT_BUF){ - c = (c == '\r') ? '\n' : c; - input.buf[input.e++ % INPUT_BUF] = c; - consputc(c); - if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){ - input.w = input.e; - wakeup(&input.r); - } - } - break; - } - } - release(&cons.lock); - if(doprocdump) { - procdump(); // now call procdump() wo. cons.lock held - } -} - -int -consoleread(struct inode *ip, char *dst, int n) -{ - uint target; - int c; - - iunlock(ip); - target = n; - acquire(&cons.lock); - while(n > 0){ - while(input.r == input.w){ - if(myproc()->killed){ - release(&cons.lock); - ilock(ip); - return -1; - } - sleep(&input.r, &cons.lock); - } - c = input.buf[input.r++ % INPUT_BUF]; - if(c == C('D')){ // EOF - if(n < target){ - // Save ^D for next time, to make sure - // caller gets a 0-byte result. 
- input.r--; - } - break; - } - *dst++ = c; - --n; - if(c == '\n') - break; - } - release(&cons.lock); - ilock(ip); - - return target - n; -} - -int -consolewrite(struct inode *ip, char *buf, int n) -{ - int i; - - iunlock(ip); - acquire(&cons.lock); - for(i = 0; i < n; i++) - consputc(buf[i] & 0xff); - release(&cons.lock); - ilock(ip); - - return n; -} - -void -consoleinit(void) -{ - initlock(&cons.lock, "console"); - - devsw[CONSOLE].write = consolewrite; - devsw[CONSOLE].read = consoleread; - cons.locking = 1; - - ioapicenable(IRQ_KBD, 0); -} - diff --git a/defs.h b/defs.h index 3735f6f..d86e495 100644 --- a/defs.h +++ b/defs.h @@ -19,7 +19,7 @@ void bwrite(struct buf*); // console.c void consoleinit(void); -void cprintf(char*, ...); +void printf(char*, ...); void consoleintr(int(*)(void)); void panic(char*) __attribute__((noreturn)); @@ -65,10 +65,9 @@ extern uchar ioapicid; void ioapicinit(void); // kalloc.c -char* kalloc(void); -void kfree(char*); -void kinit1(void*, void*); -void kinit2(void*, void*); +void* kalloc(void); +void kfree(void *); +void kinit(); // kbd.c void kbdintr(void); @@ -112,7 +111,7 @@ int kill(int); struct cpu* mycpu(void); struct cpu* getmycpu(void); struct proc* myproc(); -void pinit(void); +void procinit(void); void procdump(void); void scheduler(void) __attribute__((noreturn)); void sched(void); @@ -124,7 +123,7 @@ void wakeup(void*); void yield(void); // swtch.S -void swtch(struct context**, struct context*); +void swtch(struct context*, struct context*); // spinlock.c void acquire(struct spinlock*); @@ -158,16 +157,16 @@ int argaddr(int, uint64 *); int fetchint(uint64, int*); int fetchstr(uint64, char**); int fetchaddr(uint64, uint64*); -void syscall(struct sysframe*); +void syscall(); // timer.c void timerinit(void); // trap.c -void idtinit(void); extern uint ticks; -void tvinit(void); +void trapinit(void); extern struct spinlock tickslock; +void usertrapret(void); // uart.c void uartinit(void); @@ -175,20 +174,15 @@ void 
uartintr(void); void uartputc(int); // vm.c -void seginit(void); -void kvmalloc(void); -pde_t* setupkvm(void); -char* uva2ka(pde_t*, char*); -int allocuvm(pde_t*, uint, uint); -int deallocuvm(pde_t*, uint64, uint64); -void freevm(pde_t*, uint64); -void inituvm(pde_t*, char*, uint); -int loaduvm(pde_t*, char*, struct inode*, uint, uint); -pde_t* copyuvm(pde_t*, uint); -void switchuvm(struct proc*); -void switchkvm(void); -int copyout(pde_t*, uint, void*, uint); -void clearpteu(pde_t *pgdir, char *uva); +void kvminit(void); +void kvmswitch(void); +pagetable_t uvmcreate(void); +void uvminit(pagetable_t, char *, uint); +int uvmdealloc(pagetable_t, uint64, uint64); +void uvmcopy(pagetable_t, pagetable_t, uint64); +void uvmfree(pagetable_t, uint64); +void mappages(pagetable_t, uint64, uint64, uint64, int); +void unmappages(pagetable_t, uint64, uint64, int); // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/entry.S b/entry.S index 0aacb4c..8b3316c 100644 --- a/entry.S +++ b/entry.S @@ -1,223 +1,22 @@ -# x86-64 bootstrap, assuming load by MultiBoot-compliant loader. -# The MutliBoot specification is at: -# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html -# GRUB is a MultiBoot loader, as is qemu's -kernel option. - -#include "mmu.h" -#include "memlayout.h" - -# STACK is the size of the bootstrap stack. -#define STACK 8192 - -# MultiBoot header. -# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html#Header-layout -.align 4 -.text -.globl multiboot_header -multiboot_header: - #define magic 0x1badb002 - #define flags (1<<16 | 1<<0) - .long magic - .long flags - .long (- magic - flags) # checksum - .long V2P_WO(multiboot_header) # header address - .long V2P_WO(multiboot_header) # load address - .long V2P_WO(edata) # load end address - .long V2P_WO(end) # bss end address - .long V2P_WO(start) # entry address - -# Entry point jumped to by boot loader. Running in 32-bit mode. 
-# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html#Machine-state -# -# EAX = 0x2badb002 -# EBX = address of multiboot information structure -# CS = 32-bit read/execute code segment with identity map -# DS, ES, FS, GS, SS = 32-bit read/write data segment with identity map -# A20 gate = enabled -# CR0 = PE set, PG clear -# EFLAGS = VM clear, IF clear -# -.code32 -.globl start -start: - # Tell BIOS to do "warm reboot" when we shut down. - movw $0x1234, 0x472 - - # Set up multiboot arguments for main. - movl %eax, %edi - movl %ebx, %esi - - # Initialize stack. - movl $V2P_WO(stack+STACK), %esp - - # Zero bss. QEMU's MultiBoot seems not to. - # It's possible that the header above is not right, but it looks right. - # %edi is holding multiboot argument, so save in another register. - # (The stack is in the bss.) - movl %edi, %edx - movl $V2P_WO(edata), %edi - movl $V2P_WO(end), %ecx - subl $V2P_WO(edata), %ecx - movl $0, %eax - cld - rep stosb - movl %edx, %edi - - call loadgdt - - # Enter new 32-bit code segment (already in 32-bit mode). - ljmp $SEG_KCODE32, $V2P_WO(start32) // code32 segment selector - -start32: - # Initialize page table. - call initpagetables - call init32e - - movl $V2P_WO(start64), %eax - # Enter 64-bit mode. - ljmp $SEG_KCODE, $V2P_WO(tramp64) // code64 segment selector - -.code64 -start64: - # Load VA of stack - movabsq $(stack+STACK), %rsp - # Clear frame pointer for stack walks - movl $0, %ebp - # Call into C code. - call main - # should not return from main - jmp . - -.code32 -.global apstart -apstart: - call loadgdt - ljmp $SEG_KCODE32, $V2P_WO(apstart32) // code32 segment selector - -apstart32: - call init32e - movl $V2P_WO(apstart64), %eax - ljmp $SEG_KCODE, $V2P_WO(tramp64) // code64 segment selector - -.code64 -apstart64: - # Remember (from bootothers), that our kernel stack pointer is - # at the top of our temporary stack. - popq %rax - movq %rax, %rsp - movq $0, %rbp - call apmain - jmp . 
- -.code64 -tramp64: - # The linker thinks we are running at tramp64, but we're actually - # running at PADDR(tramp64), so use an explicit calculation to - # load and jump to the correct address. %rax should hold the - # physical address of the jmp target. - movq $KERNBASE, %r11 - addq %r11, %rax - jmp *%rax - -# Initial stack -.comm stack, STACK - -# Page tables. See section 4.5 of 253668.pdf. -# We map the first GB of physical memory at 0 and at 1 TB (not GB) before -# the end of virtual memory. At boot time we are using the mapping at 0 -# but during ordinary execution we use the high mapping. -# The intent is that after bootstrap the kernel can expand this mapping -# to cover all the available physical memory. -# This would be easier if we could use the PS bit to create GB-sized entries -# and skip the pdt table, but not all chips support it, and QEMU doesn't. -.align 4096 -pml4: - .quad V2P_WO(pdpt) + PTE_P + PTE_W // present, read/write - .quad 0 - .space 4096 - 2*16 - .quad V2P_WO(pdpt) + PTE_P + PTE_W - .quad 0 - -.align 4096 -pdpt: - .quad V2P_WO(pdt) + PTE_P + PTE_W - .space 4096 - 8 - -.align 4096 -pdt: - // Filled in below. - .space 4096 - -.code32 -initpagetables: - pushl %edi - pushl %ecx - pushl %eax - - // Set up 64-bit entry in %edx:%eax. - // Base address 0, present, read/write, large page. - movl $(0 | PTE_P | PTE_W | PTE_PS), %eax - movl $0, %edx - - // Fill in 512 entries at pdt. - movl $V2P_WO(pdt), %edi - movl $512, %ecx -1: - // Write this 64-bit entry. - movl %eax, 0(%edi) - movl %edx, 4(%edi) - addl $8, %edi - // 64-bit add to prepare address for next entry. - // Because this is a large page entry, it covers 512 4k pages (2 MB). - add $(512*4096), %eax - adc $0, %edx - loop 1b - - popl %eax - popl %ecx - popl %edi - ret - -# Initialize IA-32e mode. See section 9.8.5 of 253668.pdf. -init32e: - # Set CR4.PAE and CR4.PSE = 1. - movl %cr4, %eax - orl $0x30, %eax - movl %eax, %cr4 - - # Load CR3 with physical base address of level 4 page table. 
- movl $V2P_WO(pml4), %eax - movl %eax, %cr3 - - # Enable IA-32e mode by setting IA32_EFER.LME = 1. - # Also turn on IA32_EFER.SCE (syscall enable). - movl $0xc0000080, %ecx - rdmsr - orl $0x101, %eax - wrmsr - - # Enable paging by setting CR0.PG = 1. - movl %cr0, %eax - orl $0x80000000, %eax - movl %eax, %cr0 - nop - nop - - ret - -loadgdt: - subl $8, %esp - movl $V2P_WO(bootgdt), 4(%esp) - movw $(8*NSEGS-1), 2(%esp) - lgdt 2(%esp) - addl $8, %esp - - movl $SEG_KDATA, %eax // data segment selector - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - movl $0, %eax // null segment selector - movw %ax, %fs - movw %ax, %gs - - ret + # qemu -kernel starts at 0x1000. the instructions + # there seem to be provided by qemu, as if it + # were a ROM. the code at 0x1000 jumps to + # 0x80000000, the _entry function here, + # in machine mode. +.section .data +.globl stack0 +.section .text +.globl mstart +.section .text +.globl _entry +_entry: + # set up a stack for C; stack0 is declared in start. + la sp, stack0 + addi sp, sp, 1024 + addi sp, sp, 1024 + addi sp, sp, 1024 + addi sp, sp, 1024 + # jump to mstart() in start.c + call mstart +junk: + j junk diff --git a/exec.c b/exec.c index 743437a..6d0ef24 100644 --- a/exec.c +++ b/exec.c @@ -19,8 +19,8 @@ exec(char *path, char **argv) struct inode *ip; struct proghdr ph; pde_t *pgdir, *oldpgdir; - struct proc *curproc = myproc(); - uint64 oldsz = curproc->sz; + struct proc *p = myproc(); + uint64 oldsz = p->sz; begin_op(); @@ -85,8 +85,8 @@ exec(char *path, char **argv) ustack[1] = argc; ustack[2] = sp - (argc+1)*sizeof(uint64); // argv pointer - curproc->sf->rdi = argc; - curproc->sf->rsi = sp - (argc+1)*sizeof(uint64); + p->sf->rdi = argc; + p->sf->rsi = sp - (argc+1)*sizeof(uint64); sp -= (3+argc+1) * sizeof(uint64); if(copyout(pgdir, sp, ustack, (3+argc+1)*sizeof(uint64)) < 0) @@ -96,15 +96,15 @@ exec(char *path, char **argv) for(last=s=path; *s; s++) if(*s == '/') last = s+1; - safestrcpy(curproc->name, last,
sizeof(curproc->name)); + safestrcpy(p->name, last, sizeof(p->name)); // Commit to the user image. - oldpgdir = curproc->pgdir; - curproc->pgdir = pgdir; - curproc->sz = sz; - curproc->sf->rcx = elf.entry; // main - curproc->sf->rsp = sp; - switchuvm(curproc); + oldpgdir = p->pgdir; + p->pgdir = pgdir; + p->sz = sz; + p->sf->rcx = elf.entry; // main + p->sf->rsp = sp; + switchuvm(p); freevm(oldpgdir, oldsz); return 0; diff --git a/initcode.S b/initcode.S index e097394..ca76972 100644 --- a/initcode.S +++ b/initcode.S @@ -2,22 +2,20 @@ # This code runs in user space. #include "syscall.h" -#include "traps.h" - # exec(init, argv) .globl start start: - mov $init, %rdi - mov $argv, %rsi - mov $SYS_exec, %rax - syscall + la a0, init + la a1, argv + li a7, SYS_exec + ecall # for(;;) exit(); exit: - mov $SYS_exit, %rax - syscall - jmp exit + li a7, SYS_exit + ecall + jal exit # char init[] = "/init\0"; init: @@ -28,4 +26,3 @@ init: argv: .long init .long 0 - diff --git a/kalloc.c b/kalloc.c index fb939b7..c943e5e 100644 --- a/kalloc.c +++ b/kalloc.c @@ -3,13 +3,14 @@ // and pipe buffers. Allocates 4096-byte pages. #include "types.h" -#include "defs.h" #include "param.h" #include "memlayout.h" -#include "mmu.h" #include "spinlock.h" +#include "riscv.h" +#include "defs.h" + +void freerange(void *pa_start, void *pa_end); -void freerange(void *vstart, void *vend); extern char end[]; // first address after kernel loaded from ELF file // defined by the kernel linker script in kernel.ld @@ -19,36 +20,22 @@ struct run { struct { struct spinlock lock; - int use_lock; struct run *freelist; } kmem; -// Initialization happens in two phases. -// 1. main() calls kinit1() while still using entrypgdir to place just -// the pages mapped by entrypgdir on free list. -// 2. main() calls kinit2() with the rest of the physical pages -// after installing a full page table that maps them on all cores. 
void -kinit1(void *vstart, void *vend) +kinit() { initlock(&kmem.lock, "kmem"); - kmem.use_lock = 0; - freerange(vstart, vend); + freerange(end, (void*)PHYSTOP); } void -kinit2(void *vstart, void *vend) -{ - freerange(vstart, vend); - kmem.use_lock = 1; -} - -void -freerange(void *vstart, void *vend) +freerange(void *pa_start, void *pa_end) { char *p; - p = (char*)PGROUNDUP((uint64)vstart); - for(; p + PGSIZE <= (char*)vend; p += PGSIZE) + p = (char*)PGROUNDUP((uint64)pa_start); + for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE) kfree(p); } //PAGEBREAK: 21 @@ -57,42 +44,37 @@ freerange(void *vstart, void *vend) // call to kalloc(). (The exception is when // initializing the allocator; see kinit above.) void -kfree(char *v) +kfree(void *pa) { struct run *r; - if((uint64)v % PGSIZE || v < end || V2P(v) >= PHYSTOP) + if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP) panic("kfree"); // Fill with junk to catch dangling refs. - memset(v, 1, PGSIZE); + memset(pa, 1, PGSIZE); - if(kmem.use_lock) - acquire(&kmem.lock); - r = (struct run*)v; + acquire(&kmem.lock); + r = (struct run*)pa; r->next = kmem.freelist; kmem.freelist = r; - if(kmem.use_lock) - release(&kmem.lock); + release(&kmem.lock); } // Allocate one 4096-byte page of physical memory. // Returns a pointer that the kernel can use. // Returns 0 if the memory cannot be allocated. 
-char* +void * kalloc(void) { struct run *r; - if(kmem.use_lock) - acquire(&kmem.lock); + acquire(&kmem.lock); r = kmem.freelist; if(r) kmem.freelist = r->next; - if(kmem.use_lock) - release(&kmem.lock); - if(r != 0 && (uint64) r < KERNBASE) - panic("kalloc"); - return (char*)r; + release(&kmem.lock); + if(r) memset((char*)r, 5, PGSIZE); // fill with junk + return (void*)r; } diff --git a/kernel.ld b/kernel.ld index 11dc98f..08fc280 100644 --- a/kernel.ld +++ b/kernel.ld @@ -1,50 +1,33 @@ -OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") -OUTPUT_ARCH(i386:x86-64) +OUTPUT_ARCH( "riscv" ) +ENTRY( _entry ) SECTIONS { - . = 0xFFFFFF0000100000; - PROVIDE(text = .); - .text : AT(0x100000) { - *(.text .stub .text.* .gnu.linkonce.t.*) - } - .rodata : { - *(.rodata .rodata.* .gnu.linkonce.r.*) - } + /* + * ensure that entry.S / _entry is at 0x80000000, + * where qemu's -kernel jumps. + */ + . = 0x80000000; + .text : + { + *(.text) + . = ALIGN(0x1000); + *(trampoline) + } - /* Include debugging information in kernel memory */ - .stab : { - PROVIDE(__STAB_BEGIN__ = .); - *(.stab); - PROVIDE(__STAB_END__ = .); - BYTE(0) /* Force the linker to allocate space - for this section */ - } + . = ALIGN(0x1000); + PROVIDE(etext = .); - .stabstr : { - PROVIDE(__STABSTR_BEGIN__ = .); - *(.stabstr); - PROVIDE(__STABSTR_END__ = .); - BYTE(0) /* Force the linker to allocate space - for this section */ - } + /* + * make sure end is after data and bss. + */ + .data : { + *(.data) + } + bss : { + *(.bss) + } - . = ALIGN(0x1000); - - /* Conventionally, Unix linkers provide pseudo-symbols - * etext, edata, and end, at the end of the text, data, and bss. - * For the kernel mapping, we need the address at the beginning - * of the data section, but that's not one of the conventional - * symbols, because the convention started before there was a - * read-only rodata section between text and data.
*/ - PROVIDE(data = .); - .data : { - *(.data) - } - bss : { - PROVIDE(edata = .); - *(.bss) - *(COMMON) - PROVIDE(end = .); - } + . = ALIGN(0x1000); + PROVIDE(end = .); } diff --git a/main.c b/main.c index 3127b15..04e822a 100644 --- a/main.c +++ b/main.c @@ -1,105 +1,28 @@ #include "types.h" -#include "defs.h" #include "param.h" #include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" - -extern pde_t *kpgdir; -extern char end[]; // first address after kernel loaded from ELF file - -static void mpmain(void) __attribute__((noreturn)); -static void startothers(void); - +#include "riscv.h" +#include "defs.h" // Bootstrap processor starts running C code here. // Allocate a real stack and switch to it, first // doing some setup required for memory allocator to work. -int -main(uint64 mbmagic, uint64 mbaddr) +void +main() { - if(mbmagic != 0x2badb002) - panic("multiboot header not found"); - - kinit1(end, P2V(4*1024*1024)); // phys page allocator - kvmalloc(); // kernel page table - mpinit(); // detect other processors - lapicinit(); // interrupt controller - seginit(); // segment descriptors - picinit(); // disable pic - ioapicinit(); // another interrupt controller - consoleinit(); // console hardware uartinit(); // serial port - pinit(); // process table - tvinit(); // trap vectors + consoleinit(); + printf("entering main()\n"); + kinit(); // physical page allocator + kvminit(); // kernel page table + procinit(); // process table + trapinit(); // trap vectors +#if 0 binit(); // buffer cache fileinit(); // file table ideinit(); // disk - - startothers(); // start other processors - - kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers() +#endif userinit(); // first user process - mpmain(); - return 0; + + scheduler(); } - -extern struct cpu* getmycpu(); - -// Common CPU setup code. 
-static void -mpmain(void) -{ - cprintf("cpu%d: starting %d\n", cpuid(), cpuid()); - idtinit(); // load idt register - xchg(&(mycpu()->started), 1); // tell startothers() we're up - scheduler(); // start running processes -} - -// AP processors jump here from entryother.S. -void -apmain(void) -{ - switchkvm(); - seginit(); - lapicinit(); - mpmain(); -} - -void apstart(void); - -// Start the non-boot (AP) processors. -static void -startothers(void) -{ - extern uchar _binary_entryother_start[], _binary_entryother_size[]; - uchar *code; - struct cpu *c; - char *stack; - - // Write entry code to unused memory at 0x7000. - // The linker has placed the image of entryother.S in - // _binary_entryother_start. - code = P2V(0x7000); - memmove(code, _binary_entryother_start, (uint64)_binary_entryother_size); - - for(c = cpus; c < cpus+ncpu; c++){ - if(c == mycpu()) // We've started already. - continue; - - // Tell entryother.S what stack to use, where to enter, and what - // pgdir to use. We cannot use kpgdir yet, because the AP processor - // is running in low memory, so we use entrypgdir for the APs too. - stack = kalloc(); - *(uint32*)(code-4) = V2P(apstart); - *(uint64*)(code-12) = (uint64) (stack+KSTACKSIZE); - - lapicstartap(c->apicid, V2P(code)); - - // wait for cpu to finish mpmain() - while(c->started == 0) - ; - } -} - diff --git a/memlayout.h b/memlayout.h index 87818d3..798621e 100644 --- a/memlayout.h +++ b/memlayout.h @@ -1,16 +1,25 @@ -// Memory layout +// Physical memory layout -#define EXTMEM 0x100000 // Start of extended memory -#define PHYSTOP 0xE000000 // Top physical memory -#define DEVSPACE 0xFE000000 // Other devices are top of 32-bit address space -#define DEVSPACETOP 0x100000000 +// qemu -machine virt is set up like this: +// 00001000 -- boot ROM, provided by qemu +// 10000000 -- uart0 registers +// 80000000 -- boot ROM jumps here in machine mode +// unused RAM after 80000000. 
-// Key addresses for address space layout (see kmap in vm.c for layout) -#define KERNBASE 0xFFFFFF0000000000 // First kernel virtual address -#define KERNLINK (KERNBASE+EXTMEM) // Address where kernel is linked +// the kernel uses physical memory thus: +// 80000000 -- entry.S, then kernel text and data +// end -- start of kernel page allocation area +// PHYSTOP -- end RAM used by the kernel -#define V2P(a) (((uint64) (a)) - KERNBASE) -#define P2V(a) ((void *)(((char *) (a)) + KERNBASE)) +// registers start here in physical memory. +#define UART0 0x10000000L -#define V2P_WO(x) ((x) - KERNBASE) // same as V2P, but without casts -#define P2V_WO(x) ((x) + KERNBASE) // same as P2V, but without casts +// the kernel expects there to be RAM +// for use by the kernel and user pages +// from physical address 0x80000000 to PHYSTOP. +#define KERNBASE 0x80000000L +#define PHYSTOP (KERNBASE + 64*1024*1024) + +// map the trampoline page to the highest address, +// in both user and kernel space. +#define TRAMPOLINE (MAXVA - PGSIZE) diff --git a/mmu.h b/mmu.h deleted file mode 100644 index c8b45fc..0000000 --- a/mmu.h +++ /dev/null @@ -1,160 +0,0 @@ -// This file contains definitions for the -// x86 memory management unit (MMU). - -// Eflags register -#define FL_TF 0x00000100 // Trap Flag -#define FL_IF 0x00000200 // Interrupt Enable - - -// Control Register flags -#define CR0_PE 0x00000001 // Protection Enable -#define CR0_WP 0x00010000 // Write Protect -#define CR0_PG 0x80000000 // Paging - -#define CR4_PSE 0x00000010 // Page size extension - -// Segment selectors (indexes) in our GDTs. -// Defined by our convention, not the architecture. 
-#define SEG_KCODE32 (1<<3) // kernel 32-bit code segment -#define SEG_KCODE (2<<3) // kernel code segment -#define SEG_KDATA (3<<3) // kernel data segment -#define SEG_TSS (4<<3) // tss segment - takes two slots -#define SEG_UDATA (6<<3) // user data segment -#define SEG_UCODE (7<<3) // user code segment - -#define NSEGS 8 - -#ifndef __ASSEMBLER__ -struct segdesc { - uint16 limit0; - uint16 base0; - uint8 base1; - uint8 bits; - uint8 bitslimit1; - uint8 base2; -}; - -// SEGDESC constructs a segment descriptor literal -// with the given, base, limit, and type bits. -#define SEGDESC(base, limit, bits) (struct segdesc){ \ - (limit)&0xffff, (base)&0xffff, \ - ((base)>>16)&0xff, \ - (bits)&0xff, \ - (((bits)>>4)&0xf0) | ((limit>>16)&0xf), \ - ((base)>>24)&0xff, \ -} - -// SEGDESCHI constructs an extension segment descriptor -// literal that records the high bits of base. -#define SEGDESCHI(base) (struct segdesc) { \ - (((base)>>32)&0xffff), (((base)>>48)&0xffff), \ -} - -#endif - -#define DPL_USER 0x3 // User DPL - -#define SEG_A (1<<0) // segment accessed bit -#define SEG_R (1<<1) // readable (code) -#define SEG_W (1<<1) // writable (data) -#define SEG_C (1<<2) // conforming segment (code) -#define SEG_E (1<<2) // expand-down bit (data) -#define SEG_CODE (1<<3) // code segment (instead of data) - -// User and system segment bits. 
-#define SEG_S (1<<4) // if 0, system descriptor -#define SEG_DPL(x) ((x)<<5) // descriptor privilege level (2 bits) -#define SEG_P (1<<7) // segment present -#define SEG_AVL (1<<8) // available for operating system use -#define SEG_L (1<<9) // long mode -#define SEG_D (1<<10) // default operation size 32-bit -#define SEG_G (1<<11) // granularity - -// Application segment type bits -#define STA_X 0x8 // Executable segment -#define STA_W 0x2 // Writeable (non-executable segments) -#define STA_R 0x2 // Readable (executable segments) - -// System segment type bits -#define SEG_LDT (2<<0) // local descriptor table -#define SEG_TSS64A (9<<0) // available 64-bit TSS -#define SEG_TSS64B (11<<0) // busy 64-bit TSS -#define SEG_CALL64 (12<<0) // 64-bit call gate -#define SEG_INTR64 (14<<0) // 64-bit interrupt gate -#define SEG_TRAP64 (15<<0) // 64-bit trap gate - -// A virtual address 'la' has a six-part structure as follows: -// -// +--16--+---9---+------9-------+-----9----+----9-------+----12-------+ -// | Sign | PML4 |Page Directory| Page Dir |Page Table | Offset Page | -// |Extend| Index | Pointer Index| Index | Index | in Page | -// +------+-------+--------------+----------+------------+-------------+ -// L3 pgtab L2 pgtab L1 pgtab L0 pgtab - -// Page directory and page table constants. -#define NPDENTRIES 512 // # directory entries per page directory -#define PGSIZE 4096 // bytes mapped by a page -#define PGSHIFT 12 // offset of PTX in a linear address - -#define PXMASK 0x1FF -#define PXSHIFT(n) (PGSHIFT+(9*(n))) // shift for index into level n page table -#define PX(n, va) ((((uint64) (va)) >> PXSHIFT(n)) & PXMASK) -#define L_PML4 3 - -#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1)) -#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) - -// Page table/directory entry flags. 
-#define PTE_P 0x001 // Present -#define PTE_W 0x002 // Writeable -#define PTE_U 0x004 // User -#define PTE_PS 0x080 // Page Size -#define PTE_PWT 0x008 // Write-Through -#define PTE_PCD 0x010 // Cache-Disable - -// Address in page table or page directory entry -#define PTE_ADDR(pte) ((uint64)(pte) & ~0xFFF) -#define PTE_FLAGS(pte) ((uint64)(pte) & 0xFFF) - -#ifndef __ASSEMBLER__ - -typedef uint64 pte_t; - -struct taskstate { - uint8 reserved0[4]; - uint64 rsp[3]; - uint64 ist[8]; - uint8 reserved1[10]; - uint16 iomba; - uint8 iopb[0]; -} __attribute__ ((packed)); - -#define INT_P (1<<7) // interrupt descriptor present - -struct intgate -{ - uint16 rip0; - uint16 cs; - uint8 reserved0; - uint8 bits; - uint16 rip1; - uint32 rip2; - uint32 reserved1; -}; - -// INTDESC constructs an interrupt descriptor literal -// that records the given code segment, instruction pointer, -// and type bits. -#define INTDESC(cs, rip, bits) (struct intgate){ \ - (rip)&0xffff, (cs), 0, bits, ((rip)>>16)&0xffff, \ - (uint64)(rip)>>32, 0, \ -} - -// See section 4.6 of amd64 vol2 -struct desctr -{ - uint16 limit; - uint64 base; -} __attribute__((packed, aligned(16))); // important! 
- -#endif diff --git a/msr.h b/msr.h deleted file mode 100644 index ad901a6..0000000 --- a/msr.h +++ /dev/null @@ -1,25 +0,0 @@ -// SYSCALL and SYSRET registers -#define MSR_STAR 0xc0000081 -#define MSR_LSTAR 0xc0000082 -#define MSR_CSTAR 0xc0000083 -#define MSR_SFMASK 0xc0000084 - -// GS -#define MSR_GS_BASE 0xc0000101 -#define MSR_GS_KERNBASE 0xc0000102 - -static inline uint64 -readmsr(uint32 msr) -{ - uint32 hi, lo; - __asm volatile("rdmsr" : "=d" (hi), "=a" (lo) : "c" (msr)); - return ((uint64) lo) | (((uint64) hi) << 32); -} - -static inline void -writemsr(uint64 msr, uint64 val) -{ - uint32 lo = val & 0xffffffff; - uint32 hi = val >> 32; - __asm volatile("wrmsr" : : "c" (msr), "a" (lo), "d" (hi) : "memory"); -} diff --git a/param.h b/param.h index a7e90ef..24f8c8f 100644 --- a/param.h +++ b/param.h @@ -1,5 +1,4 @@ #define NPROC 64 // maximum number of processes -#define KSTACKSIZE 4096 // size of per-process kernel stack #define NCPU 8 // maximum number of CPUs #define NOFILE 16 // open files per process #define NFILE 100 // open files per system diff --git a/proc.c b/proc.c index 3c0acbd..e574db0 100644 --- a/proc.c +++ b/proc.c @@ -1,18 +1,20 @@ #include "types.h" -#include "defs.h" #include "param.h" #include "memlayout.h" -#include "mmu.h" -#include "x86.h" +#include "riscv.h" #include "proc.h" #include "spinlock.h" +#include "defs.h" struct { struct spinlock lock; struct proc proc[NPROC]; } ptable; -static struct proc *initproc; +// XXX riscv move somewhere else +struct cpu cpus[NCPU]; + +struct proc *initproc; int nextpid = 1; extern void forkret(void); @@ -22,57 +24,36 @@ extern void sysexit(void); static void wakeup1(void *chan); +extern char trampstart[]; // trampoline.S + void -pinit(void) +procinit(void) { initlock(&ptable.lock, "ptable"); } -// Must be called with interrupts disabled +// Must be called with interrupts disabled. 
+// XXX riscv int cpuid() { - return mycpu()-cpus; + return 0; } -// Must be called with interrupts disabled to avoid the caller being -// rescheduled between reading lapicid and running through the loop. -struct cpu* -getmycpu(void) -{ - int apicid, i; - - if(readeflags()&FL_IF) - panic("getmycpu called with interrupts enabled\n"); - - apicid = lapicid(); - // APIC IDs are not guaranteed to be contiguous. - for (i = 0; i < ncpu; ++i) { - if (cpus[i].apicid == apicid) - return &cpus[i]; - } - panic("unknown apicid\n"); -} - -// Return this core's cpu struct using %gs. %gs points this core's struct -// cpu. Offet 24 in struct cpu is cpu. +// Return this core's cpu struct. +// XXX riscv struct cpu* mycpu(void) { struct cpu *c; - asm volatile("mov %%gs:24, %0" : "=r" (c)); + c = &cpus[0]; return c; } // Disable interrupts so that we are not rescheduled // while reading proc from the cpu structure +// XXX riscv struct proc* myproc(void) { - struct cpu *c; - struct proc *p; - pushcli(); - c = mycpu(); - p = c->proc; - popcli(); - return p; + return cpus[0].proc; } //PAGEBREAK: 32 @@ -84,7 +65,6 @@ static struct proc* allocproc(void) { struct proc *p; - char *sp; acquire(&ptable.lock); @@ -101,56 +81,73 @@ found: release(&ptable.lock); - // Allocate kernel stack. + // Allocate a page for the kernel stack. if((p->kstack = kalloc()) == 0){ p->state = UNUSED; return 0; } - sp = p->kstack + KSTACKSIZE; - // Leave room for syscall frame. - sp -= sizeof *p->sf; + // Allocate a trapframe page. + if((p->tf = (struct trapframe *)kalloc()) == 0){ + p->state = UNUSED; + return 0; + } - if ((uint64) sp % 16) - panic("misaligned sp"); + // An empty user page table. + p->pagetable = uvmcreate(); - p->sf = (struct sysframe*)sp; + // map the trampoline code (for system call return) + // at the highest user virtual address. + // only the supervisor uses it, on the way + // to/from user space, so not PTE_U. 
+ mappages(p->pagetable, TRAMPOLINE, PGSIZE, + (uint64)trampstart, PTE_R | PTE_X); + + // map the trapframe, for trampoline.S. + mappages(p->pagetable, (TRAMPOLINE - PGSIZE), PGSIZE, + (uint64)(p->tf), PTE_R | PTE_W); // Set up new context to start executing at forkret, - // which returns to sysexit. - sp -= sizeof(uint64); - *(uint64*)sp = (uint64)sysexit; - - sp -= sizeof *p->context; - p->context = (struct context*)sp; - memset(p->context, 0, sizeof *p->context); - p->context->rip = (uint64)forkret; + // which returns to user space. + memset(&p->context, 0, sizeof p->context); + p->context.ra = (uint64)forkret; + p->context.sp = (uint64)p->kstack + PGSIZE; return p; } +// XXX hack because I don't know how to incorporate initcode +// into the kernel binary. just the exec system call, no arguments. +// manually copied from initcode.asm. +unsigned char initcode[] = { + 0x85, 0x48, // li a7, 1 -- SYS_fork + 0x73, 0x00, 0x00, 0x00, // ecall + 0x8d, 0x48, // li a7, 3 -- SYS_wait + 0x73, 0x00, 0x00, 0x00, // ecall + 0x89, 0x48, // li a7, 2 -- SYS_exit + 0x73, 0x00, 0x00, 0x00, // ecall +}; + //PAGEBREAK: 32 // Set up first user process. void userinit(void) { struct proc *p; - extern char _binary_initcode_start[], _binary_initcode_size[]; p = allocproc(); - initproc = p; - if((p->pgdir = setupkvm()) == 0) - panic("userinit: out of memory?"); - inituvm(p->pgdir, _binary_initcode_start, (uint64)_binary_initcode_size); + + uvminit(p->pagetable, initcode, sizeof(initcode)); p->sz = PGSIZE; - memset(p->sf, 0, sizeof(*p->sf)); - p->sf->r11 = FL_IF; - p->sf->rsp = PGSIZE; - p->sf->rcx = 0; // beginning of initcode.S + + // prepare for the very first kernel->user. + p->tf->epc = 0; + p->tf->sp = PGSIZE; safestrcpy(p->name, "initcode", sizeof(p->name)); - p->cwd = namei("/"); + // XXX riscv + //p->cwd = namei("/"); // this assignment to p->state lets other cores // run this process. 
the acquire forces the above @@ -163,62 +160,65 @@ userinit(void) release(&ptable.lock); } +#if 0 + // Grow current process's memory by n bytes. // Return 0 on success, -1 on failure. int growproc(int n) { uint sz; - struct proc *curproc = myproc(); + struct proc *p = myproc(); - sz = curproc->sz; + sz = p->sz; if(n > 0){ - if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0) + if((sz = allocuvm(p->pagetable, sz, sz + n)) == 0) return -1; } else if(n < 0){ - if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0) + if((sz = uvmdealloc(p->pagetable, sz, sz + n)) == 0) return -1; } - curproc->sz = sz; - switchuvm(curproc); + p->sz = sz; + switchuvm(p); return 0; } +#endif -// Create a new process copying p as the parent. -// Sets up stack to return as if from system call. -// Caller must set state of returned proc to RUNNABLE. +// Create a new process, copying p as the parent. +// Sets up child kernel stack to return as if from system call. int fork(void) { int i, pid; struct proc *np; - struct proc *curproc = myproc(); + struct proc *p = myproc(); // Allocate process. if((np = allocproc()) == 0){ return -1; } - // Copy process state from proc. - if((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0){ - kfree(np->kstack); - np->kstack = 0; - np->state = UNUSED; - return -1; - } - np->sz = curproc->sz; - np->parent = curproc; - *np->sf = *curproc->sf; + // Copy user memory from parent to child. + uvmcopy(p->pagetable, np->pagetable, p->sz); + np->sz = p->sz; - // Clear %eax so that fork returns 0 in the child. - np->sf->rax = 0; + np->parent = p; + // copy saved user registers. + *(np->tf) = *(p->tf); + + // Cause fork to return 0 in the child. + np->tf->a0 = 0; + +#if 0 // XXX riscv + // increment reference counts on open file descriptors. 
for(i = 0; i < NOFILE; i++) - if(curproc->ofile[i]) - np->ofile[i] = filedup(curproc->ofile[i]); - np->cwd = idup(curproc->cwd); + if(p->ofile[i]) + np->ofile[i] = filedup(p->ofile[i]); + np->cwd = idup(p->cwd); +#endif - safestrcpy(np->name, curproc->name, sizeof(curproc->name)); + safestrcpy(np->name, p->name, sizeof(p->name)); pid = np->pid; @@ -233,46 +233,48 @@ fork(void) // Exit the current process. Does not return. // An exited process remains in the zombie state -// until its parent calls wait() to find out it exited. +// until its parent calls wait(). void exit(void) { - struct proc *curproc = myproc(); - struct proc *p; + struct proc *p = myproc(); + struct proc *pp; int fd; - if(curproc == initproc) + if(p == initproc) panic("init exiting"); +#if 0 // XXX riscv // Close all open files. for(fd = 0; fd < NOFILE; fd++){ - if(curproc->ofile[fd]){ - fileclose(curproc->ofile[fd]); - curproc->ofile[fd] = 0; + if(p->ofile[fd]){ + fileclose(p->ofile[fd]); + p->ofile[fd] = 0; } } begin_op(); - iput(curproc->cwd); + iput(p->cwd); end_op(); - curproc->cwd = 0; +#endif + p->cwd = 0; acquire(&ptable.lock); // Parent might be sleeping in wait(). - wakeup1(curproc->parent); + wakeup1(p->parent); // Pass abandoned children to init. - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ - if(p->parent == curproc){ - p->parent = initproc; - if(p->state == ZOMBIE) + for(pp = ptable.proc; pp < &ptable.proc[NPROC]; pp++){ + if(pp->parent == p){ + pp->parent = initproc; + if(pp->state == ZOMBIE) wakeup1(initproc); } } // Jump into the scheduler, never to return. - curproc->state = ZOMBIE; + p->state = ZOMBIE; sched(); panic("zombie exit"); } @@ -282,42 +284,47 @@ exit(void) int wait(void) { - struct proc *p; + struct proc *np; int havekids, pid; - struct proc *curproc = myproc(); + struct proc *p = myproc(); acquire(&ptable.lock); for(;;){ // Scan through table looking for exited children. 
havekids = 0; - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ - if(p->parent != curproc) + for(np = ptable.proc; np < &ptable.proc[NPROC]; np++){ + if(np->parent != p) continue; havekids = 1; - if(p->state == ZOMBIE){ + if(np->state == ZOMBIE){ // Found one. - pid = p->pid; - kfree(p->kstack); - p->kstack = 0; - freevm(p->pgdir, p->sz); - p->pid = 0; - p->parent = 0; - p->name[0] = 0; - p->killed = 0; - p->state = UNUSED; + pid = np->pid; + kfree(np->kstack); + np->kstack = 0; + kfree((void*)np->tf); + np->tf = 0; + unmappages(np->pagetable, TRAMPOLINE, PGSIZE, 0); + unmappages(np->pagetable, TRAMPOLINE-PGSIZE, PGSIZE, 0); + uvmfree(np->pagetable, np->sz); + np->pagetable = 0; + np->pid = 0; + np->parent = 0; + np->name[0] = 0; + np->killed = 0; + np->state = UNUSED; release(&ptable.lock); return pid; } } // No point waiting if we don't have any children. - if(!havekids || curproc->killed){ + if(!havekids || p->killed){ release(&ptable.lock); return -1; } // Wait for children to exit. (See wakeup1 call in proc_exit.) - sleep(curproc, &ptable.lock); //DOC: wait-sleep + sleep(p, &ptable.lock); //DOC: wait-sleep } } @@ -338,7 +345,8 @@ scheduler(void) c->proc = 0; for(;;){ // Enable interrupts on this processor. - sti(); + // XXX riscv + //sti(); // Loop over process table looking for process to run. acquire(&ptable.lock); @@ -350,11 +358,11 @@ scheduler(void) // to release ptable.lock and then reacquire it // before jumping back to us. c->proc = p; - switchuvm(p); p->state = RUNNING; - swtch(&(c->scheduler), p->context); - switchkvm(); + printf("switch...\n"); + swtch(&c->scheduler, &p->context); + printf("switch returned\n"); // Process is done running for now. // It should have changed its p->state before coming back. 
@@ -380,14 +388,10 @@ sched(void) if(!holding(&ptable.lock)) panic("sched ptable.lock"); - if(mycpu()->ncli != 1) - panic("sched locks"); if(p->state == RUNNING) panic("sched running"); - if(readeflags()&FL_IF) - panic("sched interruptible"); intena = mycpu()->intena; - swtch(&p->context, mycpu()->scheduler); + swtch(&p->context, &mycpu()->scheduler); mycpu()->intena = intena; } @@ -402,24 +406,29 @@ yield(void) } // A fork child's very first scheduling by scheduler() -// will swtch here. "Return" to user space. +// will swtch to forkret. void forkret(void) { + struct proc *p = myproc(); + static int first = 1; // Still holding ptable.lock from scheduler. release(&ptable.lock); + printf("entering forkret\n"); + if (first) { // Some initialization functions must be run in the context // of a regular process (e.g., they call sleep), and thus cannot // be run from main(). first = 0; - iinit(ROOTDEV); - initlog(ROOTDEV); + // XXX riscv + //iinit(ROOTDEV); + //initlog(ROOTDEV); } - - // Return to "caller", actually trapret (see allocproc). + + usertrapret(); } // Atomically release lock and sleep on chan. @@ -483,6 +492,8 @@ wakeup(void *chan) release(&ptable.lock); } +#if 0 + // Kill the process with the given pid. // Process won't exit until it returns // to user space (see trap in trap.c). @@ -533,12 +544,14 @@ procdump(void) state = states[p->state]; else state = "???"; - cprintf("%d %s %s", p->pid, state, p->name); + printf("%d %s %s", p->pid, state, p->name); if(p->state == SLEEPING){ getcallerpcs((uint64*)p->context->rbp+2, pc); for(i=0; i<10 && pc[i] != 0; i++) - cprintf(" %p", pc[i]); + printf(" %p", pc[i]); } - cprintf("\n"); + printf("\n"); } } + +#endif diff --git a/proc.h b/proc.h index 0b01b75..1e2238c 100644 --- a/proc.h +++ b/proc.h @@ -1,13 +1,30 @@ +// Saved registers for kernel context switches. 
+struct context { + uint64 ra; + uint64 sp; + + // callee-saved + uint64 s0; + uint64 s1; + uint64 s2; + uint64 s3; + uint64 s4; + uint64 s5; + uint64 s6; + uint64 s7; + uint64 s8; + uint64 s9; + uint64 s10; + uint64 s11; +}; + // Per-CPU state struct cpu { uint64 syscallno; // Temporary used by sysentry uint64 usp; // Temporary used by sysentry struct proc *proc; // The process running on this cpu or null struct cpu *cpu; // XXX - uchar apicid; // Local APIC ID - struct context *scheduler; // swtch() here to enter scheduler - struct taskstate ts; // Used by x86 to find stack for interrupt - struct segdesc gdt[NSEGS]; // x86 global descriptor table + struct context scheduler; // swtch() here to enter scheduler volatile uint started; // Has the CPU started? int ncli; // Depth of pushcli nesting. int intena; // Were interrupts enabled before pushcli? @@ -17,39 +34,52 @@ extern struct cpu cpus[NCPU]; extern int ncpu; //PAGEBREAK: 17 -// Saved registers for kernel context switches. -// Don't need to save all the segment registers (%cs, etc), -// because they are constant across kernel contexts. -// Don't need to save %eax, %ecx, %edx, because the -// x86 convention is that the caller has saved them. -// Contexts are stored at the bottom of the stack they -// describe; the stack pointer is the address of the context. -// The layout of the context matches the layout of the stack in swtch.S -// at the "Switch stacks" comment. Switch doesn't save eip explicitly, -// but it is on the stack and allocproc() manipulates it. -struct context { - uint64 r15; - uint64 r14; - uint64 r13; - uint64 r12; - uint64 r11; - uint64 rbx; - uint64 rbp; - uint64 rip; + +// per-process data for the early trap handling code in trampoline.S. +// sits in a page by itself just under the trampoline page in the +// user page table. not specially mapped in the kernel page table. +// the sscratch register points here. +// trampoline.S saves user registers, then restores kernel_sp and +// kernel_satp. 
+// no need to save s0-s11 (callee-saved) since C code and swtch() save them. +struct trapframe { + /* 0 */ uint64 kernel_satp; + /* 8 */ uint64 kernel_sp; + /* 16 */ uint64 kernel_trap; // address of usertrap() + /* 24 */ uint64 epc; // saved user program counter + /* 32 */ uint64 ra; + /* 40 */ uint64 sp; + /* 48 */ uint64 gp; + /* 56 */ uint64 tp; + /* 64 */ uint64 t0; + /* 72 */ uint64 t1; + /* 80 */ uint64 t2; + /* 88 */ uint64 a0; + /* 96 */ uint64 a1; + /* 104 */ uint64 a2; + /* 112 */ uint64 a3; + /* 120 */ uint64 a4; + /* 128 */ uint64 a5; + /* 136 */ uint64 a6; + /* 144 */ uint64 a7; + /* 152 */ uint64 t3; + /* 160 */ uint64 t4; + /* 168 */ uint64 t5; + /* 176 */ uint64 t6; }; enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; // Per-process state struct proc { - char *kstack; // Bottom of kernel stack for this process, must be first entry + char *kstack; // Bottom of kernel stack for this process uint64 sz; // Size of process memory (bytes) - pde_t* pgdir; // Page table + pagetable_t pagetable; // Page table enum procstate state; // Process state int pid; // Process ID struct proc *parent; // Parent process - struct sysframe *sf; // Syscall frame for current syscall - struct context *context; // swtch() here to run process + struct trapframe *tf; // data page for trampoline.S + struct context context; // swtch() here to run process void *chan; // If non-zero, sleeping on chan int killed; // If non-zero, have been killed struct file *ofile[NOFILE]; // Open files diff --git a/riscv.h b/riscv.h new file mode 100644 index 0000000..92fa6e3 --- /dev/null +++ b/riscv.h @@ -0,0 +1,172 @@ +// Machine Status Register, mstatus + +#define MSTATUS_MPP_MASK (3L << 11) +#define MSTATUS_MPP_M (3L << 11) +#define MSTATUS_MPP_S (1L << 11) +#define MSTATUS_MPP_U (0L << 11) + +static inline uint64 +r_mstatus() +{ + uint64 x; + asm volatile("csrr %0, mstatus" : "=r" (x) ); + return x; +} + +static inline void +w_mstatus(uint64 x) +{ + asm("csrw mstatus, %0" : : "r"
(x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void +w_mepc(uint64 x) +{ + asm("csrw mepc, %0" : : "r" (x)); +} + +// Supervisor Status Register, sstatus + +#define SSTATUS_SPP (1L << 8) // 1=Supervisor, 0=User + +static inline uint64 +r_sstatus() +{ + uint64 x; + asm volatile("csrr %0, sstatus" : "=r" (x) ); + return x; +} + +static inline void +w_sstatus(uint64 x) +{ + asm("csrw sstatus, %0" : : "r" (x)); +} + +// supervisor exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void +w_sepc(uint64 x) +{ + asm("csrw sepc, %0" : : "r" (x)); +} + +static inline uint64 +r_sepc() +{ + uint64 x; + asm volatile("csrr %0, sepc" : "=r" (x) ); + return x; +} + +// Machine Exception Delegation +static inline uint64 +r_medeleg() +{ + uint64 x; + asm volatile("csrr %0, medeleg" : "=r" (x) ); + return x; +} + +static inline void +w_medeleg(uint64 x) +{ + asm("csrw medeleg, %0" : : "r" (x)); +} + +// Machine Interrupt Delegation +static inline uint64 +r_mideleg() +{ + uint64 x; + asm volatile("csrr %0, mideleg" : "=r" (x) ); + return x; +} + +static inline void +w_mideleg(uint64 x) +{ + asm("csrw mideleg, %0" : : "r" (x)); +} + +// Supervisor Trap-Vector Base Address +// low two bits are mode. +static inline void +w_stvec(uint64 x) +{ + asm("csrw stvec, %0" : : "r" (x)); +} + +// use riscv's sv39 page table scheme. +#define SATP_SV39 (8L << 60) + +#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)(pagetable)) >> 12)) + +// supervisor address translation and protection; +// holds the address of the page table. +static inline void +w_satp(uint64 x) +{ + asm("csrw satp, %0" : : "r" (x)); +} + +static inline uint64 +r_satp() +{ + uint64 x; + asm volatile("csrr %0, satp" : "=r" (x) ); + return x; +} + +// Supervisor Scratch register, for early trap handler in trampoline.S.
+static inline void +w_sscratch(uint64 x) +{ + asm("csrw sscratch, %0" : : "r" (x)); +} + +// Supervisor trap cause +static inline uint64 +r_scause() +{ + uint64 x; + asm volatile("csrr %0, scause" : "=r" (x) ); + return x; +} + +#define PGSIZE 4096 // bytes per page +#define PGSHIFT 12 // bits of offset within a page + +#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) + +#define PTE_V (1L << 0) // valid +#define PTE_R (1L << 1) +#define PTE_W (1L << 2) +#define PTE_X (1L << 3) +#define PTE_U (1L << 4) // 1 -> user can access + +// shift a physical address to the right place for a PTE. +#define PA2PTE(pa) ((((uint64)(pa)) >> 12) << 10) + +#define PTE2PA(pte) (((pte) >> 10) << 12) + +#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U)) + +// extract the three 9-bit page table indices from a virtual address. +#define PXMASK 0x1FF // 9 bits +#define PXSHIFT(level) (PGSHIFT+(9*(level))) +#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK) + +// one beyond the highest possible virtual address. +// MAXVA is actually one bit less than the max allowed by +// Sv39, to avoid having to sign-extend virtual addresses +// that have the high bit set. +#define MAXVA (1L << (9 + 9 + 9 + 12 - 1)) + +typedef uint64 pte_t; +typedef uint64 *pagetable_t; // 512 PTEs diff --git a/spinlock.c b/spinlock.c index 9ee65f6..0377870 100644 --- a/spinlock.c +++ b/spinlock.c @@ -1,13 +1,11 @@ // Mutual exclusion spin locks.
#include "types.h" -#include "defs.h" #include "param.h" -#include "x86.h" #include "memlayout.h" -#include "mmu.h" -#include "proc.h" #include "spinlock.h" +#include "riscv.h" +#include "defs.h" void initlock(struct spinlock *lk, char *name) @@ -17,6 +15,27 @@ initlock(struct spinlock *lk, char *name) lk->cpu = 0; } +void +acquire(struct spinlock *lk) +{ + lk->locked = 1; + lk->cpu = mycpu(); +} + +void +release(struct spinlock *lk) +{ + lk->locked = 0; + lk->cpu = 0; +} + +int +holding(struct spinlock *lk) +{ + return lk->locked && lk->cpu == mycpu(); +} + +#if 0 // Acquire the lock. // Loops (spins) until the lock is acquired. // Holding a lock for a long time may cause @@ -37,7 +56,7 @@ acquire(struct spinlock *lk) // references happen after the lock is acquired. __sync_synchronize(); - // Record info about lock acquisition for debugging. + // Record info about lock acquisition for holding() and debugging. lk->cpu = mycpu(); getcallerpcs(&lk, lk->pcs); } @@ -87,11 +106,11 @@ getcallerpcs(void *v, uint64 pcs[]) // Check whether this cpu is holding the lock. int -holding(struct spinlock *lock) +holding(struct spinlock *lk) { int r; pushcli(); - r = lock->locked && lock->cpu == mycpu(); + r = lk->locked && lk->cpu == mycpu(); popcli(); return r; } @@ -123,4 +142,4 @@ popcli(void) if(mycpu()->ncli == 0 && mycpu()->intena) sti(); } - +#endif diff --git a/start.c b/start.c new file mode 100644 index 0000000..5167ab4 --- /dev/null +++ b/start.c @@ -0,0 +1,34 @@ +#include "types.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +void main(); + +// entry.S uses this as the initial stack. +char stack0[4096]; + +// entry.S jumps here in machine mode on stack0. +void +mstart() +{ + // set M Previous Privilege mode to Supervisor, for mret. + unsigned long x = r_mstatus(); + x &= ~MSTATUS_MPP_MASK; + x |= MSTATUS_MPP_S; + w_mstatus(x); + + // set M Exception Program Counter to main, for mret. 
+ // requires gcc -mcmodel=medany + w_mepc((uint64)main); + + // disable paging for now. + w_satp(0); + + // delegate all interrupts and exceptions to supervisor mode. + w_medeleg(0xffff); + w_mideleg(0xffff); + + // jump to main in supervisor mode. + asm("mret"); +} diff --git a/string.c b/string.c index 861ea25..d99e612 100644 --- a/string.c +++ b/string.c @@ -1,14 +1,13 @@ #include "types.h" -#include "x86.h" void* memset(void *dst, int c, uint n) { - if ((uint64)dst%4 == 0 && n%4 == 0){ - c &= 0xFF; - stosl(dst, (c<<24)|(c<<16)|(c<<8)|c, n/4); - } else - stosb(dst, c, n); + char *cdst = (char *) dst; + int i; + for(i = 0; i < n; i++){ + cdst[i] = c; + } return dst; } diff --git a/swtch.S b/swtch.S index aa527d8..17a8663 100644 --- a/swtch.S +++ b/swtch.S @@ -1,35 +1,42 @@ # Context switch # -# void swtch(struct context **old, struct context *new); +# void swtch(struct context *old, struct context *new); # -# Save the current registers on the stack, creating -# a struct context, and save its address in *old. -# Switch stacks to new and pop previously-saved registers. +# Save current registers in old. Load from new. 
+ .globl swtch swtch: - # Save old callee-saved registers - push %rbp - push %rbx - push %r11 - push %r12 - push %r13 - push %r14 - push %r15 + sd ra, 0(a0) + sd sp, 8(a0) + sd s0, 16(a0) + sd s1, 24(a0) + sd s2, 32(a0) + sd s3, 40(a0) + sd s4, 48(a0) + sd s5, 56(a0) + sd s6, 64(a0) + sd s7, 72(a0) + sd s8, 80(a0) + sd s9, 88(a0) + sd s10, 96(a0) + sd s11, 104(a0) - # Switch stacks - mov %rsp, (%rdi) # first arg of swtch is in rdi - mov %rsi, %rsp # second arg of swtch is in rsi - - # Load new callee-saved registers - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %r11 - pop %rbx - pop %rbp - - ret + ld ra, 0(a1) + ld sp, 8(a1) + ld s0, 16(a1) + ld s1, 24(a1) + ld s2, 32(a1) + ld s3, 40(a1) + ld s4, 48(a1) + ld s5, 56(a1) + ld s6, 64(a1) + ld s7, 72(a1) + ld s8, 80(a1) + ld s9, 88(a1) + ld s10, 96(a1) + ld s11, 104(a1) + + ret diff --git a/syscall.c b/syscall.c index b815f28..d3825cb 100644 --- a/syscall.c +++ b/syscall.c @@ -1,11 +1,10 @@ #include "types.h" -#include "defs.h" #include "param.h" #include "memlayout.h" -#include "mmu.h" +#include "riscv.h" #include "proc.h" -#include "x86.h" #include "syscall.h" +#include "defs.h" // User code makes a system call with INT T_SYSCALL. // System call number in %eax. 
@@ -17,9 +16,9 @@ int fetchint(uint64 addr, int *ip) { - struct proc *curproc = myproc(); + struct proc *p = myproc(); - if(addr >= curproc->sz || addr+4 > curproc->sz) + if(addr >= p->sz || addr+4 > p->sz) return -1; *ip = *(uint64*)(addr); return 0; @@ -29,8 +28,8 @@ fetchint(uint64 addr, int *ip) int fetchaddr(uint64 addr, uint64 *ip) { - struct proc *curproc = myproc(); - if(addr >= curproc->sz || addr+sizeof(uint64) > curproc->sz) + struct proc *p = myproc(); + if(addr >= p->sz || addr+sizeof(uint64) > p->sz) return -1; *ip = *(uint64*)(addr); return 0; @@ -43,12 +42,12 @@ int fetchstr(uint64 addr, char **pp) { char *s, *ep; - struct proc *curproc = myproc(); + struct proc *p = myproc(); - if(addr >= curproc->sz) + if(addr >= p->sz) return -1; *pp = (char*)addr; - ep = (char*)curproc->sz; + ep = (char*)p->sz; for(s = *pp; s < ep; s++){ if(*s == 0) return s - *pp; @@ -59,20 +58,20 @@ fetchstr(uint64 addr, char **pp) static uint64 fetcharg(int n) { - struct proc *curproc = myproc(); + struct proc *p = myproc(); switch (n) { case 0: - return curproc->sf->rdi; + return p->tf->a0; case 1: - return curproc->sf->rsi; + return p->tf->a1; case 2: - return curproc->sf->rdx; + return p->tf->a2; case 3: - return curproc->sf->r10; + return p->tf->a3; case 4: - return curproc->sf->r8; + return p->tf->a4; case 5: - return curproc->sf->r9; + return p->tf->a5; } panic("fetcharg"); return -1; @@ -100,11 +99,11 @@ int argptr(int n, char **pp, int size) { uint64 i; - struct proc *curproc = myproc(); + struct proc *p = myproc(); if(argaddr(n, &i) < 0) return -1; - if(size < 0 || (uint)i >= curproc->sz || (uint)i+size > curproc->sz) + if(size < 0 || (uint)i >= p->sz || (uint)i+size > p->sz) return -1; *pp = (char*)i; return 0; @@ -149,48 +148,47 @@ static int (*syscalls[])(void) = { [SYS_fork] sys_fork, [SYS_exit] sys_exit, [SYS_wait] sys_wait, -[SYS_pipe] sys_pipe, -[SYS_read] sys_read, -[SYS_kill] sys_kill, -[SYS_exec] sys_exec, -[SYS_fstat] sys_fstat, -[SYS_chdir] sys_chdir, 
-[SYS_dup] sys_dup, +//[SYS_pipe] sys_pipe, +//[SYS_read] sys_read, +//[SYS_kill] sys_kill, +//[SYS_exec] sys_exec, +//[SYS_fstat] sys_fstat, +//[SYS_chdir] sys_chdir, +//[SYS_dup] sys_dup, [SYS_getpid] sys_getpid, -[SYS_sbrk] sys_sbrk, -[SYS_sleep] sys_sleep, -[SYS_uptime] sys_uptime, -[SYS_open] sys_open, -[SYS_write] sys_write, -[SYS_mknod] sys_mknod, -[SYS_unlink] sys_unlink, -[SYS_link] sys_link, -[SYS_mkdir] sys_mkdir, -[SYS_close] sys_close, +//[SYS_sbrk] sys_sbrk, +//[SYS_sleep] sys_sleep, +//[SYS_uptime] sys_uptime, +//[SYS_open] sys_open, +//[SYS_write] sys_write, +//[SYS_mknod] sys_mknod, +//[SYS_unlink] sys_unlink, +//[SYS_link] sys_link, +//[SYS_mkdir] sys_mkdir, +//[SYS_close] sys_close, }; static void dosyscall(void) { int num; - struct proc *curproc = myproc(); + struct proc *p = myproc(); - num = curproc->sf->rax; + num = p->tf->a7; if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { - curproc->sf->rax = syscalls[num](); + p->tf->a0 = syscalls[num](); } else { - cprintf("%d %s: unknown sys call %d\n", - curproc->pid, curproc->name, num); - curproc->sf->rax = -1; + printf("%d %s: unknown sys call %d\n", + p->pid, p->name, num); + p->tf->a0 = -1; } } void -syscall(struct sysframe *sf) +syscall() { if(myproc()->killed) exit(); - myproc()->sf = sf; dosyscall(); if(myproc()->killed) exit(); diff --git a/sysfile.c b/sysfile.c index d0de779..94f6437 100644 --- a/sysfile.c +++ b/sysfile.c @@ -41,11 +41,11 @@ static int fdalloc(struct file *f) { int fd; - struct proc *curproc = myproc(); + struct proc *p = myproc(); for(fd = 0; fd < NOFILE; fd++){ - if(curproc->ofile[fd] == 0){ - curproc->ofile[fd] = f; + if(p->ofile[fd] == 0){ + p->ofile[fd] = f; return fd; } } @@ -374,7 +374,7 @@ sys_chdir(void) { char *path; struct inode *ip; - struct proc *curproc = myproc(); + struct proc *p = myproc(); begin_op(); if(argstr(0, &path) < 0 || (ip = namei(path)) == 0){ @@ -388,9 +388,9 @@ sys_chdir(void) return -1; } iunlock(ip); - iput(curproc->cwd); + iput(p->cwd); 
end_op(); - curproc->cwd = ip; + p->cwd = ip; return 0; } diff --git a/sysproc.c b/sysproc.c index 0686d29..f840738 100644 --- a/sysproc.c +++ b/sysproc.c @@ -1,18 +1,11 @@ #include "types.h" -#include "x86.h" +#include "riscv.h" #include "defs.h" #include "date.h" #include "param.h" #include "memlayout.h" -#include "mmu.h" #include "proc.h" -int -sys_fork(void) -{ - return fork(); -} - int sys_exit(void) { @@ -20,12 +13,25 @@ sys_exit(void) return 0; // not reached } +int +sys_getpid(void) +{ + return myproc()->pid; +} + +int +sys_fork(void) +{ + return fork(); +} + int sys_wait(void) { return wait(); } +#if 0 int sys_kill(void) { @@ -36,12 +42,6 @@ sys_kill(void) return kill(pid); } -int -sys_getpid(void) -{ - return myproc()->pid; -} - int sys_sbrk(void) { @@ -89,3 +89,4 @@ sys_uptime(void) release(&tickslock); return xticks; } +#endif diff --git a/trampoline.S b/trampoline.S new file mode 100644 index 0000000..109dd93 --- /dev/null +++ b/trampoline.S @@ -0,0 +1,108 @@ + # + # code to switch between user and kernel space. + # + # this code is mapped at the same virtual address + # in user and kernel space so that it can switch + # page tables. + # + # kernel.ld causes trampstart to be aligned + # to a page boundary. + # +.globl usertrap + .section trampoline +.globl trampstart +trampstart: + # switch from kernel to user. + # a0: p->tf in user page table + # a1: new value for satp, for user page table + + # switch to user page table + csrw satp, a1 + + # put the saved user a0 in sscratch, so we + # can swap it with our a0 (p->tf) in the last step. 
+ ld t0, 88(a0) + csrw sscratch, t0 + + # restore all but a0 from p->tf + ld ra, 32(a0) + ld sp, 40(a0) + ld gp, 48(a0) + ld tp, 56(a0) + ld t0, 64(a0) + ld t1, 72(a0) + ld t2, 80(a0) + ld a1, 96(a0) + ld a2, 104(a0) + ld a3, 112(a0) + ld a4, 120(a0) + ld a5, 128(a0) + ld a6, 136(a0) + ld a7, 144(a0) + ld t3, 152(a0) + ld t4, 160(a0) + ld t5, 168(a0) + ld t6, 176(a0) + + # restore user a0, and save p->tf + csrrw a0, sscratch, a0 + + # return to user mode and user pc. + # caller has set up sstatus and sepc. + sret + + # + # trap.c set stvec to point here, so + # interrupts and exceptions start here, + # in supervisor mode, but with a + # user page table. + # + # sscratch points to where the process's p->tf is + # mapped into user space (TRAMPOLINE - 4096). + # +.align 4 +.globl trampvec +trampvec: + # swap a0 and sscratch + # so that a0 is p->tf + csrrw a0, sscratch, a0 + + # save the user registers in p->tf + sd ra, 32(a0) + sd sp, 40(a0) + sd gp, 48(a0) + sd tp, 56(a0) + sd t0, 64(a0) + sd t1, 72(a0) + sd t2, 80(a0) + sd a1, 96(a0) + sd a2, 104(a0) + sd a3, 112(a0) + sd a4, 120(a0) + sd a5, 128(a0) + sd a6, 136(a0) + sd a7, 144(a0) + sd t3, 152(a0) + sd t4, 160(a0) + sd t5, 168(a0) + sd t6, 176(a0) + + # save the user a0 in p->tf->a0 (offset 88; 80 is t2) + csrr t0, sscratch + sd t0, 88(a0) + + # restore kernel stack pointer from p->tf->kernel_sp + ld sp, 8(a0) + + # remember the address of usertrap(), p->tf->kernel_trap + ld t0, 16(a0) + + # restore kernel page table from p->tf->kernel_satp + ld t1, 0(a0) + csrw satp, t1 + + # a0 is no longer valid, since the kernel page + # table does not specially map p->tf.
+ + # jump to usertrap(), which does not return + jr t0 diff --git a/trap.c b/trap.c index 4c58cb2..d0368ce 100644 --- a/trap.c +++ b/trap.c @@ -1,109 +1,113 @@ #include "types.h" -#include "defs.h" #include "param.h" #include "memlayout.h" -#include "mmu.h" +#include "riscv.h" #include "proc.h" -#include "x86.h" -#include "traps.h" #include "spinlock.h" +#include "defs.h" -// Interrupt descriptor table (shared by all CPUs). -struct intgate idt[256]; -extern uint64 vectors[]; // in vectors.S: array of 256 entry pointers struct spinlock tickslock; uint ticks; +extern char trampstart[], trampvec[]; + +void kerneltrap(); + void -tvinit(void) +trapinit(void) { int i; - for(i=0; i<256; i++) { - idt[i] = INTDESC(SEG_KCODE, vectors[i], INT_P | SEG_INTR64); - } - idtinit(); - + // send interrupts and exceptions to kerneltrap(). + w_stvec((uint64)kerneltrap); + initlock(&tickslock, "time"); } +// +// handle an interrupt, exception, or system call from user space. +// called from trampoline.S +// void -idtinit(void) +usertrap(void) { - struct desctr dtr; + if((r_sstatus() & SSTATUS_SPP) != 0) + panic("usertrap: not from user mode"); - dtr.limit = sizeof(idt) - 1; - dtr.base = (uint64)idt; - lidt((void *)&dtr.limit); -} + // send interrupts and exceptions to kerneltrap(), + // since we're now in the kernel. + w_stvec((uint64)kerneltrap); -//PAGEBREAK: 41 -void -trap(struct trapframe *tf) -{ - switch(tf->trapno){ - case T_IRQ0 + IRQ_TIMER: - if(cpuid() == 0){ - acquire(&tickslock); - ticks++; - wakeup(&ticks); - release(&tickslock); - } - lapiceoi(); - break; - case T_IRQ0 + IRQ_IDE: - ideintr(); - lapiceoi(); - break; - case T_IRQ0 + IRQ_IDE+1: - // Bochs generates spurious IDE1 interrupts. 
- break; - case T_IRQ0 + IRQ_KBD: - kbdintr(); - lapiceoi(); - break; - case T_IRQ0 + IRQ_COM1: - uartintr(); - lapiceoi(); - break; - case T_IRQ0 + 7: - case T_IRQ0 + IRQ_SPURIOUS: - cprintf("cpu%d: spurious interrupt at %x:%x\n", - cpuid(), tf->cs, tf->rip); - lapiceoi(); - break; + struct proc *p = myproc(); + + // save user program counter. + p->tf->epc = r_sepc(); + + if(r_scause() == 8){ + // system call + printf("usertrap(): system call pid=%d syscall=%d\n", p->pid, p->tf->a7); - //PAGEBREAK: 13 - default: - if(myproc() == 0 || (tf->cs&3) == 0){ - // In kernel, it must be our mistake. - cprintf("unexpected trap %d from cpu %d rip %x (cr2=0x%x)\n", - tf->trapno, cpuid(), tf->rip, rcr2()); - panic("trap"); - } - // In user space, assume process misbehaved. - cprintf("pid %d %s: trap %d err %d on cpu %d " - "rip 0x%x addr 0x%x--kill proc\n", - myproc()->pid, myproc()->name, tf->trapno, - tf->err, cpuid(), tf->rip, rcr2()); - myproc()->killed = 1; + // sepc points to the ecall instruction, + // but we want to return to the next instruction. + p->tf->epc += 4; + + syscall(); + } else { + printf("usertrap(): unexpected scause 0x%x pid=%d\n", r_scause(), p->pid); + panic("usertrap"); } - // Force process exit if it has been killed and is in user space. - // (If it is still executing in the kernel, let it keep running - // until it gets to the regular system call return.) - if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER) - exit(); - - // Force process to give up CPU on clock tick. - // If interrupts were on while locks held, would need to check nlock. 
- if(myproc() && myproc()->state == RUNNING && - tf->trapno == T_IRQ0+IRQ_TIMER) - yield(); - - // Check if the process has been killed since we yielded - if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER) - exit(); + usertrapret(); } +// +// return to user space +// +void +usertrapret(void) +{ + struct proc *p = myproc(); + // XXX turn off interrupts, since we're switching + // now from kerneltrap() to usertrap(). + + // send interrupts and exceptions to trampoline.S + w_stvec(TRAMPOLINE + (trampvec - trampstart)); + + // set up values that trampoline.S will need when + // the process next re-enters the kernel. + p->tf->kernel_satp = r_satp(); + p->tf->kernel_sp = (uint64)p->kstack + PGSIZE; + p->tf->kernel_trap = (uint64)usertrap; + + // set up the registers that trampoline.S's sret will use + // to get to user space. + + // set S Previous Privilege mode to User. + unsigned long x = r_sstatus(); + x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode + w_sstatus(x); + + // set S Exception Program Counter to the saved user pc. + w_sepc(p->tf->epc); + + // tell trampoline.S the user page table to switch to. + uint64 satp = MAKE_SATP(p->pagetable); + + // jump to trampoline.S at the top of memory, which + // switches to the user page table, restores user registers, + // and switches to user mode with sret. + ((void (*)(uint64,uint64))TRAMPOLINE)(TRAMPOLINE - PGSIZE, satp); +} + +// interrupts and exceptions from kernel code go here, +// on whatever the current kernel stack is. +// must be 4-byte aligned to fit in stvec. +void __attribute__ ((aligned (4))) +kerneltrap() +{ + if((r_sstatus() & SSTATUS_SPP) == 0) + panic("kerneltrap: not from supervisor mode"); + + panic("kerneltrap"); +} diff --git a/traps.h b/traps.h deleted file mode 100644 index 6e8a444..0000000 --- a/traps.h +++ /dev/null @@ -1,36 +0,0 @@ -// x86 trap and interrupt constants.
- -// Processor-defined: -#define T_DIVIDE 0 // divide error -#define T_DEBUG 1 // debug exception -#define T_NMI 2 // non-maskable interrupt -#define T_BRKPT 3 // breakpoint -#define T_OFLOW 4 // overflow -#define T_BOUND 5 // bounds check -#define T_ILLOP 6 // illegal opcode -#define T_DEVICE 7 // device not available -#define T_DBLFLT 8 // double fault -// #define T_COPROC 9 // reserved (not used since 486) -#define T_TSS 10 // invalid task switch segment -#define T_SEGNP 11 // segment not present -#define T_STACK 12 // stack exception -#define T_GPFLT 13 // general protection fault -#define T_PGFLT 14 // page fault -// #define T_RES 15 // reserved -#define T_FPERR 16 // floating point error -#define T_ALIGN 17 // aligment check -#define T_MCHK 18 // machine check -#define T_SIMDERR 19 // SIMD floating point error - -#define T_DEFAULT 500 // catchall - -#define T_IRQ0 32 // IRQ 0 corresponds to int T_IRQ - -#define IRQ_TIMER 0 -#define IRQ_KBD 1 -#define IRQ_COM1 4 -#define IRQ_IDE 14 -#define IRQ_ERROR 19 -#define IRQ_SPURIOUS 31 - - diff --git a/uart.c b/uart.c index b8946da..9a77c5a 100644 --- a/uart.c +++ b/uart.c @@ -1,77 +1,51 @@ -// Intel 8250 serial port (UART). +#include "memlayout.h" -#include "types.h" -#include "defs.h" -#include "param.h" -#include "traps.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "fs.h" -#include "file.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" +// +// qemu -machine virt has a 16550a UART +// qemu/hw/riscv/virt.c +// http://byterunner.com/16550.html +// +// caller should lock. +// -#define COM1 0x3f8 - -static int uart; // is there a uart? +// address of one of the registers +#define R(reg) ((unsigned int*)(UART0 + 4*(reg))) void uartinit(void) { - char *p; + // disable interrupts + *R(1) = 0x00; - // Turn off the FIFO - outb(COM1+2, 0); + // special mode to set baud rate + *R(3) = 0x80; - // 9600 baud, 8 data bits, 1 stop bit, parity off. 
- outb(COM1+3, 0x80); // Unlock divisor - outb(COM1+0, 115200/9600); - outb(COM1+1, 0); - outb(COM1+3, 0x03); // Lock divisor, 8 data bits. - outb(COM1+4, 0); - outb(COM1+1, 0x01); // Enable receive interrupts. + // LSB for baud rate of 38.4K + *R(0) = 0x03; - // If status is 0xFF, no serial port. - if(inb(COM1+5) == 0xFF) - return; - uart = 1; + // MSB for baud rate of 38.4K + *R(1) = 0x00; - // Acknowledge pre-existing interrupt conditions; - // enable interrupts. - inb(COM1+2); - inb(COM1+0); - ioapicenable(IRQ_COM1, 0); + // leave set-baud mode, + // and set word length to 8 bits, no parity. + *R(3) = 0x03; - // Announce that we're here. - for(p="xv6...\n"; *p; p++) - uartputc(*p); + // reset and enable FIFOs. + *R(2) = 0x07; } void uartputc(int c) { - int i; - - if(!uart) - return; - for(i = 0; i < 128 && !(inb(COM1+5) & 0x20); i++) - microdelay(10); - outb(COM1+0, c); + *R(0) = c; } static int uartgetc(void) { - if(!uart) - return -1; - if(!(inb(COM1+5) & 0x01)) - return -1; - return inb(COM1+0); } void uartintr(void) { - consoleintr(uartgetc); } diff --git a/vm.c b/vm.c index c0276a1..8c2ccb3 100644 --- a/vm.c +++ b/vm.c @@ -1,230 +1,162 @@ #include "param.h" #include "types.h" -#include "defs.h" -#include "x86.h" -#include "msr.h" #include "memlayout.h" -#include "mmu.h" -#include "proc.h" #include "elf.h" -#include "traps.h" +#include "riscv.h" +#include "defs.h" -extern char data[]; // defined by kernel.ld -void sysentry(void); +/* + * the kernel's page table. + */ +pagetable_t kernel_pagetable; -static pde_t *kpml4; // kernel address space, used by scheduler and bootup +extern char etext[]; // kernel.ld sets this to end of kernel code. -// Bootstrap GDT. Used by boot.S but defined in C -// Map "logical" addresses to virtual addresses using identity map. -// Cannot share a CODE descriptor for both kernel and user -// because it would have to have DPL_USR, but the CPU forbids -// an interrupt from CPL=0 to DPL=3. 
-struct segdesc bootgdt[NSEGS] = { - [0] = SEGDESC(0, 0, 0), // null - [1] = SEGDESC(0, 0xfffff, SEG_R|SEG_CODE|SEG_S|SEG_DPL(0)|SEG_P|SEG_D|SEG_G), // 32-bit kernel code - [2] = SEGDESC(0, 0, SEG_R|SEG_CODE|SEG_S|SEG_DPL(0)|SEG_P|SEG_L|SEG_G), // 64-bit kernel code - [3] = SEGDESC(0, 0xfffff, SEG_W|SEG_S|SEG_DPL(0)|SEG_P|SEG_D|SEG_G), // kernel data - // The order of the user data and user code segments is - // important for syscall instructions. See initseg. - [6] = SEGDESC(0, 0xfffff, SEG_W|SEG_S|SEG_DPL(3)|SEG_P|SEG_D|SEG_G), // 64-bit user data - [7] = SEGDESC(0, 0, SEG_R|SEG_CODE|SEG_S|SEG_DPL(3)|SEG_P|SEG_L|SEG_G), // 64-bit user code -}; +extern char trampstart[]; // trampoline.S - -// Set up CPU's kernel segment descriptors. -// Run once on entry on each CPU. +/* + * create a direct-map page table for the kernel and + * turn on paging. called early, in supervisor mode. + * the page allocator is already initialized. + */ void -seginit(void) +kvminit() { - struct cpu *c; - struct desctr dtr; + kernel_pagetable = (pagetable_t) kalloc(); + memset(kernel_pagetable, 0, PGSIZE); - c = getmycpu(); + // uart registers + mappages(kernel_pagetable, UART0, PGSIZE, + UART0, PTE_R | PTE_W); + + // map kernel text executable and read-only. + mappages(kernel_pagetable, KERNBASE, (uint64)etext-KERNBASE, + KERNBASE, PTE_R | PTE_X); - memmove(c->gdt, bootgdt, sizeof bootgdt); - dtr.limit = sizeof(c->gdt)-1; - dtr.base = (uint64) c->gdt; - lgdt((void *)&dtr.limit); + // map kernel data and the physical RAM we'll make use of. + mappages(kernel_pagetable, (uint64)etext, PHYSTOP-(uint64)etext, + (uint64)etext, PTE_R | PTE_W); - // When executing a syscall instruction the CPU sets the SS selector - // to (star >> 32) + 8 and the CS selector to (star >> 32). - // When executing a sysret instruction the CPU sets the SS selector - // to (star >> 48) + 8 and the CS selector to (star >> 48) + 16. 
- uint64 star = ((((uint64)SEG_UCODE|0x3)- 16)<<48)|((uint64)(SEG_KCODE)<<32); - writemsr(MSR_STAR, star); - writemsr(MSR_LSTAR, (uint64)&sysentry); - writemsr(MSR_SFMASK, FL_TF | FL_IF); + // map the trampoline for trap entry/exit to + // the highest virtual address in the kernel. + mappages(kernel_pagetable, TRAMPOLINE, PGSIZE, + (uint64)trampstart, PTE_R | PTE_X); - // Initialize cpu-local storage so that each core can easily - // find its struct cpu using %gs. - writegs(SEG_KDATA); - writemsr(MSR_GS_BASE, (uint64)c); - writemsr(MSR_GS_KERNBASE, (uint64)c); - c->cpu = c; + kvmswitch(); } -// Return the address of the PTE in page table pgdir +// Switch h/w page table register to the kernel's page table, +// and enable paging. +void +kvmswitch(void) +{ + w_satp(MAKE_SATP(kernel_pagetable)); +} + +// Return the address of the PTE in page table pagetable // that corresponds to virtual address va. If alloc!=0, // create any required page table pages. +// +// The risc-v Sv39 scheme has three levels of page table +// pages. A page table page contains 512 64-bit PTEs. +// A 64-bit virtual address is split into five fields: +// 39..63 -- must be zero. +// 30..38 -- 9 bits of level-2 index. +// 21..29 -- 9 bits of level-1 index. +// 12..20 -- 9 bits of level-0 index. +// 0..11 -- 12 bits of byte offset within the page.
static pte_t * -walkpgdir(pde_t *pml4, const void *va, int alloc) +walk(pagetable_t pagetable, const void *va, int alloc) { - pde_t *pgdir = pml4; - pde_t *pde; - int level; - - for (level = L_PML4; level > 0; level--) { - pde = &pgdir[PX(level, va)]; - if(*pde & PTE_P) - pgdir = (pte_t*)P2V(PTE_ADDR(*pde)); - else { - if(!alloc || (pgdir = (pde_t*)kalloc()) == 0) + if((uint64)va >= MAXVA) + panic("walk"); + + for(int level = 2; level > 0; level--) { + pte_t *pte = &pagetable[PX(level, va)]; + if(*pte & PTE_V) { + pagetable = (pagetable_t)PTE2PA(*pte); + } else { + if(!alloc || (pagetable = (pde_t*)kalloc()) == 0) return 0; - memset(pgdir, 0, PGSIZE); - *pde = V2P(pgdir) | PTE_P | PTE_W | PTE_U; + memset(pagetable, 0, PGSIZE); + *pte = PA2PTE(pagetable) | PTE_V; } } - return &pgdir[PX(level, va)]; + return &pagetable[PX(0, va)]; } // Create PTEs for virtual addresses starting at va that refer to // physical addresses starting at pa. va and size might not // be page-aligned. -static int -mappages(pde_t *pgdir, void *va, uint64 size, uint64 pa, int perm) +void +mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) { char *a, *last; pte_t *pte; - a = (char*)PGROUNDDOWN((uint64)va); - last = (char*)PGROUNDDOWN(((uint64)va) + size - 1); + a = (char*)PGROUNDDOWN(va); + last = (char*)PGROUNDDOWN(va + size - 1); for(;;){ - if((pte = walkpgdir(pgdir, a, 1)) == 0) - return -1; - if(*pte & PTE_P) + if((pte = walk(pagetable, a, 1)) == 0) + panic("mappages: walk"); + if(*pte & PTE_V) panic("remap"); - *pte = pa | perm | PTE_P; + *pte = PA2PTE(pa) | perm | PTE_V; if(a == last) break; a += PGSIZE; pa += PGSIZE; } - return 0; } -// There is one page table per process, plus one that's used when -// a CPU is not running any process (kpml4). The kernel uses the -// current process's page table during system calls and interrupts; -// page protection bits prevent user code from using the kernel's -// mappings. 
-// -// setupkvm() and exec() set up every page table like this: -// -// 0..KERNBASE: user memory (text+data+stack+heap), mapped to -// phys memory allocated by the kernel -// KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space) -// KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data) -// for the kernel's instructions and r/o data -// data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP, -// rw data + free physical memory -// 0xfe000000..0: mapped direct (devices such as ioapic) -// -// The kernel allocates physical memory for its heap and for user memory -// between V2P(end) and the end of physical memory (PHYSTOP) -// (directly addressable from end..P2V(PHYSTOP)). - -// This table defines the kernel's mappings, which are present in -// every process's page table. -static struct kmap { - void *virt; - uint64 phys_start; - uint64 phys_end; - int perm; -} kmap[] = { - { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space - { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata - { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory - { (void*)P2V(DEVSPACE), DEVSPACE, DEVSPACETOP, PTE_W}, // more devices -}; - -// Set up kernel part of a page table. -pde_t* -setupkvm(void) +// Remove mappings from a page table. The mappings in +// the given range must exist. Optionally free the +// physical memory. 
+void +unmappages(pagetable_t pagetable, uint64 va, uint64 size, int do_free) { - pde_t *pml4; - struct kmap *k; + char *a, *last; + pte_t *pte; + uint64 pa; - if((pml4 = (pde_t*)kalloc()) == 0) - return 0; - memset(pml4, 0, PGSIZE); - if (PHYSTOP > DEVSPACE) - panic("PHYSTOP too high"); - for(k = kmap; k < &kmap[NELEM(kmap)]; k++) { - if(mappages(pml4, k->virt, k->phys_end - k->phys_start, - (uint)k->phys_start, k->perm) < 0) { - freevm(pml4, 0); - return 0; + a = (char*)PGROUNDDOWN(va); + last = (char*)PGROUNDDOWN(va + size - 1); + for(;;){ + if((pte = walk(pagetable, a, 0)) == 0) + panic("unmappages: walk"); + if((*pte & PTE_V) == 0) + panic("unmappages: not mapped"); + if(PTE_FLAGS(*pte) == PTE_V) + panic("unmappages: not a leaf"); + if(do_free){ + pa = PTE2PA(*pte); + kfree((void*)pa); } + *pte = 0; + if(a == last) + break; + a += PGSIZE; + pa += PGSIZE; } - return pml4; } -// Allocate one page table for the machine for the kernel address -// space for scheduler processes. -void -kvmalloc(void) +// create an empty user page table. +pagetable_t +uvmcreate() { - kpml4 = setupkvm(); - switchkvm(); + pagetable_t pagetable; + pagetable = (pagetable_t) kalloc(); + if(pagetable == 0) + panic("uvmcreate: out of memory"); + memset(pagetable, 0, PGSIZE); + return pagetable; } -// Switch h/w page table register to the kernel-only page table, -// for when no process is running. -void -switchkvm(void) -{ - lcr3(V2P(kpml4)); // switch to the kernel page table -} - - -// Switch TSS and h/w page table to correspond to process p. 
-void -switchuvm(struct proc *p) -{ - struct desctr dtr; - struct cpu *c; - - if(p == 0) - panic("switchuvm: no process"); - if(p->kstack == 0) - panic("switchuvm: no kstack"); - if(p->pgdir == 0) - panic("switchuvm: no pgdir"); - - pushcli(); - - c = mycpu(); - uint64 base = (uint64) &(c->ts); - c->gdt[SEG_TSS>>3] = SEGDESC(base, (sizeof(c->ts)-1), SEG_P|SEG_TSS64A); - c->gdt[(SEG_TSS>>3)+1] = SEGDESCHI(base); - c->ts.rsp[0] = (uint64) p->kstack + KSTACKSIZE; - c->ts.iomba = (ushort) 0xFFFF; - - dtr.limit = sizeof(c->gdt) - 1; - dtr.base = (uint64)c->gdt; - lgdt((void *)&dtr.limit); - - ltr(SEG_TSS); - - lcr3(V2P(p->pgdir)); // switch to process's address space - - popcli(); -} - -// Load the initcode into address 0 of pgdir. +// Load the user initcode into address 0 of pagetable, +// for the very first process. // sz must be less than a page. void -inituvm(pde_t *pgdir, char *init, uint sz) +uvminit(pagetable_t pagetable, char *src, uint sz) { char *mem; @@ -232,63 +164,8 @@ inituvm(pde_t *pgdir, char *init, uint sz) panic("inituvm: more than a page"); mem = kalloc(); memset(mem, 0, PGSIZE); - mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W|PTE_U); - memmove(mem, init, sz); -} - -// Load a program segment into pgdir. addr must be page-aligned -// and the pages from addr to addr+sz must already be mapped. -int -loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) -{ - uint i, n; - uint64 pa; - pte_t *pte; - - if((uint64) addr % PGSIZE != 0) - panic("loaduvm: addr must be page aligned"); - for(i = 0; i < sz; i += PGSIZE){ - if((pte = walkpgdir(pgdir, addr+i, 0)) == 0) - panic("loaduvm: address should exist"); - pa = PTE_ADDR(*pte); - if(sz - i < PGSIZE) - n = sz - i; - else - n = PGSIZE; - if(readi(ip, P2V(pa), offset+i, n) != n) - return -1; - } - return 0; -} - -// Allocate page tables and physical memory to grow process from oldsz to -// newsz, which need not be page aligned. Returns new size or 0 on error. 
-int -allocuvm(pde_t *pgdir, uint oldsz, uint newsz) -{ - char *mem; - uint64 a; - - if(newsz >= KERNBASE) - return 0; - if(newsz < oldsz) - return oldsz; - - a = PGROUNDUP(oldsz); - for(; a < newsz; a += PGSIZE){ - mem = kalloc(); - if(mem == 0){ - deallocuvm(pgdir, newsz, oldsz); - return 0; - } - memset(mem, 0, PGSIZE); - if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){ - deallocuvm(pgdir, newsz, oldsz); - kfree(mem); - return 0; - } - } - return newsz; + mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U); + memmove(mem, src, sz); } // Deallocate user pages to bring the process size from oldsz to @@ -296,153 +173,66 @@ allocuvm(pde_t *pgdir, uint oldsz, uint newsz) // need to be less than oldsz. oldsz can be larger than the actual // process size. Returns the new process size. int -deallocuvm(pde_t *pml4, uint64 oldsz, uint64 newsz) +uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz) { - pte_t *pte; - uint64 a, pa; - if(newsz >= oldsz) return oldsz; - - a = PGROUNDUP(newsz); - for(; a < oldsz; a += PGSIZE){ - pte = walkpgdir(pml4, (char*)a, 0); - if(!pte) - continue; - else if((*pte & PTE_P) != 0){ - pa = PTE_ADDR(*pte); - if(pa == 0) - panic("kfree"); - char *v = P2V(pa); - kfree(v); - *pte = 0; - } - } + unmappages(pagetable, newsz, oldsz - newsz, 1); return newsz; } -// Recursively free a page table -void -freelevel(pde_t *pgtab, int level) { - int i; - pde_t *pd; - - if (level > 0) { - for(i = 0; i < NPDENTRIES; i++) { - if(pgtab[i] & PTE_P){ - pd = (pde_t*)P2V(PTE_ADDR(pgtab[i])); - freelevel(pd, level-1); - } +// Recursively free page table pages. +// All leaf mappings must already have been removed. +static void +freewalk(pagetable_t pagetable) +{ + // there are 2^9 = 512 PTEs in a page table. + for(int i = 0; i < 512; i++){ + pte_t pte = pagetable[i]; + if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){ + // this PTE points to a lower-level page table. 
+ uint64 child = PTE2PA(pte); + freewalk((pagetable_t)child); + pagetable[i] = 0; + } else if(pte & PTE_V){ + // XXX trampoline pages... + panic("freewalk: leaf"); } } - kfree((char*)pgtab); + kfree((void*)pagetable); } -// Free all the physical memory pages -// in the user part and page table +// Free user memory pages, +// then free page table pages. void -freevm(pde_t *pml4, uint64 sz) +uvmfree(pagetable_t pagetable, uint64 sz) { - if(pml4 == 0) - panic("freevm: no pgdir"); - - deallocuvm(pml4, sz, 0); - freelevel(pml4, L_PML4); + unmappages(pagetable, 0, sz, 1); + freewalk(pagetable); } -// Clear PTE_U on a page. Used to create an inaccessible -// page beneath the user stack. +// Given a parent process's page table, copy +// its memory into a child's page table. +// Copies both the page table and the +// physical memory. void -clearpteu(pde_t *pgdir, char *uva) +uvmcopy(pagetable_t old, pagetable_t new, uint64 sz) { pte_t *pte; - - pte = walkpgdir(pgdir, uva, 0); - if(pte == 0) - panic("clearpteu"); - *pte &= ~PTE_U; -} - -// Given a parent process's page table, create a copy -// of it for a child. -pde_t* -copyuvm(pde_t *pgdir, uint sz) -{ - pde_t *d; - pte_t *pte; uint64 pa, i; uint flags; char *mem; - if((d = setupkvm()) == 0) - return 0; for(i = 0; i < sz; i += PGSIZE){ - if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0) + if((pte = walk(old, (void *) i, 0)) == 0) panic("copyuvm: pte should exist"); - if(!(*pte & PTE_P)) + if((*pte & PTE_V) == 0) panic("copyuvm: page not present"); - pa = PTE_ADDR(*pte); + pa = PTE2PA(*pte); flags = PTE_FLAGS(*pte); if((mem = kalloc()) == 0) - goto bad; - memmove(mem, (char*)P2V(pa), PGSIZE); - if(mappages(d, (void*)i, PGSIZE, V2P(mem), flags) < 0) { - kfree(mem); - goto bad; - } + panic("uvmcopy: kalloc failed"); + memmove(mem, (char*)pa, PGSIZE); + mappages(new, i, PGSIZE, (uint64)mem, flags); } - return d; - -bad: - freevm(d, sz); - return 0; } - -//PAGEBREAK! -// Map user virtual address to kernel address. 
-char* -uva2ka(pde_t *pgdir, char *uva) -{ - pte_t *pte; - - pte = walkpgdir(pgdir, uva, 0); - if((*pte & PTE_P) == 0) - return 0; - if((*pte & PTE_U) == 0) - return 0; - return (char*)P2V(PTE_ADDR(*pte)); -} - -// Copy len bytes from p to user address va in page table pgdir. -// Most useful when pgdir is not the current page table. -// uva2ka ensures this only works for PTE_U pages. -int -copyout(pde_t *pgdir, uint va, void *p, uint len) -{ - char *buf, *pa0; - uint64 n, va0; - - buf = (char*)p; - while(len > 0){ - va0 = (uint)PGROUNDDOWN(va); - pa0 = uva2ka(pgdir, (char*)va0); - if(pa0 == 0) - return -1; - n = PGSIZE - (va - va0); - if(n > len) - n = len; - memmove(pa0 + (va - va0), buf, n); - len -= n; - buf += n; - va = va0 + PGSIZE; - } - return 0; -} - -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. - diff --git a/x86.h b/x86.h deleted file mode 100644 index 1ae64ac..0000000 --- a/x86.h +++ /dev/null @@ -1,198 +0,0 @@ -// Routines to let C code use special x86 instructions. 
- -#ifndef __ASSEMBLER__ - -static inline uchar -inb(ushort port) -{ - uchar data; - - asm volatile("in %1,%0" : "=a" (data) : "d" (port)); - return data; -} - -static inline void -insl(int port, void *addr, int cnt) -{ - asm volatile("cld; rep insl" : - "=D" (addr), "=c" (cnt) : - "d" (port), "0" (addr), "1" (cnt) : - "memory", "cc"); -} - -static inline void -outb(ushort port, uchar data) -{ - asm volatile("out %0,%1" : : "a" (data), "d" (port)); -} - -static inline void -outw(ushort port, ushort data) -{ - asm volatile("out %0,%1" : : "a" (data), "d" (port)); -} - -static inline void -outsl(int port, const void *addr, int cnt) -{ - asm volatile("cld; rep outsl" : - "=S" (addr), "=c" (cnt) : - "d" (port), "0" (addr), "1" (cnt) : - "cc"); -} - -static inline void -stosb(void *addr, int data, int cnt) -{ - asm volatile("cld; rep stosb" : - "=D" (addr), "=c" (cnt) : - "0" (addr), "1" (cnt), "a" (data) : - "memory", "cc"); -} - -static inline void -stosl(void *addr, int data, int cnt) -{ - asm volatile("cld; rep stosl" : - "=D" (addr), "=c" (cnt) : - "0" (addr), "1" (cnt), "a" (data) : - "memory", "cc"); -} - -static inline void -lgdt(void *p) -{ - asm volatile("lgdt (%0)" : : "r" (p) : "memory"); -} - -static inline void -lidt(void *p) -{ - asm volatile("lidt (%0)" : : "r" (p) : "memory"); -} - -static inline void -ltr(ushort sel) -{ - asm volatile("ltr %0" : : "r" (sel)); -} - -static inline uint64 -readeflags(void) -{ - uint64 eflags; - asm volatile("pushf; pop %0" : "=r" (eflags)); - return eflags; -} - -static inline void -loadgs(ushort v) -{ - asm volatile("movw %0, %%gs" : : "r" (v)); -} - -static inline void -cli(void) -{ - asm volatile("cli"); -} - -static inline void -sti(void) -{ - asm volatile("sti"); -} - -static inline uint -xchg(volatile uint *addr, uint newval) -{ - uint result; - - // The + in "+m" denotes a read-modify-write operand. 
- asm volatile("lock; xchgl %0, %1" : - "+m" (*addr), "=a" (result) : - "1" (newval) : - "cc"); - return result; -} - -static inline uint -rcr2(void) -{ - uint64 val; - asm volatile("mov %%cr2,%0" : "=r" (val)); - return val; -} - -static inline void -lcr3(uint64 val) -{ - asm volatile("mov %0,%%cr3" : : "r" (val)); -} - -static inline void -writegs(uint16 v) -{ - __asm volatile("movw %0, %%gs" : : "r" (v)); -} - - -//PAGEBREAK: 36 -// Layout of the trap frame built on the stack by the -// hardware and by trapasm.S, and passed to trap(). -struct trapframe { - uint64 rax; - uint64 rbx; - uint64 rcx; - uint64 rdx; - uint64 rbp; - uint64 rsi; - uint64 rdi; - uint64 r8; - uint64 r9; - uint64 r10; - uint64 r11; - uint64 r12; - uint64 r13; - uint64 r14; - uint64 r15; - uint64 trapno; - uint64 err; - uint64 rip; - uint16 cs; - uint16 padding[3]; - uint64 rflags; - uint64 rsp; - uint64 ss; -}__attribute__((packed)); - -struct sysframe { - // arguments - uint64 rdi; - uint64 rsi; - uint64 rdx; - uint64 r10; - uint64 r8; - uint64 r9; - - // callee-saved registers - uint64 r15; - uint64 r14; - uint64 r13; - uint64 r12; - uint64 rbx; - uint64 rbp; - - // return value - uint64 rax; - - // syscall registers - uint64 r11; // eflags - uint64 rcx; // rip - uint64 rsp; - -}__attribute__((packed)); - -#endif - -#define TF_CS 144 // offset in trapframe for saved cs