From f9e35d4f093bda55fa8cad5d01b710453349c42a Mon Sep 17 00:00:00 2001 From: Imbus <> Date: Thu, 26 Jun 2025 01:07:14 +0200 Subject: [PATCH] Diet xv6, currently broken --- defs.h | 210 ++++++++++++++++++ entry.S | 65 ++++++ kernel.c | 37 ++++ kernel.ld | 78 +++++++ kernelvec.S | 131 +++++++++++ memlayout.h | 71 ++++++ param.h | 15 ++ proc.c | 622 ++++++++++++++++++++++++++++++++++++++++++++++++++++ proc.h | 115 ++++++++++ riscv.h | 383 ++++++++++++++++++++++++++++++++ spinlock.c | 113 ++++++++++ spinlock.h | 52 +++++ start.c | 84 +++++++ trap.c | 200 +++++++++++++++++ types.h | 8 + 15 files changed, 2184 insertions(+) create mode 100644 defs.h create mode 100644 entry.S create mode 100644 kernel.c create mode 100644 kernel.ld create mode 100644 kernelvec.S create mode 100644 memlayout.h create mode 100644 param.h create mode 100644 proc.c create mode 100644 proc.h create mode 100644 riscv.h create mode 100644 spinlock.c create mode 100644 spinlock.h create mode 100644 start.c create mode 100644 trap.c create mode 100644 types.h diff --git a/defs.h b/defs.h new file mode 100644 index 0000000..6bf6f0c --- /dev/null +++ b/defs.h @@ -0,0 +1,210 @@ +#pragma once + +#include "riscv.h" +#include "spinlock.h" +#include "types.h" + +struct buf; +struct context; +struct file; +struct inode; +struct pipe; +struct proc; +struct sleeplock; +struct stat; +struct superblock; + +// bio.c +void binit(void); +struct buf *bread(u32, u32); +void brelse(struct buf *); +void bwrite(struct buf *); +void bpin(struct buf *); +void bunpin(struct buf *); + +// console.c +void consoleinit(void); +void consoleintr(int); +void consputc(int); + +// exec.c +int exec(char *, char **); + +// file.c +struct file *filealloc(void); +void fileclose(struct file *); +struct file *filedup(struct file *); +void fileinit(void); +int fileread(struct file *, u64, int n); +int filestat(struct file *, u64 addr); +int filewrite(struct file *, u64, int n); + +// fs.c +void fsinit(int); +int dirlink(struct inode *, char *, u32); +struct inode *dirlookup(struct inode *, char *, u32 *); +struct inode *ialloc(u32, short); +struct inode *idup(struct inode *); +void iinit(); +void ilock(struct inode *); +void iput(struct inode *); +void iunlock(struct inode *); +void iunlockput(struct inode *); +void iupdate(struct inode *); +int namecmp(const char *, const char *); +struct inode *namei(char *); +struct inode *nameiparent(char *, char *); +int readi(struct inode *, int, u64, u32, u32); +void stati(struct inode *, struct stat *); +int writei(struct inode *, int, u64, u32, u32); +void itrunc(struct inode *); + +// ramdisk.c +void ramdiskinit(void); +void ramdiskintr(void); +void ramdiskrw(struct buf *); + +/** + * Kernel memory allocator + * + * Allocate one 4096-byte page of physical memory. + * Returns a pointer that the kernel can use. + * Returns 0 if the memory cannot be allocated. + * See: kalloc.c + */ +void *kalloc(void); + +/** + * Kernel memory allocator + * + * Free the page of physical memory pointed at by pa, + * which normally should have been returned by a + * call to kalloc(). (The exception is when + * initializing the allocator; see kinit above.) + * See: kalloc.c + */ +void kfree(void *); + +/** + * Initialize kernel memory allocator + * + * Called by main() on the way to the kernel's main loop. 
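+ * Expected to place the physical pages between `end` (the first
+ * address after the kernel, defined in kernel.ld) and PHYSTOP
+ * on the free list; kalloc.c itself is not part of this patch.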
+ * See: kalloc.c + */ +void kinit(void); + +// log.c +void initlog(int, struct superblock *); +void log_write(struct buf *); +void begin_op(void); +void end_op(void); + +// pipe.c +int pipealloc(struct file **, struct file **); +void pipeclose(struct pipe *, int); +int piperead(struct pipe *, u64, int); +int pipewrite(struct pipe *, u64, int); + +// printf.c +void printf(char *, ...); +void panic(char *) __attribute__((noreturn)); +void printfinit(void); + +// proc.c +int cpuid(void); +void exit(int); +int fork(void); +int growproc(int); +void proc_mapstacks(pagetable_t); +pagetable_t proc_pagetable(struct proc *); +void proc_freepagetable(pagetable_t, u64); +int kill(int); +int killed(struct proc *); +void setkilled(struct proc *); +struct cpu *mycpu(void); +struct cpu *getmycpu(void); +struct proc *myproc(); +void procinit(void); +void scheduler(void) __attribute__((noreturn)); +void sched(void); +void sleep(void *, struct spinlock *); +void userinit(void); +int wait(u64); +void wakeup(void *); +void yield(void); +int either_copyout(int user_dst, u64 dst, void *src, u64 len); +int either_copyin(void *dst, int user_src, u64 src, u64 len); +void procdump(void); + +// swtch.S +void swtch(struct context *, struct context *); + +// sleeplock.c +void acquiresleep(struct sleeplock *); +void releasesleep(struct sleeplock *); +int holdingsleep(struct sleeplock *); +void initsleeplock(struct sleeplock *, char *); + +// string.c +int memcmp(const void *, const void *, u32); +void *memmove(void *, const void *, u32); +void *memset(void *, int, u32); +char *safestrcpy(char *, const char *, int); +int strlen(const char *); +int strncmp(const char *, const char *, u32); +char *strncpy(char *, const char *, int); + +// syscall.c +void argint(int, int *); +int argstr(int, char *, int); +void argaddr(int, u64 *); +int fetchstr(u64, char *, int); +int fetchaddr(u64, u64 *); +void syscall(); + +// trap.c +extern u32 ticks; +void trapinit(void); +void trapinithart(void); +extern struct spinlock tickslock; +void usertrapret(void); + +// uart.c +void uartinit(void); +void uartintr(void); +void uartputc(int); +void uartputc_sync(int); +int uartgetc(void); + +// vm.c +void kvminit(void); +void kvminithart(void); +void kvmmap(pagetable_t, u64, u64, u64, int); +int mappages(pagetable_t, u64, u64, u64, int); +pagetable_t uvmcreate(void); +void uvmfirst(pagetable_t, u8 *, u32); +u64 uvmalloc(pagetable_t, u64, u64, int); +u64 uvmdealloc(pagetable_t, u64, u64); +int uvmcopy(pagetable_t, pagetable_t, u64); +void uvmfree(pagetable_t, u64); +void uvmunmap(pagetable_t, u64, u64, int); +void uvmclear(pagetable_t, u64); +pte_t *walk(pagetable_t, u64, int); +u64 walkaddr(pagetable_t, u64); +int copyout(pagetable_t, u64, char *, u64); +int copyin(pagetable_t, char *, u64, u64); +int copyinstr(pagetable_t, char *, u64, u64); + +// plic.c +void plicinit(void); +void plicinithart(void); +int plic_claim(void); +void plic_complete(int); + +// virtio_disk.c +void virtio_disk_init(void); +void virtio_disk_rw(struct buf *, int); +void virtio_disk_intr(void); + +// number of elements in fixed-size array +#define NELEM(x) (sizeof(x) / sizeof((x)[0])) diff --git a/entry.S b/entry.S new file mode 100644 index 0000000..0a2e5d9 --- /dev/null +++ b/entry.S @@ -0,0 +1,65 @@ +# For a quick reference on RISC-V assembly: +# https://risc-v.guru/instructions/ + +# Kernel entry point +# +# qemu -kernel loads the kernel at 0x80000000 +# and causes each hart (i.e. CPU) to jump there. 
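+#
+# A typical qemu invocation for this layout (the exact flags are an
+# assumption, not spelled out in this patch) is:
+#
+#   qemu-system-riscv64 -machine virt -bios none -kernel kernel \
+#       -m 128M -smp 8 -nographic
+#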
+# kernel.ld causes the following code to
+# be placed at 0x80000000.

.section .text
.global _entry
_entry:
        # Clear all the registers.
        li x1, 0x0
        li x2, 0x0
        li x3, 0x0
        li x4, 0x0
        li x5, 0x0
        li x6, 0x0
        li x7, 0x0
        li x8, 0x0
        li x9, 0x0
        li x10, 0x0
        li x11, 0x0
        li x12, 0x0
        li x13, 0x0
        li x14, 0x0
        li x15, 0x0
        li x16, 0x0
        li x17, 0x0
        li x18, 0x0
        li x19, 0x0
        li x20, 0x0
        li x21, 0x0
        li x22, 0x0
        li x23, 0x0
        li x24, 0x0
        li x25, 0x0
        li x26, 0x0
        li x27, 0x0
        li x28, 0x0
        li x29, 0x0
        li x30, 0x0
        li x31, 0x0

        # set up a stack for C.
        # stack0 is declared in start.c,
        # with a 4096-byte stack per CPU.
        # sp = stack0 + ((hartid + 1) * 4096), the top of this
        # hart's stack, since stacks grow downward.
        la sp, stack0
        li a0, 1024*4     # a0 = 4096

        # Control and Status Register Read
        csrr a1, mhartid  # a1 = unique hart (core) id
        addi a1, a1, 1    # a1 += 1
        mul a0, a0, a1    # a0 = 4096 * (hartid + 1), the offset of this hart's stack top
        add sp, sp, a0    # sp += a0, stack pointer is now properly configured

        # jump to start() in start.c
        call start

# Infinite spin loop.
spin:
        j spin
diff --git a/kernel.c b/kernel.c
new file mode 100644
index 0000000..d712a30
--- /dev/null
+++ b/kernel.c
@@ -0,0 +1,37 @@
+// volatile unsigned int *uart = (unsigned int *)0x10000000;
+//
+// void _start() __attribute__((naked, section(".text")));
+//
+// #define NCPU 1
+// __attribute__((aligned(16))) char stack0[4096 * NCPU];
+//
+// void uart_putc(char c) { *uart = c; }
+//
+// void uart_print(char *str, int len) {
+//   for (int i = 0; i < len; i++) {
+//     uart_putc(str[i]);
+//   }
+// }
+//
+// // void _start() {
+// // void _entry() {
+// //   asm volatile("la sp, stack0");
+// //
+// //   *uart = 'H';
+// //
+// //   // *uart = 'i';
+// //   // *uart = '\n';
+// //   uart_putc('h');
+// //   // uart_print("Hey", 3);
+// //
+// //   while (1) {};
+// // }
+// //
+// // int main(void) {
+// //
+// // }
+// int strlen(char *str) {
+//   int cnt = 0;
+//   while (str[cnt] != '\0') cnt++; // stop at the NUL terminator (not '\n'),
+//                                   // and index forward so the loop terminates
+//   return cnt;
+// }
diff --git a/kernel.ld b/kernel.ld
new file mode 100644
index 0000000..b355405
--- /dev/null
+++ b/kernel.ld
@@ -0,0 +1,78 @@
+OUTPUT_ARCH( "riscv" )
+ENTRY( _entry )
+
+SECTIONS
+{
+  /*
+   * ensure that entry.S / _entry is at 0x80000000,
+   * where qemu's -kernel jumps.
+   */
+  . = 0x80000000;
+
+  /*
+   * This section contains the code, that is, the machine language
+   * instructions that will be executed by the processor. In here we will
+   * find symbols that reference the functions in your object file.
+   */
+  .text : {
+    /* Match any section that starts with .text. */
+    *(.text*)
+
+    /* Align the next section to a 4KB (page) boundary. */
+    . = ALIGN(0x1000);
+
+    /* Put the trampoline code here. */
+    _trampoline = .;
+
+    /* Match any section that starts with .trampsec. */
+    *(trampsec)
+
+    /* Align the next section to a 4KB (page) boundary. */
+    . = ALIGN(0x1000);
+
+    /* Assert that the trampoline code is exactly 4KB (page) in size. */
+    ASSERT(. - _trampoline == 0x1000, "error: trampoline larger than one page");
+
+    /* Define symbol etext to be the current location. */
+    PROVIDE(etext = .);
+  }
+
+  /*
+   * This contains any data that is marked as read only.
+   * It is not unusual to find this data interleaved with the text section.
+   */
+  .rodata : {
+    /* Align on quadword boundary. */
+    . = ALIGN(16);
+    *(.srodata*) /* do not need to distinguish this from .rodata */
+    . = ALIGN(16);
+    *(.rodata*)
+  }
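+
+  /*
+   * C code can refer to the symbols this script PROVIDEs, e.g.:
+   *   extern char etext[], end[];
+   * vm.c (not in this patch) is expected to map [KERNBASE, etext)
+   * read+execute and the rest of RAM read+write using these symbols.
+   */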
+  /*
+   * This section contains initialized global and static variables:
+   * any global object that has been explicitly initialized to a value
+   * other than zero.
+   */
+  .data : {
+    . = ALIGN(16);
+    *(.sdata*) /* do not need to distinguish this from .data */
+    . = ALIGN(16);
+    *(.data*)
+  }
+
+  /*
+   * Contains all uninitialized global and static variables. These are
+   * usually zeroed out by the startup code before we reach the main
+   * function. However, in an embedded system we usually provide our own
+   * startup code, which means we need to remember to do this ourselves.
+   */
+  .bss : {
+    . = ALIGN(16);
+    *(.sbss*) /* do not need to distinguish this from .bss */
+    . = ALIGN(16);
+    *(.bss*)
+  }
+
+  /* Define symbol end as current location, note that this is not aligned, see vm.c */
+  PROVIDE(end = .);
+}
diff --git a/kernelvec.S b/kernelvec.S
new file mode 100644
index 0000000..008c831
--- /dev/null
+++ b/kernelvec.S
@@ -0,0 +1,131 @@
+# For a quick reference on RISC-V assembly:
+# https://risc-v.guru/instructions/

# Kernel trap handling
#
# interrupts and exceptions while in supervisor
# mode come here.
#
# the current stack is a kernel stack.
# push all registers, call kerneltrap().
# when kerneltrap() returns, restore registers, return.

.globl kerneltrap
.globl kernelvec
.align 4
kernelvec:
        # make room to save registers.
        addi sp, sp, -256

        # save the registers.
        sd ra, 0(sp)
        sd sp, 8(sp)
        sd gp, 16(sp)
        sd tp, 24(sp)
        sd t0, 32(sp)
        sd t1, 40(sp)
        sd t2, 48(sp)
        sd s0, 56(sp)
        sd s1, 64(sp)
        sd a0, 72(sp)
        sd a1, 80(sp)
        sd a2, 88(sp)
        sd a3, 96(sp)
        sd a4, 104(sp)
        sd a5, 112(sp)
        sd a6, 120(sp)
        sd a7, 128(sp)
        sd s2, 136(sp)
        sd s3, 144(sp)
        sd s4, 152(sp)
        sd s5, 160(sp)
        sd s6, 168(sp)
        sd s7, 176(sp)
        sd s8, 184(sp)
        sd s9, 192(sp)
        sd s10, 200(sp)
        sd s11, 208(sp)
        sd t3, 216(sp)
        sd t4, 224(sp)
        sd t5, 232(sp)
        sd t6, 240(sp)

        # call the C trap handler in trap.c
        call kerneltrap

        # restore registers.
        ld ra, 0(sp)
        ld sp, 8(sp)
        ld gp, 16(sp)
        # Skip tp (thread pointer aka x4) (contains hartid)
        # in case we moved CPUs
        ld t0, 32(sp)
        ld t1, 40(sp)
        ld t2, 48(sp)
        ld s0, 56(sp)
        ld s1, 64(sp)
        ld a0, 72(sp)
        ld a1, 80(sp)
        ld a2, 88(sp)
        ld a3, 96(sp)
        ld a4, 104(sp)
        ld a5, 112(sp)
        ld a6, 120(sp)
        ld a7, 128(sp)
        ld s2, 136(sp)
        ld s3, 144(sp)
        ld s4, 152(sp)
        ld s5, 160(sp)
        ld s6, 168(sp)
        ld s7, 176(sp)
        ld s8, 184(sp)
        ld s9, 192(sp)
        ld s10, 200(sp)
        ld s11, 208(sp)
        ld t3, 216(sp)
        ld t4, 224(sp)
        ld t5, 232(sp)
        ld t6, 240(sp)

        addi sp, sp, 256

        # return to whatever we were doing in the kernel.
        sret

# machine-mode timer interrupt
#
# See: start.c for timervec declaration
# extern void timervec();

.globl timervec
.align 4
timervec:
        # start.c has set up the memory that mscratch points to:
        # scratch[0,8,16] : register save area.
        # scratch[24] : address of CLINT's MTIMECMP register.
        # scratch[32] : desired interval between interrupts.

        csrrw a0, mscratch, a0
        sd a1, 0(a0)
        sd a2, 8(a0)
        sd a3, 16(a0)

        # schedule the next timer interrupt
        # by adding interval to mtimecmp.
        ld a1, 24(a0) # CLINT_MTIMECMP(hart)
        ld a2, 32(a0) # interval
        ld a3, 0(a1)
        add a3, a3, a2
        sd a3, 0(a1)

        # arrange for a supervisor software interrupt
        # after this handler returns.
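        # (writing 2 sets SSIP, bit 1 of sip, so once mret drops
        # below machine mode, devintr() in trap.c sees scause
        # 0x8000000000000001 and treats it as a timer tick.)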
+ li a1, 2 + csrw sip, a1 + + ld a3, 16(a0) + ld a2, 8(a0) + ld a1, 0(a0) + csrrw a0, mscratch, a0 + + mret diff --git a/memlayout.h b/memlayout.h new file mode 100644 index 0000000..813407f --- /dev/null +++ b/memlayout.h @@ -0,0 +1,71 @@ +#pragma once + +// Physical memory layout + +// qemu -machine virt is set up like this, +// based on qemu's hw/riscv/virt.c: +// +// 00001000 -- boot ROM, provided by qemu +// 02000000 -- CLINT +// 0C000000 -- PLIC +// 10000000 -- uart0 +// 10001000 -- virtio disk +// 80000000 -- boot ROM jumps here in machine mode +// -kernel loads the kernel here +// unused RAM after 80000000. + +// the kernel uses physical memory thus: +// 80000000 -- entry.S, then kernel text and data +// end -- start of kernel page allocation area +// PHYSTOP -- end RAM used by the kernel + +// qemu puts UART registers here in physical memory. +#define UART0 0x10000000L +#define UART0_IRQ 10 + +#define QEMU_POWER 0x100000 + +// virtio mmio interface +#define VIRTIO0 0x10001000 +#define VIRTIO0_IRQ 1 + +// core local interruptor (CLINT), which contains the timer. +#define CLINT 0x2000000L +#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8 * (hartid)) +#define CLINT_MTIME (CLINT + 0xBFF8) // cycles since boot. + +// qemu puts platform-level interrupt controller (PLIC) here. +#define PLIC 0x0c000000L +#define PLIC_PRIORITY (PLIC + 0x0) +#define PLIC_PENDING (PLIC + 0x1000) +#define PLIC_MENABLE(hart) (PLIC + 0x2000 + (hart) * 0x100) +#define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart) * 0x100) +#define PLIC_MPRIORITY(hart) (PLIC + 0x200000 + (hart) * 0x2000) +#define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart) * 0x2000) +#define PLIC_MCLAIM(hart) (PLIC + 0x200004 + (hart) * 0x2000) +#define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart) * 0x2000) + +// the kernel expects there to be RAM +// for use by the kernel and user pages +// from physical address 0x80000000 to PHYSTOP. +#define KERNBASE 0x80000000L +#define PHYSTOP (KERNBASE + 128 * 1024 * 1024) + +// map the trampoline page to the highest address, +// in both user and kernel space. +#define TRAMPOLINE (MAXVA - PGSIZE) + +// map kernel stacks beneath the trampoline, +// each surrounded by invalid guard pages. +#define KSTACK(p) (TRAMPOLINE - ((p) + 1) * 2 * PGSIZE) + +// User memory layout. +// Address zero first: +// text +// original data and bss +// fixed-size stack +// expandable heap +// ... 
+// TRAPFRAME (p->trapframe, used by the trampoline) +// TRAMPOLINE (the same page as in the kernel) +#define TRAPFRAME (TRAMPOLINE - PGSIZE) diff --git a/param.h b/param.h new file mode 100644 index 0000000..741f02f --- /dev/null +++ b/param.h @@ -0,0 +1,15 @@ +#pragma once + +#define NPROC 64 // maximum number of processes +#define NCPU 8 // maximum number of CPUs +#define NOFILE 16 // open files per process +#define NFILE 100 // open files per system +#define NINODE 50 // maximum number of active i-nodes +#define NDEV 10 // maximum major device number +#define ROOTDEV 1 // device number of file system root disk +#define MAXARG 32 // max exec arguments +#define MAXOPBLOCKS 10 // max # of blocks any FS op writes +#define LOGSIZE (MAXOPBLOCKS * 3) // max data blocks in on-disk log +#define NBUF (MAXOPBLOCKS * 3) // size of disk block cache +#define FSSIZE 2000 // size of file system in blocks +#define MAXPATH 128 // maximum file path name diff --git a/proc.c b/proc.c new file mode 100644 index 0000000..cdcf836 --- /dev/null +++ b/proc.c @@ -0,0 +1,622 @@ +#include "proc.h" +#include "defs.h" +#include "memlayout.h" +#include "param.h" +#include "riscv.h" +#include "spinlock.h" +#include "types.h" + +struct cpu cpus[NCPU]; + +struct proc proc[NPROC]; + +struct proc *initproc; + +int nextpid = 1; +struct spinlock pid_lock; + +extern void forkret(void); +static void freeproc(struct proc *p); + +extern char trampoline[]; // trampoline.S + +// helps ensure that wakeups of wait()ing +// parents are not lost. helps obey the +// memory model when using p->parent. +// must be acquired before any p->lock. +struct spinlock wait_lock; + +// Allocate a page for each process's kernel stack. +// Map it high in memory, followed by an invalid +// guard page. +void proc_mapstacks(pagetable_t kpgtbl) { + struct proc *p; + + for (p = proc; p < &proc[NPROC]; p++) { + char *pa = kalloc(); + if (pa == 0) + panic("kalloc"); + u64 va = KSTACK((int)(p - proc)); + kvmmap(kpgtbl, va, (u64)pa, PGSIZE, PTE_R | PTE_W); + } +} + +// initialize the proc table. +void procinit(void) { + struct proc *p; + + initlock(&pid_lock, "nextpid"); + initlock(&wait_lock, "wait_lock"); + for (p = proc; p < &proc[NPROC]; p++) { + initlock(&p->lock, "proc"); + p->state = UNUSED; + p->kstack = KSTACK((int)(p - proc)); + } +} + +// Must be called with interrupts disabled, +// to prevent race with process being moved +// to a different CPU. +int cpuid() { + int id = r_tp(); + return id; +} + +// Return this CPU's cpu struct. +// Interrupts must be disabled. +struct cpu *mycpu(void) { + int id = cpuid(); + struct cpu *c = &cpus[id]; + return c; +} + +// Return the current struct proc *, or zero if none. +struct proc *myproc(void) { + push_off(); + struct cpu *c = mycpu(); + struct proc *p = c->proc; + pop_off(); + return p; +} + +int allocpid() { + int pid; + + acquire(&pid_lock); + pid = nextpid; + nextpid = nextpid + 1; + release(&pid_lock); + + return pid; +} + +// Look in the process table for an UNUSED proc. +// If found, initialize state required to run in the kernel, +// and return with p->lock held. +// If there are no free procs, or a memory allocation fails, return 0. +static struct proc *allocproc(void) { + struct proc *p; + + for (p = proc; p < &proc[NPROC]; p++) { + p->mask = 0; + acquire(&p->lock); + if (p->state == UNUSED) { + goto found; + } else { + release(&p->lock); + } + } + return 0; + +found: + p->pid = allocpid(); + p->state = USED; + + // Allocate a trapframe page. 
+ if ((p->trapframe = (struct trapframe *)kalloc()) == 0) { + freeproc(p); + release(&p->lock); + return 0; + } + + // An empty user page table. + p->pagetable = proc_pagetable(p); + if (p->pagetable == 0) { + freeproc(p); + release(&p->lock); + return 0; + } + + // Set up new context to start executing at forkret, + // which returns to user space. + memset(&p->context, 0, sizeof(p->context)); + p->context.ra = (u64)forkret; + p->context.sp = p->kstack + PGSIZE; + + return p; +} + +// free a proc structure and the data hanging from it, +// including user pages. +// p->lock must be held. +static void freeproc(struct proc *p) { + if (p->trapframe) + kfree((void *)p->trapframe); + p->trapframe = 0; + if (p->pagetable) + proc_freepagetable(p->pagetable, p->sz); + p->pagetable = 0; + p->sz = 0; + p->pid = 0; + p->parent = 0; + p->name[0] = 0; + p->chan = 0; + p->killed = 0; + p->xstate = 0; + p->state = UNUSED; +} + +// Create a user page table for a given process, with no user memory, +// but with trampoline and trapframe pages. +pagetable_t proc_pagetable(struct proc *p) { + pagetable_t pagetable; + + // An empty page table. + pagetable = uvmcreate(); + if (pagetable == 0) + return 0; + + // map the trampoline code (for system call return) + // at the highest user virtual address. + // only the supervisor uses it, on the way + // to/from user space, so not PTE_U. + if (mappages(pagetable, TRAMPOLINE, PGSIZE, (u64)trampoline, + PTE_R | PTE_X) < 0) { + uvmfree(pagetable, 0); + return 0; + } + + // map the trapframe page just below the trampoline page, for + // trampoline.S. + if (mappages(pagetable, TRAPFRAME, PGSIZE, (u64)(p->trapframe), + PTE_R | PTE_W) < 0) { + uvmunmap(pagetable, TRAMPOLINE, 1, 0); + uvmfree(pagetable, 0); + return 0; + } + + return pagetable; +} + +// Free a process's page table, and free the +// physical memory it refers to. +void proc_freepagetable(pagetable_t pagetable, u64 sz) { + uvmunmap(pagetable, TRAMPOLINE, 1, 0); + uvmunmap(pagetable, TRAPFRAME, 1, 0); + uvmfree(pagetable, sz); +} + +// a user program that calls exec("/init") +// assembled from ../user/initcode.S +// od -t xC ../user/initcode +u8 initcode[] = {0x17, 0x05, 0x00, 0x00, 0x13, 0x05, 0x45, 0x02, 0x97, + 0x05, 0x00, 0x00, 0x93, 0x85, 0x35, 0x02, 0x93, 0x08, + 0x70, 0x00, 0x73, 0x00, 0x00, 0x00, 0x93, 0x08, 0x20, + 0x00, 0x73, 0x00, 0x00, 0x00, 0xef, 0xf0, 0x9f, 0xff, + 0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00, 0x00, 0x24, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +// Set up first user process. +void userinit(void) { + struct proc *p; + + p = allocproc(); + initproc = p; + + // allocate one user page and copy initcode's instructions + // and data into it. + uvmfirst(p->pagetable, initcode, sizeof(initcode)); + p->sz = PGSIZE; + + // prepare for the very first "return" from kernel to user. + p->trapframe->epc = 0; // user program counter + p->trapframe->sp = PGSIZE; // user stack pointer + + safestrcpy(p->name, "initcode", sizeof(p->name)); + p->cwd = namei("/"); + + p->state = RUNNABLE; + + release(&p->lock); +} + +// Grow or shrink user memory by n bytes. +// Return 0 on success, -1 on failure. +int growproc(int n) { + u64 sz; + struct proc *p = myproc(); + + sz = p->sz; + if (n > 0) { + if ((sz = uvmalloc(p->pagetable, sz, sz + n, PTE_W)) == 0) { + return -1; + } + } else if (n < 0) { + sz = uvmdealloc(p->pagetable, sz, sz + n); + } + p->sz = sz; + return 0; +} + +// Create a new process, copying the parent. +// Sets up child kernel stack to return as if from fork() system call. 
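+// Returns the child's pid in the parent, 0 in the child (the copied
+// trapframe's a0 is overwritten with 0 below), or -1 on failure.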
+int fork(void) { + int i, pid; + struct proc *np; + struct proc *p = myproc(); + + // Allocate process. + if ((np = allocproc()) == 0) { + return -1; + } + + // Copy user memory from parent to child. + if (uvmcopy(p->pagetable, np->pagetable, p->sz) < 0) { + freeproc(np); + release(&np->lock); + return -1; + } + np->sz = p->sz; + + // copy saved user registers. + *(np->trapframe) = *(p->trapframe); + + // Cause fork to return 0 in the child. + np->trapframe->a0 = 0; + + // increment reference counts on open file descriptors. + for (i = 0; i < NOFILE; i++) + if (p->ofile[i]) + np->ofile[i] = filedup(p->ofile[i]); + np->cwd = idup(p->cwd); + + safestrcpy(np->name, p->name, sizeof(p->name)); + + pid = np->pid; + np->mask = 0; + + release(&np->lock); + + acquire(&wait_lock); + np->parent = p; + release(&wait_lock); + + acquire(&np->lock); + np->state = RUNNABLE; + release(&np->lock); + + return pid; +} + +// Pass p's abandoned children to init. +// Caller must hold wait_lock. +void reparent(struct proc *p) { + struct proc *pp; + + for (pp = proc; pp < &proc[NPROC]; pp++) { + if (pp->parent == p) { + pp->parent = initproc; + wakeup(initproc); + } + } +} + +// Exit the current process. Does not return. +// An exited process remains in the zombie state +// until its parent calls wait(). +void exit(int status) { + struct proc *p = myproc(); + + if (p == initproc) + panic("init exiting"); + + // Close all open files. + for (int fd = 0; fd < NOFILE; fd++) { + if (p->ofile[fd]) { + struct file *f = p->ofile[fd]; + fileclose(f); + p->ofile[fd] = 0; + } + } + + begin_op(); + iput(p->cwd); + end_op(); + p->cwd = 0; + + acquire(&wait_lock); + + // Give any children to init. + reparent(p); + + // Parent might be sleeping in wait(). + wakeup(p->parent); + + acquire(&p->lock); + + p->xstate = status; + p->state = ZOMBIE; + + release(&wait_lock); + + // Jump into the scheduler, never to return. + sched(); + panic("zombie exit"); +} + +// Wait for a child process to exit and return its pid. +// Return -1 if this process has no children. +int wait(u64 addr) { + struct proc *pp; + int havekids, pid; + struct proc *p = myproc(); + + acquire(&wait_lock); + + for (;;) { + // Scan through table looking for exited children. + havekids = 0; + for (pp = proc; pp < &proc[NPROC]; pp++) { + if (pp->parent == p) { + // make sure the child isn't still in exit() or swtch(). + acquire(&pp->lock); + + havekids = 1; + if (pp->state == ZOMBIE) { + // Found one. + pid = pp->pid; + if (addr != 0 && + copyout(p->pagetable, addr, (char *)&pp->xstate, + sizeof(pp->xstate)) < 0) { + release(&pp->lock); + release(&wait_lock); + return -1; + } + freeproc(pp); + release(&pp->lock); + release(&wait_lock); + return pid; + } + release(&pp->lock); + } + } + + // No point waiting if we don't have any children. + if (!havekids || killed(p)) { + release(&wait_lock); + return -1; + } + + // Wait for a child to exit. + sleep(p, &wait_lock); // DOC: wait-sleep + } +} + +// Per-CPU process scheduler. +// Each CPU calls scheduler() after setting itself up. +// Scheduler never returns. It loops, doing: +// - choose a process to run. +// - swtch to start running that process. +// - eventually that process transfers control +// via swtch back to the scheduler. +void scheduler(void) { + struct proc *p; + struct cpu *c = mycpu(); + + c->proc = 0; + for (;;) { + // Avoid deadlock by ensuring that devices can interrupt. 
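+    // (if every process were asleep waiting on I/O and this loop ran
+    // with interrupts off, the device interrupt that would make a
+    // process RUNNABLE again could never be delivered on this hart.)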
+ intr_on(); + + for (p = proc; p < &proc[NPROC]; p++) { + acquire(&p->lock); + if (p->state == RUNNABLE) { + // Switch to chosen process. It is the process's job + // to release its lock and then reacquire it + // before jumping back to us. + p->state = RUNNING; + c->proc = p; + swtch(&c->context, &p->context); + + // Process is done running for now. + // It should have changed its p->state before coming back. + c->proc = 0; + } + release(&p->lock); + } + } +} + +// Switch to scheduler. Must hold only p->lock +// and have changed proc->state. Saves and restores +// intena because intena is a property of this +// kernel thread, not this CPU. It should +// be proc->intena and proc->noff, but that would +// break in the few places where a lock is held but +// there's no process. +void sched(void) { + int intena; + struct proc *p = myproc(); + + if (!holding(&p->lock)) + panic("sched p->lock"); + if (mycpu()->noff != 1) + panic("sched locks"); + if (p->state == RUNNING) + panic("sched running"); + if (intr_get()) + panic("sched interruptible"); + + intena = mycpu()->intena; + swtch(&p->context, &mycpu()->context); + mycpu()->intena = intena; +} + +// Give up the CPU for one scheduling round. +void yield(void) { + struct proc *p = myproc(); + acquire(&p->lock); + p->state = RUNNABLE; + sched(); + release(&p->lock); +} + +// A fork child's very first scheduling by scheduler() +// will swtch to forkret. +void forkret(void) { + static int first = 1; + + // Still holding p->lock from scheduler. + release(&myproc()->lock); + + if (first) { + // File system initialization must be run in the context of a + // regular process (e.g., because it calls sleep), and thus cannot + // be run from main(). + first = 0; + fsinit(ROOTDEV); + } + + usertrapret(); +} + +// Atomically release lock and sleep on chan. +// Reacquires lock when awakened. +void sleep(void *chan, struct spinlock *lk) { + struct proc *p = myproc(); + + // Must acquire p->lock in order to + // change p->state and then call sched. + // Once we hold p->lock, we can be + // guaranteed that we won't miss any wakeup + // (wakeup locks p->lock), + // so it's okay to release lk. + + acquire(&p->lock); // DOC: sleeplock1 + release(lk); + + // Go to sleep. + p->chan = chan; + p->state = SLEEPING; + + sched(); + + // Tidy up. + p->chan = 0; + + // Reacquire original lock. + release(&p->lock); + acquire(lk); +} + +// Wake up all processes sleeping on chan. +// Must be called without any p->lock. +void wakeup(void *chan) { + struct proc *p; + + for (p = proc; p < &proc[NPROC]; p++) { + if (p != myproc()) { + acquire(&p->lock); + if (p->state == SLEEPING && p->chan == chan) { + p->state = RUNNABLE; + } + release(&p->lock); + } + } +} + +// Kill the process with the given pid. +// The victim won't exit until it tries to return +// to user space (see usertrap() in trap.c). +int kill(int pid) { + struct proc *p; + + for (p = proc; p < &proc[NPROC]; p++) { + acquire(&p->lock); + if (p->pid == pid) { + p->killed = 1; + if (p->state == SLEEPING) { + // Wake process from sleep(). + p->state = RUNNABLE; + } + release(&p->lock); + return 0; + } + release(&p->lock); + } + return -1; +} + +void setkilled(struct proc *p) { + acquire(&p->lock); + p->killed = 1; + release(&p->lock); +} + +int killed(struct proc *p) { + int k; + + acquire(&p->lock); + k = p->killed; + release(&p->lock); + return k; +} + +// Copy to either a user address, or kernel address, +// depending on usr_dst. +// Returns 0 on success, -1 on error. 
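+// (-1 can only come from copyout(); the kernel-to-kernel memmove
+// branch cannot fail.)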
+int either_copyout(int user_dst, u64 dst, void *src, u64 len) { + struct proc *p = myproc(); + if (user_dst) { + return copyout(p->pagetable, dst, src, len); + } else { + memmove((char *)dst, src, len); + return 0; + } +} + +// Copy from either a user address, or kernel address, +// depending on usr_src. +// Returns 0 on success, -1 on error. +int either_copyin(void *dst, int user_src, u64 src, u64 len) { + struct proc *p = myproc(); + if (user_src) { + return copyin(p->pagetable, dst, src, len); + } else { + memmove(dst, (char *)src, len); + return 0; + } +} + +// Print a process listing to console. For debugging. +// Runs when user types ^P on console. +// No lock to avoid wedging a stuck machine further. +void procdump(void) { + static char *states[] = { + [UNUSED] = "unused", [USED] = "used", [SLEEPING] = "sleep ", + [RUNNABLE] = "runble", [RUNNING] = "run ", [ZOMBIE] = "zombie"}; + struct proc *p; + char *state; + + printf("\n"); + for (p = proc; p < &proc[NPROC]; p++) { + if (p->state == UNUSED) + continue; + if (p->state >= 0 && p->state < NELEM(states) && states[p->state]) + state = states[p->state]; + else + state = "???"; + printf("%d %s %s", p->pid, state, p->name); + printf("\n"); + } +} diff --git a/proc.h b/proc.h new file mode 100644 index 0000000..7803143 --- /dev/null +++ b/proc.h @@ -0,0 +1,115 @@ +#pragma once + +#include "param.h" +#include "riscv.h" +#include "spinlock.h" +#include "types.h" + +/** Saved registers for kernel context switches. */ +struct context { + u64 ra; + u64 sp; + + // callee-saved + u64 s0; + u64 s1; + u64 s2; + u64 s3; + u64 s4; + u64 s5; + u64 s6; + u64 s7; + u64 s8; + u64 s9; + u64 s10; + u64 s11; +}; + +/** Per-CPU state. */ +struct cpu { + struct proc *proc; // The process running on this cpu, or null. + struct context context; // swtch() here to enter scheduler(). + int noff; // Depth of push_off() nesting. + int intena; // Were interrupts enabled before push_off()? +}; + +extern struct cpu cpus[NCPU]; + +// per-process data for the trap handling code in trampoline.S. +// sits in a page by itself just under the trampoline page in the +// user page table. not specially mapped in the kernel page table. +// uservec in trampoline.S saves user registers in the trapframe, +// then initializes registers from the trapframe's +// kernel_sp, kernel_hartid, kernel_satp, and jumps to kernel_trap. +// usertrapret() and userret in trampoline.S set up +// the trapframe's kernel_*, restore user registers from the +// trapframe, switch to the user page table, and enter user space. +// the trapframe includes callee-saved user registers like s0-s11 because the +// return-to-user path via usertrapret() doesn't return through +// the entire kernel call stack. 
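+// the /* byte offset */ annotations below must match the hard-coded
+// load/store offsets in trampoline.S, so the field order and layout
+// cannot change without updating that file.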
+struct trapframe { + /* 0 */ u64 kernel_satp; // kernel page table + /* 8 */ u64 kernel_sp; // top of process's kernel stack + /* 16 */ u64 kernel_trap; // usertrap() + /* 24 */ u64 epc; // saved user program counter + /* 32 */ u64 kernel_hartid; // saved kernel tp + /* 40 */ u64 ra; + /* 48 */ u64 sp; + /* 56 */ u64 gp; + /* 64 */ u64 tp; + /* 72 */ u64 t0; + /* 80 */ u64 t1; + /* 88 */ u64 t2; + /* 96 */ u64 s0; + /* 104 */ u64 s1; + /* 112 */ u64 a0; + /* 120 */ u64 a1; + /* 128 */ u64 a2; + /* 136 */ u64 a3; + /* 144 */ u64 a4; + /* 152 */ u64 a5; + /* 160 */ u64 a6; + /* 168 */ u64 a7; + /* 176 */ u64 s2; + /* 184 */ u64 s3; + /* 192 */ u64 s4; + /* 200 */ u64 s5; + /* 208 */ u64 s6; + /* 216 */ u64 s7; + /* 224 */ u64 s8; + /* 232 */ u64 s9; + /* 240 */ u64 s10; + /* 248 */ u64 s11; + /* 256 */ u64 t3; + /* 264 */ u64 t4; + /* 272 */ u64 t5; + /* 280 */ u64 t6; +}; + +enum procstate { UNUSED, USED, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; + +/** Per-process state */ +struct proc { + struct spinlock lock; + + // p->lock must be held when using these: + enum procstate state; // Process state + void *chan; // If non-zero, sleeping on chan + int killed; // If non-zero, have been killed + int xstate; // Exit status to be returned to parent's wait + int pid; // Process ID + + // wait_lock must be held when using this: + struct proc *parent; // Parent process + + // these are private to the process, so p->lock need not be held. + u64 kstack; // Virtual address of kernel stack + u64 sz; // Size of process memory (bytes) + pagetable_t pagetable; // User page table + struct trapframe *trapframe; // data page for trampoline.S + struct context context; // swtch() here to run process + struct file *ofile[NOFILE]; // Open files + struct inode *cwd; // Current directory + char name[16]; // Process name (debugging) + int mask; // Process mask +}; diff --git a/riscv.h b/riscv.h new file mode 100644 index 0000000..ed263da --- /dev/null +++ b/riscv.h @@ -0,0 +1,383 @@ +#pragma once + +#ifndef __ASSEMBLER__ + +#include "types.h" + +// which hart (core) is this? +static inline u64 r_mhartid() { + u64 x; + asm volatile("csrr %0, mhartid" : "=r"(x)); + return x; +} + +// Machine Status Register, mstatus + +#define MSTATUS_MPP_MASK (3L << 11) // previous mode. +#define MSTATUS_MPP_M (3L << 11) +#define MSTATUS_MPP_S (1L << 11) +#define MSTATUS_MPP_U (0L << 11) +#define MSTATUS_MIE (1L << 3) // machine-mode interrupt enable. + +static inline u64 r_mstatus() { + u64 x; + asm volatile("csrr %0, mstatus" : "=r"(x)); + return x; +} + +static inline void w_mstatus(u64 x) { + asm volatile("csrw mstatus, %0" : : "r"(x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void w_mepc(u64 x) { asm volatile("csrw mepc, %0" : : "r"(x)); } + +// Supervisor Status Register, sstatus + +/** Supervisor Previous Privilege */ +#define SSTATUS_SPP (1L << 8) // Previous mode, 1=Supervisor, 0=User + +/** Supervisor Previous Interrupt Enable */ +#define SSTATUS_SPIE (1L << 5) + +/** User Previous Interrupt Enable */ +#define SSTATUS_UPIE (1L << 4) + +/** Supervisor Interrupt Enable */ +#define SSTATUS_SIE (1L << 1) + +/** User Interrupt Enable */ +#define SSTATUS_UIE (1L << 0) + +/** + * Read the value of the sstatus register. + * (Supervisor Status Register) + */ +static inline u64 r_sstatus() { + u64 x; + asm volatile("csrr %0, sstatus" : "=r"(x)); + return x; +} + +/** + * Write a value to the sstatus register. 
+ * (Supervisor Status Register)
+ */
+static inline void w_sstatus(u64 x) {
+  asm volatile("csrw sstatus, %0" : : "r"(x));
+}
+
+/** Read Supervisor Interrupt Pending */
+static inline u64 r_sip() {
+  u64 x;
+  asm volatile("csrr %0, sip" : "=r"(x));
+  return x;
+}
+
+/** Write Supervisor Interrupt Pending */
+static inline void w_sip(u64 x) { asm volatile("csrw sip, %0" : : "r"(x)); }
+
+/** Supervisor External Interrupt Enable */
+#define SIE_SEIE (1L << 9)
+
+/** Supervisor Timer Interrupt Enable */
+#define SIE_STIE (1L << 5)
+
+/** Supervisor Software Interrupt Enable */
+#define SIE_SSIE (1L << 1)
+
+/**
+ * Read the value of the sie register.
+ * (Supervisor Interrupt Enable)
+ */
+static inline u64 r_sie() {
+  u64 x;
+  asm volatile("csrr %0, sie" : "=r"(x));
+  return x;
+}
+
+/**
+ * Write the value to the sie register.
+ * (Supervisor Interrupt Enable)
+ */
+static inline void w_sie(u64 x) { asm volatile("csrw sie, %0" : : "r"(x)); }
+
+/** Machine External Interrupt Enable */
+#define MIE_MEIE (1L << 11)
+
+/** Machine Timer Interrupt Enable */
+#define MIE_MTIE (1L << 7)
+
+/** Machine Software Interrupt Enable */
+#define MIE_MSIE (1L << 3)
+
+/**
+ * Read the value of the mie register.
+ * (Machine Interrupt Enable)
+ */
+static inline u64 r_mie() {
+  u64 x;
+  asm volatile("csrr %0, mie" : "=r"(x));
+  return x;
+}
+
+/**
+ * Write the value to the mie register.
+ * (Machine Interrupt Enable)
+ */
+static inline void w_mie(u64 x) { asm volatile("csrw mie, %0" : : "r"(x)); }
+
+// The supervisor exception program counter holds the
+// instruction address to which a return from
+// exception will go.
+//
+// It is automatically set when an exception occurs.
+// It can also be set with the w_sepc() function.
+// (A wrapper for csrw sepc, x)
+//
+// Information related to the cause of the exception
+// will be in the scause register. Readable with the
+// r_scause() defined below.
+
+/** Write Supervisor Exception Program Counter */
+static inline void w_sepc(u64 x) { asm volatile("csrw sepc, %0" : : "r"(x)); }
+
+/** Read Supervisor Exception Program Counter */
+static inline u64 r_sepc() {
+  u64 x;
+  asm volatile("csrr %0, sepc" : "=r"(x));
+  return x;
+}
+
+/** Read Machine Exception Delegation */
+static inline u64 r_medeleg() {
+  u64 x;
+  asm volatile("csrr %0, medeleg" : "=r"(x));
+  return x;
+}
+
+/** Write Machine Exception Delegation */
+static inline void w_medeleg(u64 x) {
+  asm volatile("csrw medeleg, %0" : : "r"(x));
+}
+
+/** Read Machine Interrupt Delegation */
+static inline u64 r_mideleg() {
+  u64 x;
+  asm volatile("csrr %0, mideleg" : "=r"(x));
+  return x;
+}
+
+/** Write Machine Interrupt Delegation */
+static inline void w_mideleg(u64 x) {
+  asm volatile("csrw mideleg, %0" : : "r"(x));
+}
+
+/** Write Supervisor Trap-Vector Base Address */
+static inline void w_stvec(u64 x) { asm volatile("csrw stvec, %0" : : "r"(x)); }
+
+/** Read Supervisor Trap-Vector Base Address */
+static inline u64 r_stvec() {
+  u64 x;
+  asm volatile("csrr %0, stvec" : "=r"(x));
+  return x;
+}
+
+/** Write Machine Trap-Vector Base Address */
+static inline void w_mtvec(u64 x) { asm volatile("csrw mtvec, %0" : : "r"(x)); }
+
+/** Write Physical Memory Protection Configuration */
+static inline void w_pmpcfg0(u64 x) {
+  asm volatile("csrw pmpcfg0, %0" : : "r"(x));
+}
+
+/** Write Physical Memory Protection Address Register 0 */
+static inline void w_pmpaddr0(u64 x) {
+  asm volatile("csrw pmpaddr0, %0" : : "r"(x));
+}
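+
+/*
+ * How start.c uses the two PMP helpers above: pmpaddr0 holds bits
+ * [55:2] of a region's end address, so 0x3fffffffffffff covers the
+ * entire 56-bit physical address space, and pmpcfg0 = 0xf sets R|W|X
+ * with A=TOR ("top of range"), granting supervisor mode access to all
+ * of physical memory:
+ *
+ *   w_pmpaddr0(0x3fffffffffffffull);
+ *   w_pmpcfg0(0xf);
+ */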
+/** RISC-V's Sv39 page-table scheme. */
+#define SATP_SV39 (8L << 60)
+
+/** Make Supervisor Address Translation and Protection */
+#define MAKE_SATP(pagetable) (SATP_SV39 | (((u64)pagetable) >> 12))
+
+/**
+ * Write the value to the satp register.
+ * (Supervisor Address Translation and Protection)
+ *
+ * This register holds the address of the page table.
+ */
+static inline void w_satp(u64 x) { asm volatile("csrw satp, %0" : : "r"(x)); }
+
+/**
+ * Read the value of the satp register.
+ * (Supervisor Address Translation and Protection)
+ * Returns the address of the page table.
+ */
+static inline u64 r_satp() {
+  u64 x;
+  asm volatile("csrr %0, satp" : "=r"(x));
+  return x;
+}
+
+/** Write Machine Scratch Register */
+static inline void w_mscratch(u64 x) {
+  asm volatile("csrw mscratch, %0" : : "r"(x));
+}
+
+/** Read Supervisor Trap Cause */
+static inline u64 r_scause() {
+  u64 x;
+  asm volatile("csrr %0, scause" : "=r"(x));
+  return x;
+}
+
+/** Read Supervisor Trap Value */
+static inline u64 r_stval() {
+  u64 x;
+  asm volatile("csrr %0, stval" : "=r"(x));
+  return x;
+}
+
+/** Write Machine-mode Counter-Enable Register */
+static inline void w_mcounteren(u64 x) {
+  asm volatile("csrw mcounteren, %0" : : "r"(x));
+}
+
+/** Read Machine-mode Counter-Enable Register */
+static inline u64 r_mcounteren() {
+  u64 x;
+  asm volatile("csrr %0, mcounteren" : "=r"(x));
+  return x;
+}
+
+/**
+ * Read the time CSR, which reports the current
+ * wall-clock time from the timer device.
+ */
+static inline u64 r_time() {
+  u64 x;
+  asm volatile("csrr %0, time" : "=r"(x));
+  return x;
+}
+
+/** Enable device interrupts */
+static inline void intr_on() { w_sstatus(r_sstatus() | SSTATUS_SIE); }
+
+/** Disable device interrupts */
+static inline void intr_off() { w_sstatus(r_sstatus() & ~SSTATUS_SIE); }
+
+/** Are device interrupts enabled? */
+static inline int intr_get() {
+  u64 x = r_sstatus();
+  return (x & SSTATUS_SIE) != 0;
+}
+
+/** Read stack pointer */
+static inline u64 r_sp() {
+  u64 x;
+  asm volatile("mv %0, sp" : "=r"(x));
+  return x;
+}
+
+// read and write tp, the thread pointer, which xv6 uses to hold
+// this core's hartid (core number), the index into cpus[].
+
+/** Read thread pointer */
+static inline u64 r_tp() {
+  u64 x;
+  asm volatile("mv %0, tp" : "=r"(x));
+  return x;
+}
+
+/** Write thread pointer */
+static inline void w_tp(u64 x) { asm volatile("mv tp, %0" : : "r"(x)); }
+
+/** Read the return address */
+static inline u64 r_ra() {
+  u64 x;
+  asm volatile("mv %0, ra" : "=r"(x));
+  return x;
+}
+
+/** Flush the TLB (Translation Lookaside Buffer) */
+static inline void sfence_vma() {
+  // the zero, zero means flush all TLB entries.
+  asm volatile("sfence.vma zero, zero");
+}
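+
+/*
+ * A sketch (not a function in this patch) of how the helpers above
+ * combine when paging is switched on; vm.c's kvminithart(), declared
+ * in defs.h, is expected to do essentially this:
+ *
+ *   sfence_vma();                        // wait for prior page-table stores
+ *   w_satp(MAKE_SATP(kernel_pagetable)); // install root table, enable Sv39
+ *   sfence_vma();                        // flush stale TLB entries
+ *
+ * (kernel_pagetable is vm.c's root kernel page table, not defined here.)
+ */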
+/** Page Table Entry Type */
+typedef u64 pte_t;
+
+/** Page Table Type */
+typedef u64 *pagetable_t; // 512 PTEs
+
+#endif // __ASSEMBLER__
+
+/** Page Size */
+#define PGSIZE 4096 // bytes per page
+
+/** Page Shift, bits of offset within a page */
+#define PGSHIFT 12
+
+#define PGROUNDUP(sz) (((sz) + PGSIZE - 1) & ~(PGSIZE - 1))
+#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE - 1))
+
+/**
+ * Page Table Entry Flags
+ */
+#define PTE_V (1L << 0) /** PTE Valid */
+#define PTE_R (1L << 1) /** PTE Readable */
+#define PTE_W (1L << 2) /** PTE Writable */
+#define PTE_X (1L << 3) /** PTE Executable */
+#define PTE_U (1L << 4) /** PTE User Accessible */
+
+/**
+ * Helper macros to shift a physical address
+ * to the right place for a PTE.
+ */
+
+/** Physical Address to Page Table Entry */
+#define PA2PTE(pa) ((((u64)pa) >> 12) << 10)
+
+/** Page Table Entry to Physical Address */
+#define PTE2PA(pte) (((pte) >> 10) << 12)
+
+/** Page Table Entry Flags */
+#define PTE_FLAGS(pte) ((pte) & 0x3FF)
+
+/**
+ * Helper macros to extract the three 9-bit
+ * page table indices from a virtual address.
+ */
+
+/** Page Extract Mask */
+#define PXMASK 0x1FF // 9 bits, 0b111111111
+
+/** Page Extract Shift */
+#define PXSHIFT(level) (PGSHIFT + (9 * (level)))
+
+/** Page Extract */
+#define PX(level, va) ((((u64)(va)) >> PXSHIFT(level)) & PXMASK)
+
+/**
+ * One beyond the highest possible virtual address.
+ * MAXVA is actually one bit less than the max allowed by
+ * Sv39, to avoid having to sign-extend virtual addresses
+ * that have the high bit set.
+ */
+#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))
diff --git a/spinlock.c b/spinlock.c
new file mode 100644
index 0000000..b40e4ad
--- /dev/null
+++ b/spinlock.c
@@ -0,0 +1,113 @@
+/**
+ * Mutual exclusion spin locks.
+ * (Not mutexes, as these are spinning locks.)
+ */
+
+#include "spinlock.h"
+#include "defs.h"
+#include "proc.h"
+#include "riscv.h"
+
+/**
+ * The acquire() and release() functions control ownership of the lock.
+ * To perform these operations, modern CPUs provide atomic instructions
+ * that prevent the cores from stepping on each other's toes, which would
+ * otherwise cause race conditions.
+ *
+ * GCC provides a set of built-in functions that allow you to use atomic
+ * instructions in an architecture-independent way. These functions are
+ * defined in the GCC manual:
+ *
+ * See: https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html
+ * See: https://en.wikipedia.org/wiki/Memory_barrier
+ *
+ * On RISC-V, __sync_lock_test_and_set turns into an atomic swap:
+ *   a5 = 1
+ *   s1 = &lk->locked
+ *   amoswap.w.aq a5, a5, (s1)
+ *
+ * On RISC-V, __sync_lock_release turns into an atomic swap:
+ *   s1 = &lk->locked
+ *   amoswap.w zero, zero, (s1)
+ *
+ * __sync_synchronize();
+ *
+ * This function tells the C compiler and the processor to not move loads or
+ * stores past this point, to ensure that the critical section's memory
+ * references happen strictly after the lock is acquired.
+ * On RISC-V, this emits a fence instruction.
+ */
+
+/** Initialize spinlock */
+void initlock(struct spinlock *lk, char *name) {
+  lk->name = name;
+  lk->locked = 0;
+  lk->cpu = 0;
+}
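+
+/*
+ * Usage sketch (hypothetical names, not part of the kernel): a shared
+ * counter protected by a spinlock.
+ *
+ *   struct spinlock counter_lock;
+ *   int counter;
+ *
+ *   void counter_init(void) { initlock(&counter_lock, "counter"); }
+ *
+ *   void counter_bump(void) {
+ *     acquire(&counter_lock); // spins; interrupts pushed off
+ *     counter += 1;           // critical section
+ *     release(&counter_lock); // interrupts popped back on
+ *   }
+ */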
+ */ +void acquire(struct spinlock *lk) { + push_off(); // disable interrupts to avoid deadlock. + + if (holding(lk)) // If the lock is already held, panic. + panic("acquire"); + + // Spin until aquired. See file header for details + while (__sync_lock_test_and_set(&lk->locked, 1) != 0) { + } + __sync_synchronize(); // No loads/stores after this point + + // Record info about lock acquisition for holding() and debugging. + lk->cpu = mycpu(); +} + +/** + * Release the lock. + * Panics if the lock is not held. + */ +void release(struct spinlock *lk) { + if (!holding(lk)) // If the lock is not held, panic. + panic("release"); + + lk->cpu = 0; // 0 means unheld + __sync_synchronize(); // No loads/stores after this point + __sync_lock_release(&lk->locked); // Essentially lk->locked = 0 + + pop_off(); +} + +// Check whether this cpu is holding the lock. +// Interrupts must be off. +int holding(struct spinlock *lk) { + int r; + r = (lk->locked && lk->cpu == mycpu()); + return r; +} + +// push_off/pop_off are like intr_off()/intr_on() except that they are matched: +// it takes two pop_off()s to undo two push_off()s. Also, if interrupts +// are initially off, then push_off, pop_off leaves them off. + +void push_off(void) { + int old = intr_get(); + + intr_off(); + if (mycpu()->noff == 0) + mycpu()->intena = old; + mycpu()->noff += 1; +} + +void pop_off(void) { + struct cpu *c = mycpu(); + if (intr_get()) + panic("pop_off - interruptible"); + if (c->noff < 1) + panic("pop_off"); + c->noff -= 1; + if (c->noff == 0 && c->intena) + intr_on(); +} diff --git a/spinlock.h b/spinlock.h new file mode 100644 index 0000000..43de426 --- /dev/null +++ b/spinlock.h @@ -0,0 +1,52 @@ +#pragma once + +#include "types.h" + +struct spinlock; + +// spinlock.c + +/** + * Acquire the lock. + * Loops (spins) until the lock is acquired. + * Panics if the lock is already held by this cpu. + */ +void acquire(struct spinlock *); + +/** + * Check whether this cpu is holding the lock. + * Interrupts must be off. + */ +int holding(struct spinlock *); + +/** + * Initialize spinlock + */ +void initlock(struct spinlock *, char *); + +/** + * Release the lock. + * Panics if the lock is not held. + */ +void release(struct spinlock *); + +/** + * @brief push_off/pop_off are like intr_off()/intr_on() except that they are + * matched: it takes two pop_off()s to undo two push_off()s. Also, if + * interrupts are initially off, then push_off, pop_off leaves them off. + */ +void push_off(void); + +/** @copydoc pop_off */ +void pop_off(void); + +/** Mutual exclusion spin lock */ +struct spinlock { + u32 locked; // Is the lock held? + + // NOTE: Perhaps feature gate this? + + // For debugging: + char *name; // Name of lock. + struct cpu *cpu; // The cpu holding the lock. +}; diff --git a/start.c b/start.c new file mode 100644 index 0000000..5ee81bc --- /dev/null +++ b/start.c @@ -0,0 +1,84 @@ +#include "memlayout.h" +#include "param.h" +#include "riscv.h" +#include "types.h" + +void main(); +void timerinit(); + +// Entry.S needs one stack per CPU. +__attribute__((aligned(16))) char stack0[4096 * NCPU]; + +// A scratch area per CPU for machine-mode timer interrupts. +u64 timer_scratch[NCPU][5]; + +// Assembly code in kernelvec.S for machine-mode timer interrupt. +extern void timervec(); + +// Entry.S jumps here in machine mode on stack0. +void start() { + // Set M Previous Privilege mode to Supervisor, for mret. 
+  unsigned long x = r_mstatus();
+  x &= ~MSTATUS_MPP_MASK;
+  x |= MSTATUS_MPP_S;
+  w_mstatus(x);
+
+  // Set M Exception Program Counter to main, for mret.
+  // Requires gcc -mcmodel=medany
+  w_mepc((u64)main);
+
+  // Disable paging for now.
+  w_satp(0);
+
+  // Delegate all interrupts and exceptions to supervisor mode.
+  w_medeleg(0xffff);
+  w_mideleg(0xffff);
+  w_sie(r_sie() | SIE_SEIE | SIE_STIE | SIE_SSIE);
+
+  // Configure Physical Memory Protection to give supervisor mode
+  // access to all of physical memory.
+  w_pmpaddr0(0x3fffffffffffffull);
+  w_pmpcfg0(0xf);
+
+  // Ask for clock interrupts.
+  timerinit();
+
+  // Keep each CPU's hartid in its tp register, for cpuid().
+  int id = r_mhartid();
+  w_tp(id);
+
+  // Switch to supervisor mode and jump to main().
+  asm volatile("mret");
+}
+
+// Arrange to receive timer interrupts.
+// They will arrive in machine mode at timervec
+// in kernelvec.S, which turns them into software
+// interrupts for devintr() in trap.c.
+void timerinit() {
+  // Each CPU has a separate source of timer interrupts.
+  int id = r_mhartid();
+
+  // Ask the CLINT for a timer interrupt.
+  int interval = 1000000; // Cycles; about 1/10th second in qemu.
+  *(u64 *)CLINT_MTIMECMP(id) = *(u64 *)CLINT_MTIME + interval;
+
+  // Prepare information in scratch[] for timervec.
+  // scratch[0..2] : space for timervec to save registers.
+  // scratch[3] : address of CLINT MTIMECMP register.
+  // scratch[4] : desired interval (in cycles) between timer interrupts.
+  u64 *scratch = &timer_scratch[id][0];
+  scratch[3] = CLINT_MTIMECMP(id);
+  scratch[4] = interval;
+  w_mscratch((u64)scratch);
+
+  // Set the machine-mode trap handler.
+  w_mtvec((u64)timervec);
+
+  // Enable machine-mode interrupts.
+  w_mstatus(r_mstatus() | MSTATUS_MIE);
+
+  // Enable machine-mode timer interrupts.
+  w_mie(r_mie() | MIE_MTIE);
+}
diff --git a/trap.c b/trap.c
new file mode 100644
index 0000000..a658516
--- /dev/null
+++ b/trap.c
@@ -0,0 +1,200 @@
+#include "defs.h"
+#include "memlayout.h"
+#include "proc.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "types.h"
+
+struct spinlock tickslock;
+u32 ticks;
+
+extern char trampoline[], uservec[], userret[];
+
+// in kernelvec.S, calls kerneltrap().
+void kernelvec();
+
+extern int devintr();
+
+void trapinit(void) { initlock(&tickslock, "time"); }
+
+// set up to take exceptions and traps while in the kernel.
+void trapinithart(void) { w_stvec((u64)kernelvec); }
+
+//
+// handle an interrupt, exception, or system call from user space.
+// called from trampoline.S
+//
+void usertrap(void) {
+  int which_dev = 0;
+
+  if ((r_sstatus() & SSTATUS_SPP) != 0)
+    panic("usertrap: not from user mode");
+
+  // send interrupts and exceptions to kerneltrap(),
+  // since we're now in the kernel.
+  w_stvec((u64)kernelvec);
+
+  struct proc *p = myproc();
+
+  // save user program counter.
+  p->trapframe->epc = r_sepc();
+
+  if (r_scause() == 8) {
+    // system call
+
+    if (killed(p))
+      exit(-1);
+
+    // sepc points to the ecall instruction,
+    // but we want to return to the next instruction.
+    p->trapframe->epc += 4;
+
+    // an interrupt will change sepc, scause, and sstatus,
+    // so enable only now that we're done with those registers.
+    intr_on();
+
+    syscall();
+  } else if ((which_dev = devintr()) != 0) {
+    // ok
+  } else {
+    printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
+    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
+    setkilled(p);
+  }
+
+  if (killed(p))
+    exit(-1);
+
+  // give up the CPU if this is a timer interrupt.
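+  // (devintr() returns 2 only for the machine-timer software
+  // interrupt, so every tick forces a scheduling pass here.)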
+ if (which_dev == 2) + yield(); + + usertrapret(); +} + +// +// return to user space +// +void usertrapret(void) { + struct proc *p = myproc(); + + // we're about to switch the destination of traps from + // kerneltrap() to usertrap(), so turn off interrupts until + // we're back in user space, where usertrap() is correct. + intr_off(); + + // send syscalls, interrupts, and exceptions to uservec in trampoline.S + u64 trampoline_uservec = TRAMPOLINE + (uservec - trampoline); + w_stvec(trampoline_uservec); + + // set up trapframe values that uservec will need when + // the process next traps into the kernel. + p->trapframe->kernel_satp = r_satp(); // kernel page table + p->trapframe->kernel_sp = p->kstack + PGSIZE; // process's kernel stack + p->trapframe->kernel_trap = (u64)usertrap; + p->trapframe->kernel_hartid = r_tp(); // hartid for cpuid() + + // set up the registers that trampoline.S's sret will use + // to get to user space. + + // set S Previous Privilege mode to User. + unsigned long x = r_sstatus(); + x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode + x |= SSTATUS_SPIE; // enable interrupts in user mode + w_sstatus(x); + + // set S Exception Program Counter to the saved user pc. + w_sepc(p->trapframe->epc); + + // tell trampoline.S the user page table to switch to. + u64 satp = MAKE_SATP(p->pagetable); + + // jump to userret in trampoline.S at the top of memory, which + // switches to the user page table, restores user registers, + // and switches to user mode with sret. + u64 trampoline_userret = TRAMPOLINE + (userret - trampoline); + ((void (*)(u64))trampoline_userret)(satp); +} + +// interrupts and exceptions from kernel code go here via kernelvec, +// on whatever the current kernel stack is. +void kerneltrap() { + int which_dev = 0; + u64 sepc = r_sepc(); + u64 sstatus = r_sstatus(); + u64 scause = r_scause(); + + if ((sstatus & SSTATUS_SPP) == 0) + panic("kerneltrap: not from supervisor mode"); + if (intr_get() != 0) + panic("kerneltrap: interrupts enabled"); + + if ((which_dev = devintr()) == 0) { + printf("scause %p\n", scause); + printf("sepc=%p stval=%p\n", r_sepc(), r_stval()); + panic("kerneltrap"); + } + + // give up the CPU if this is a timer interrupt. + if (which_dev == 2 && myproc() != 0 && myproc()->state == RUNNING) + yield(); + + // the yield() may have caused some traps to occur, + // so restore trap registers for use by kernelvec.S's sepc instruction. + w_sepc(sepc); + w_sstatus(sstatus); +} + +void clockintr() { + acquire(&tickslock); + ticks++; + wakeup(&ticks); + release(&tickslock); +} + +// check if it's an external interrupt or software interrupt, +// and handle it. +// returns 2 if timer interrupt, +// 1 if other device, +// 0 if not recognized. +int devintr() { + u64 scause = r_scause(); + + if ((scause & 0x8000000000000000L) && (scause & 0xff) == 9) { + // this is a supervisor external interrupt, via PLIC. + + // irq indicates which device interrupted. + int irq = plic_claim(); + + if (irq == UART0_IRQ) { + uartintr(); + } else if (irq == VIRTIO0_IRQ) { + virtio_disk_intr(); + } else if (irq) { + printf("unexpected interrupt irq=%d\n", irq); + } + + // the PLIC allows each device to raise at most one + // interrupt at a time; tell the PLIC the device is + // now allowed to interrupt again. + if (irq) + plic_complete(irq); + + return 1; + } else if (scause == 0x8000000000000001L) { + // software interrupt from a machine-mode timer interrupt, + // forwarded by timervec in kernelvec.S. 
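+    // Only hart 0 advances ticks, so the tick count grows once per
+    // interval no matter how many CPUs take timer interrupts.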
+ + if (cpuid() == 0) { + clockintr(); + } + + // acknowledge the software interrupt by clearing + // the SSIP bit in sip. + w_sip(r_sip() & ~2); + + return 2; + } else { + return 0; + } +} diff --git a/types.h b/types.h new file mode 100644 index 0000000..acc0095 --- /dev/null +++ b/types.h @@ -0,0 +1,8 @@ +#pragma once + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long u64; + +typedef u64 pde_t;