separate source into kernel/ user/ mkfs/

This commit is contained in:
Robert Morris 2019-06-11 09:57:14 -04:00
parent 91ba81110a
commit 5753553213
72 changed files with 157 additions and 296 deletions

145
kernel/bio.c Normal file
View file

@ -0,0 +1,145 @@
// Buffer cache.
//
// The buffer cache is a linked list of buf structures holding
// cached copies of disk block contents. Caching disk blocks
// in memory reduces the number of disk reads and also provides
// a synchronization point for disk blocks used by multiple processes.
//
// Interface:
// * To get a buffer for a particular disk block, call bread.
// * After changing buffer data, call bwrite to write it to disk.
// * When done with the buffer, call brelse.
// * Do not use the buffer after calling brelse.
// * Only one process at a time can use a buffer,
// so do not keep them longer than necessary.
//
// The implementation uses two state flags internally:
// * B_VALID: the buffer data has been read from the disk.
// * B_DIRTY: the buffer data has been modified
// and needs to be written to disk.
#include "types.h"
#include "param.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "riscv.h"
#include "defs.h"
#include "fs.h"
#include "buf.h"
struct {
struct spinlock lock;
struct buf buf[NBUF];
// Linked list of all buffers, through prev/next.
// head.next is most recently used.
struct buf head;
} bcache;
void
binit(void)
{
struct buf *b;
initlock(&bcache.lock, "bcache");
//PAGEBREAK!
// Create linked list of buffers
bcache.head.prev = &bcache.head;
bcache.head.next = &bcache.head;
for(b = bcache.buf; b < bcache.buf+NBUF; b++){
b->next = bcache.head.next;
b->prev = &bcache.head;
initsleeplock(&b->lock, "buffer");
bcache.head.next->prev = b;
bcache.head.next = b;
}
}
// Look through buffer cache for block on device dev.
// If not found, allocate a buffer.
// In either case, return locked buffer.
static struct buf*
bget(uint dev, uint blockno)
{
struct buf *b;
acquire(&bcache.lock);
// Is the block already cached?
for(b = bcache.head.next; b != &bcache.head; b = b->next){
if(b->dev == dev && b->blockno == blockno){
b->refcnt++;
release(&bcache.lock);
acquiresleep(&b->lock);
return b;
}
}
// Not cached; recycle an unused buffer.
// Even if refcnt==0, B_DIRTY indicates a buffer is in use
// because log.c has modified it but not yet committed it.
for(b = bcache.head.prev; b != &bcache.head; b = b->prev){
if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) {
b->dev = dev;
b->blockno = blockno;
b->flags = 0;
b->refcnt = 1;
release(&bcache.lock);
acquiresleep(&b->lock);
return b;
}
}
panic("bget: no buffers");
}
// Return a locked buf with the contents of the indicated block.
struct buf*
bread(uint dev, uint blockno)
{
struct buf *b;
b = bget(dev, blockno);
if((b->flags & B_VALID) == 0) {
ramdiskrw(b);
}
return b;
}
// Write b's contents to disk. Must be locked.
void
bwrite(struct buf *b)
{
if(!holdingsleep(&b->lock))
panic("bwrite");
b->flags |= B_DIRTY;
ramdiskrw(b);
}
// Release a locked buffer.
// Move to the head of the MRU list.
void
brelse(struct buf *b)
{
if(!holdingsleep(&b->lock))
panic("brelse");
releasesleep(&b->lock);
acquire(&bcache.lock);
b->refcnt--;
if (b->refcnt == 0) {
// no one is waiting for it.
b->next->prev = b->prev;
b->prev->next = b->next;
b->next = bcache.head.next;
b->prev = &bcache.head;
bcache.head.next->prev = b;
bcache.head.next = b;
}
release(&bcache.lock);
}
//PAGEBREAK!
// Blank page.

14
kernel/buf.h Normal file
View file

@ -0,0 +1,14 @@
struct buf {
int flags;
uint dev;
uint blockno;
struct sleeplock lock;
uint refcnt;
struct buf *prev; // LRU cache list
struct buf *next;
struct buf *qnext; // disk queue
uchar data[BSIZE];
};
#define B_VALID 0x2 // buffer has been read from disk
#define B_DIRTY 0x4 // buffer needs to be written to disk

271
kernel/console.c Normal file
View file

@ -0,0 +1,271 @@
// Console input and output.
// Input is from the keyboard or serial port.
// Output is written to the screen and serial port.
#include <stdarg.h>
#include "types.h"
#include "param.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "file.h"
#include "memlayout.h"
#include "riscv.h"
#include "defs.h"
#include "proc.h"
static void consputc(int);
static volatile int panicked = 0;
static struct {
struct spinlock lock;
int locking;
} cons;
static char digits[] = "0123456789abcdef";
static void
printint(int xx, int base, int sign)
{
char buf[16];
int i;
uint x;
if(sign && (sign = xx < 0))
x = -xx;
else
x = xx;
i = 0;
do{
buf[i++] = digits[x % base];
}while((x /= base) != 0);
if(sign)
buf[i++] = '-';
while(--i >= 0)
consputc(buf[i]);
}
static void
printptr(uint64 x) {
int i;
consputc('0');
consputc('x');
for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4)
consputc(digits[x >> (sizeof(uint64) * 8 - 4)]);
}
//PAGEBREAK: 50
// Print to the console. only understands %d, %x, %p, %s.
void
printf(char *fmt, ...)
{
va_list ap;
int i, c, locking;
char *s;
locking = cons.locking;
if(locking)
acquire(&cons.lock);
if (fmt == 0)
panic("null fmt");
va_start(ap, fmt);
for(i = 0; (c = fmt[i] & 0xff) != 0; i++){
if(c != '%'){
consputc(c);
continue;
}
c = fmt[++i] & 0xff;
if(c == 0)
break;
switch(c){
case 'd':
printint(va_arg(ap, int), 10, 1);
break;
case 'x':
printint(va_arg(ap, int), 16, 1);
break;
case 'p':
printptr(va_arg(ap, uint64));
break;
case 's':
if((s = va_arg(ap, char*)) == 0)
s = "(null)";
for(; *s; s++)
consputc(*s);
break;
case '%':
consputc('%');
break;
default:
// Print unknown % sequence to draw attention.
consputc('%');
consputc(c);
break;
}
}
if(locking)
release(&cons.lock);
}
void
panic(char *s)
{
cons.locking = 0;
printf("panic: ");
printf(s);
printf("\n");
panicked = 1; // freeze other CPU
for(;;)
;
}
#define BACKSPACE 0x100
void
consputc(int c)
{
if(panicked){
for(;;)
;
}
if(c == BACKSPACE){
uartputc('\b'); uartputc(' '); uartputc('\b');
} else
uartputc(c);
}
#define INPUT_BUF 128
struct {
char buf[INPUT_BUF];
uint r; // Read index
uint w; // Write index
uint e; // Edit index
} input;
#define C(x) ((x)-'@') // Contro
int
consoleread(struct inode *ip, int user_dst, uint64 dst, int n)
{
uint target;
int c;
char buf[1];
iunlock(ip);
target = n;
acquire(&cons.lock);
while(n > 0){
while(input.r == input.w){
if(myproc()->killed){
release(&cons.lock);
ilock(ip);
return -1;
}
sleep(&input.r, &cons.lock);
}
c = input.buf[input.r++ % INPUT_BUF];
if(c == C('D')){ // EOF
if(n < target){
// Save ^D for next time, to make sure
// caller gets a 0-byte result.
input.r--;
}
break;
}
buf[0] = c;
if(either_copyout(user_dst, dst, &buf[0], 1) == -1)
break;
dst++;
--n;
if(c == '\n')
break;
}
release(&cons.lock);
ilock(ip);
return target - n;
}
int
consolewrite(struct inode *ip, int user_src, uint64 src, int n)
{
int i;
iunlock(ip);
acquire(&cons.lock);
for(i = 0; i < n; i++){
char c;
if(either_copyin(&c, user_src, src+i, 1) == -1)
break;
consputc(c);
}
release(&cons.lock);
ilock(ip);
return n;
}
void
consoleintr(int c)
{
int doprocdump = 0;
acquire(&cons.lock);
switch(c){
case C('P'): // Process list.
// procdump() locks cons.lock indirectly; invoke later
doprocdump = 1;
break;
case C('U'): // Kill line.
while(input.e != input.w &&
input.buf[(input.e-1) % INPUT_BUF] != '\n'){
input.e--;
consputc(BACKSPACE);
}
break;
case C('H'): case '\x7f': // Backspace
if(input.e != input.w){
input.e--;
consputc(BACKSPACE);
}
break;
default:
if(c != 0 && input.e-input.r < INPUT_BUF){
c = (c == '\r') ? '\n' : c;
input.buf[input.e++ % INPUT_BUF] = c;
consputc(c);
if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){
input.w = input.e;
wakeup(&input.r);
}
}
break;
}
release(&cons.lock);
if(doprocdump)
procdump();
}
void
consoleinit(void)
{
initlock(&cons.lock, "console");
devsw[CONSOLE].write = consolewrite;
devsw[CONSOLE].read = consoleread;
cons.locking = 1;
}

8
kernel/date.h Normal file
View file

@ -0,0 +1,8 @@
struct rtcdate {
uint second;
uint minute;
uint hour;
uint day;
uint month;
uint year;
};

205
kernel/defs.h Normal file
View file

@ -0,0 +1,205 @@
struct buf;
struct context;
struct file;
struct inode;
struct pipe;
struct proc;
struct rtcdate;
struct spinlock;
struct sleeplock;
struct stat;
struct superblock;
struct sysframe;
// bio.c
void binit(void);
struct buf* bread(uint, uint);
void brelse(struct buf*);
void bwrite(struct buf*);
// console.c
void consoleinit(void);
void printf(char*, ...);
void consoleintr(int);
void panic(char*) __attribute__((noreturn));
// exec.c
int exec(char*, char**);
// file.c
struct file* filealloc(void);
void fileclose(struct file*);
struct file* filedup(struct file*);
void fileinit(void);
int fileread(struct file*, uint64, int n);
int filestat(struct file*, uint64 addr);
int filewrite(struct file*, uint64, int n);
// fs.c
void readsb(int dev, struct superblock *sb);
int dirlink(struct inode*, char*, uint);
struct inode* dirlookup(struct inode*, char*, uint*);
struct inode* ialloc(uint, short);
struct inode* idup(struct inode*);
void iinit(int dev);
void ilock(struct inode*);
void iput(struct inode*);
void iunlock(struct inode*);
void iunlockput(struct inode*);
void iupdate(struct inode*);
int namecmp(const char*, const char*);
struct inode* namei(char*);
struct inode* nameiparent(char*, char*);
int readi(struct inode*, int, uint64, uint, uint);
void stati(struct inode*, struct stat*);
int writei(struct inode*, int, uint64, uint, uint);
// ramdisk.c
void ramdiskinit(void);
void ramdiskintr(void);
void ramdiskrw(struct buf*);
// ioapic.c
void ioapicenable(int irq, int cpu);
extern uchar ioapicid;
void ioapicinit(void);
// kalloc.c
void* kalloc(void);
void kfree(void *);
void kinit();
// kbd.c
void kbdintr(void);
// lapic.c
void cmostime(struct rtcdate *r);
int lapicid(void);
extern volatile uint* lapic;
void lapiceoi(void);
void lapicinit(void);
void lapicstartap(uchar, uint);
void microdelay(int);
// log.c
void initlog(int dev);
void log_write(struct buf*);
void begin_op();
void end_op();
// mp.c
extern int ismp;
void mpinit(void);
// picirq.c
void picenable(int);
void picinit(void);
// pipe.c
int pipealloc(struct file**, struct file**);
void pipeclose(struct pipe*, int);
int piperead(struct pipe*, uint64, int);
int pipewrite(struct pipe*, uint64, int);
//PAGEBREAK: 16
// proc.c
int cpuid(void);
void exit(void);
int fork(void);
int growproc(int);
pagetable_t proc_pagetable(struct proc *);
void proc_freepagetable(pagetable_t, uint64);
int kill(int);
struct cpu* mycpu(void);
struct cpu* getmycpu(void);
struct proc* myproc();
void procinit(void);
void scheduler(void) __attribute__((noreturn));
void sched(void);
void setproc(struct proc*);
void sleep(void*, struct spinlock*);
void userinit(void);
int wait(void);
void wakeup(void*);
void yield(void);
int either_copyout(int user_dst, uint64 dst, void *src, uint64 len);
int either_copyin(void *dst, int user_src, uint64 src, uint64 len);
void procdump(void);
// swtch.S
void swtch(struct context*, struct context*);
// spinlock.c
void acquire(struct spinlock*);
int holding(struct spinlock*);
void initlock(struct spinlock*, char*);
void release(struct spinlock*);
void push_off(void);
void pop_off(void);
// sleeplock.c
void acquiresleep(struct sleeplock*);
void releasesleep(struct sleeplock*);
int holdingsleep(struct sleeplock*);
void initsleeplock(struct sleeplock*, char*);
// string.c
int memcmp(const void*, const void*, uint);
void* memmove(void*, const void*, uint);
void* memset(void*, int, uint);
char* safestrcpy(char*, const char*, int);
int strlen(const char*);
int strncmp(const char*, const char*, uint);
char* strncpy(char*, const char*, int);
// syscall.c
int argint(int, int*);
int argptr(int, uint64*, int);
int argstr(int, char*, int);
int argaddr(int, uint64 *);
int fetchint(uint64, int*);
int fetchstr(uint64, char*, int);
int fetchaddr(uint64, uint64*);
void syscall();
// timer.c
void timerinit(void);
// trap.c
extern uint ticks;
void trapinit(void);
void trapinithart(void);
extern struct spinlock tickslock;
void usertrapret(void);
// uart.c
void uartinit(void);
void uartintr(void);
void uartputc(int);
int uartgetc(void);
// vm.c
void kvminit(void);
void kvminithart(void);
pagetable_t uvmcreate(void);
void uvminit(pagetable_t, uchar *, uint);
uint64 uvmalloc(pagetable_t, uint64, uint64);
uint64 uvmdealloc(pagetable_t, uint64, uint64);
void uvmcopy(pagetable_t, pagetable_t, uint64);
void uvmfree(pagetable_t, uint64);
void mappages(pagetable_t, uint64, uint64, uint64, int);
void unmappages(pagetable_t, uint64, uint64, int);
uint64 walkaddr(pagetable_t, uint64);
int copyout(pagetable_t, uint64, char *, uint64);
int copyin(pagetable_t, char *, uint64, uint64);
int copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max);
// plic.c
void plicinit(void);
void plicinithart(void);
uint64 plic_pending(void);
int plic_claim(void);
void plic_complete(int);
// number of elements in fixed-size array
#define NELEM(x) (sizeof(x)/sizeof((x)[0]))

42
kernel/elf.h Normal file
View file

@ -0,0 +1,42 @@
// Format of an ELF executable file
#define ELF_MAGIC 0x464C457FU // "\x7FELF" in little endian
// File header
struct elfhdr {
uint magic; // must equal ELF_MAGIC
uchar elf[12];
ushort type;
ushort machine;
uint version;
uint64 entry;
uint64 phoff;
uint64 shoff;
uint flags;
ushort ehsize;
ushort phentsize;
ushort phnum;
ushort shentsize;
ushort shnum;
ushort shstrndx;
};
// Program section header
struct proghdr {
uint32 type;
uint32 flags;
uint64 off;
uint64 vaddr;
uint64 paddr;
uint64 filesz;
uint64 memsz;
uint64 align;
};
// Values for Proghdr type
#define ELF_PROG_LOAD 1
// Flag bits for Proghdr flags
#define ELF_PROG_FLAG_EXEC 1
#define ELF_PROG_FLAG_WRITE 2
#define ELF_PROG_FLAG_READ 4

25
kernel/entry.S Normal file
View file

@ -0,0 +1,25 @@
# qemu -kernel starts at 0x1000. the instructions
# there seem to be provided by qemu, as if it
# were a ROM. the code at 0x1000 jumps to
# 0x8000000, the _start function here,
# in machine mode.
.section .data
.globl stack0
.section .text
.globl mstart
.section .text
.globl _entry
_entry:
# set up a stack for C.
# stack0 is declared in start,
# with 4096 bytes per CPU.
la sp, stack0
li a0, 1024*4
csrr a1, mhartid
addi a1, a1, 1
mul a0, a0, a1
add sp, sp, a0
# jump to mstart() in start.c
call mstart
junk:
j junk

149
kernel/exec.c Normal file
View file

@ -0,0 +1,149 @@
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "proc.h"
#include "defs.h"
#include "elf.h"
static int loadseg(pde_t *pgdir, uint64 addr, struct inode *ip, uint offset, uint sz);
int
exec(char *path, char **argv)
{
char *s, *last;
int i, off;
uint64 argc, sz, sp, ustack[MAXARG+1], stackbase;
struct elfhdr elf;
struct inode *ip;
struct proghdr ph;
pagetable_t pagetable = 0, oldpagetable;
struct proc *p = myproc();
uint64 oldsz = p->sz;
begin_op();
if((ip = namei(path)) == 0){
end_op();
return -1;
}
ilock(ip);
// Check ELF header
if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf))
goto bad;
if(elf.magic != ELF_MAGIC)
goto bad;
if((pagetable = proc_pagetable(p)) == 0)
goto bad;
// Load program into memory.
sz = 0;
for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph))
goto bad;
if(ph.type != ELF_PROG_LOAD)
continue;
if(ph.memsz < ph.filesz)
goto bad;
if(ph.vaddr + ph.memsz < ph.vaddr)
goto bad;
if((sz = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0)
goto bad;
if(ph.vaddr % PGSIZE != 0)
goto bad;
if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0)
goto bad;
}
iunlockput(ip);
end_op();
ip = 0;
// Allocate two pages at the next page boundary.
// Use the second as the user stack.
sz = PGROUNDUP(sz);
if((sz = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0)
goto bad;
sp = sz;
stackbase = sp - PGSIZE;
// Push argument strings, prepare rest of stack in ustack.
for(argc = 0; argv[argc]; argc++) {
if(argc >= MAXARG)
goto bad;
sp -= strlen(argv[argc]) + 1;
sp -= sp % 16; // riscv sp must be 16-byte aligned
if(sp < stackbase)
goto bad;
if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
goto bad;
ustack[argc] = sp;
}
ustack[argc] = 0;
// push the array of argv[] pointers.
sp -= (argc+1) * sizeof(uint64);
sp -= sp % 16;
if(sp < stackbase)
goto bad;
if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0)
goto bad;
// arguments to user main(argc, argv)
// argc is returned via the system call return
// value, which goes in a0.
p->tf->a1 = sp;
// Save program name for debugging.
for(last=s=path; *s; s++)
if(*s == '/')
last = s+1;
safestrcpy(p->name, last, sizeof(p->name));
// Commit to the user image.
oldpagetable = p->pagetable;
p->pagetable = pagetable;
p->sz = sz;
p->tf->epc = elf.entry; // initial program counter = main
p->tf->sp = sp; // initial stack pointer
proc_freepagetable(oldpagetable, oldsz);
return argc; // this ends up in a0, the first argument to main(argc, argv)
bad:
if(pagetable)
proc_freepagetable(pagetable, sz);
if(ip){
iunlockput(ip);
end_op();
}
return -1;
}
// Load a program segment into pagetable at virtual address va.
// va must be page-aligned
// and the pages from va to va+sz must already be mapped.
// Returns 0 on success, -1 on failure.
static int
loadseg(pagetable_t pagetable, uint64 va, struct inode *ip, uint offset, uint sz)
{
uint i, n;
uint64 pa;
if((va % PGSIZE) != 0)
panic("loadseg: va must be page aligned");
for(i = 0; i < sz; i += PGSIZE){
pa = walkaddr(pagetable, va + i);
if(pa == 0)
panic("loadseg: address should exist");
if(sz - i < PGSIZE)
n = sz - i;
else
n = PGSIZE;
if(readi(ip, 0, (uint64)pa, offset+i, n) != n)
return -1;
}
return 0;
}

4
kernel/fcntl.h Normal file
View file

@ -0,0 +1,4 @@
#define O_RDONLY 0x000
#define O_WRONLY 0x001
#define O_RDWR 0x002
#define O_CREATE 0x200

175
kernel/file.c Normal file
View file

@ -0,0 +1,175 @@
//
// Support functions for system calls that involve file descriptors.
//
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "fs.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "file.h"
#include "stat.h"
#include "proc.h"
struct devsw devsw[NDEV];
struct {
struct spinlock lock;
struct file file[NFILE];
} ftable;
void
fileinit(void)
{
initlock(&ftable.lock, "ftable");
}
// Allocate a file structure.
struct file*
filealloc(void)
{
struct file *f;
acquire(&ftable.lock);
for(f = ftable.file; f < ftable.file + NFILE; f++){
if(f->ref == 0){
f->ref = 1;
release(&ftable.lock);
return f;
}
}
release(&ftable.lock);
return 0;
}
// Increment ref count for file f.
struct file*
filedup(struct file *f)
{
acquire(&ftable.lock);
if(f->ref < 1)
panic("filedup");
f->ref++;
release(&ftable.lock);
return f;
}
// Close file f. (Decrement ref count, close when reaches 0.)
void
fileclose(struct file *f)
{
struct file ff;
acquire(&ftable.lock);
if(f->ref < 1)
panic("fileclose");
if(--f->ref > 0){
release(&ftable.lock);
return;
}
ff = *f;
f->ref = 0;
f->type = FD_NONE;
release(&ftable.lock);
if(ff.type == FD_PIPE)
pipeclose(ff.pipe, ff.writable);
else if(ff.type == FD_INODE){
begin_op();
iput(ff.ip);
end_op();
}
}
// Get metadata about file f.
// addr is a user virtual address, pointing to a struct stat.
int
filestat(struct file *f, uint64 addr)
{
struct proc *p = myproc();
struct stat st;
if(f->type == FD_INODE){
ilock(f->ip);
stati(f->ip, &st);
iunlock(f->ip);
if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0)
return -1;
return 0;
}
return -1;
}
// Read from file f.
// addr is a user virtual address.
int
fileread(struct file *f, uint64 addr, int n)
{
int r = 0;
if(f->readable == 0)
return -1;
if(f->type == FD_PIPE){
r = piperead(f->pipe, addr, n);
} else if(f->type == FD_INODE){
ilock(f->ip);
if((r = readi(f->ip, 1, addr, f->off, n)) > 0)
f->off += r;
iunlock(f->ip);
} else {
panic("fileread");
}
return r;
}
//PAGEBREAK!
// Write to file f.
// addr is a user virtual address.
int
filewrite(struct file *f, uint64 addr, int n)
{
int r, ret = 0;
if(f->writable == 0)
return -1;
if(f->type == FD_PIPE){
ret = pipewrite(f->pipe, addr, n);
} else if(f->type == FD_INODE){
// write a few blocks at a time to avoid exceeding
// the maximum log transaction size, including
// i-node, indirect block, allocation blocks,
// and 2 blocks of slop for non-aligned writes.
// this really belongs lower down, since writei()
// might be writing a device like the console.
int max = ((MAXOPBLOCKS-1-1-2) / 2) * 512;
int i = 0;
while(i < n){
int n1 = n - i;
if(n1 > max)
n1 = max;
begin_op();
ilock(f->ip);
if ((r = writei(f->ip, 1, addr + i, f->off, n1)) > 0)
f->off += r;
iunlock(f->ip);
end_op();
if(r < 0)
break;
if(r != n1)
panic("short filewrite");
i += r;
}
ret = (i == n ? n : -1);
} else {
panic("filewrite");
}
return ret;
}

37
kernel/file.h Normal file
View file

@ -0,0 +1,37 @@
struct file {
enum { FD_NONE, FD_PIPE, FD_INODE } type;
int ref; // reference count
char readable;
char writable;
struct pipe *pipe;
struct inode *ip;
uint off;
};
// in-memory copy of an inode
struct inode {
uint dev; // Device number
uint inum; // Inode number
int ref; // Reference count
struct sleeplock lock; // protects everything below here
int valid; // inode has been read from disk?
short type; // copy of disk inode
short major;
short minor;
short nlink;
uint size;
uint addrs[NDIRECT+1];
};
// table mapping major device number to
// device functions
struct devsw {
int (*read)(struct inode*, int, uint64, int);
int (*write)(struct inode*, int, uint64, int);
};
extern struct devsw devsw[];
#define CONSOLE 1

681
kernel/fs.c Normal file
View file

@ -0,0 +1,681 @@
// File system implementation. Five layers:
// + Blocks: allocator for raw disk blocks.
// + Log: crash recovery for multi-step updates.
// + Files: inode allocator, reading, writing, metadata.
// + Directories: inode with special contents (list of other inodes!)
// + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
//
// This file contains the low-level file system manipulation
// routines. The (higher-level) system call implementations
// are in sysfile.c.
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "stat.h"
#include "proc.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "buf.h"
#include "file.h"
#define min(a, b) ((a) < (b) ? (a) : (b))
static void itrunc(struct inode*);
// there should be one superblock per disk device, but we run with
// only one device
struct superblock sb;
// Read the super block.
void
readsb(int dev, struct superblock *sb)
{
struct buf *bp;
bp = bread(dev, 1);
memmove(sb, bp->data, sizeof(*sb));
brelse(bp);
}
// Zero a block.
static void
bzero(int dev, int bno)
{
struct buf *bp;
bp = bread(dev, bno);
memset(bp->data, 0, BSIZE);
log_write(bp);
brelse(bp);
}
// Blocks.
// Allocate a zeroed disk block.
static uint
balloc(uint dev)
{
int b, bi, m;
struct buf *bp;
bp = 0;
for(b = 0; b < sb.size; b += BPB){
bp = bread(dev, BBLOCK(b, sb));
for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
m = 1 << (bi % 8);
if((bp->data[bi/8] & m) == 0){ // Is block free?
bp->data[bi/8] |= m; // Mark block in use.
log_write(bp);
brelse(bp);
bzero(dev, b + bi);
return b + bi;
}
}
brelse(bp);
}
panic("balloc: out of blocks");
}
// Free a disk block.
static void
bfree(int dev, uint b)
{
struct buf *bp;
int bi, m;
readsb(dev, &sb);
bp = bread(dev, BBLOCK(b, sb));
bi = b % BPB;
m = 1 << (bi % 8);
if((bp->data[bi/8] & m) == 0)
panic("freeing free block");
bp->data[bi/8] &= ~m;
log_write(bp);
brelse(bp);
}
// Inodes.
//
// An inode describes a single unnamed file.
// The inode disk structure holds metadata: the file's type,
// its size, the number of links referring to it, and the
// list of blocks holding the file's content.
//
// The inodes are laid out sequentially on disk at
// sb.startinode. Each inode has a number, indicating its
// position on the disk.
//
// The kernel keeps a cache of in-use inodes in memory
// to provide a place for synchronizing access
// to inodes used by multiple processes. The cached
// inodes include book-keeping information that is
// not stored on disk: ip->ref and ip->valid.
//
// An inode and its in-memory representation go through a
// sequence of states before they can be used by the
// rest of the file system code.
//
// * Allocation: an inode is allocated if its type (on disk)
// is non-zero. ialloc() allocates, and iput() frees if
// the reference and link counts have fallen to zero.
//
// * Referencing in cache: an entry in the inode cache
// is free if ip->ref is zero. Otherwise ip->ref tracks
// the number of in-memory pointers to the entry (open
// files and current directories). iget() finds or
// creates a cache entry and increments its ref; iput()
// decrements ref.
//
// * Valid: the information (type, size, &c) in an inode
// cache entry is only correct when ip->valid is 1.
// ilock() reads the inode from
// the disk and sets ip->valid, while iput() clears
// ip->valid if ip->ref has fallen to zero.
//
// * Locked: file system code may only examine and modify
// the information in an inode and its content if it
// has first locked the inode.
//
// Thus a typical sequence is:
// ip = iget(dev, inum)
// ilock(ip)
// ... examine and modify ip->xxx ...
// iunlock(ip)
// iput(ip)
//
// ilock() is separate from iget() so that system calls can
// get a long-term reference to an inode (as for an open file)
// and only lock it for short periods (e.g., in read()).
// The separation also helps avoid deadlock and races during
// pathname lookup. iget() increments ip->ref so that the inode
// stays cached and pointers to it remain valid.
//
// Many internal file system functions expect the caller to
// have locked the inodes involved; this lets callers create
// multi-step atomic operations.
//
// The icache.lock spin-lock protects the allocation of icache
// entries. Since ip->ref indicates whether an entry is free,
// and ip->dev and ip->inum indicate which i-node an entry
// holds, one must hold icache.lock while using any of those fields.
//
// An ip->lock sleep-lock protects all ip-> fields other than ref,
// dev, and inum. One must hold ip->lock in order to
// read or write that inode's ip->valid, ip->size, ip->type, &c.
struct {
struct spinlock lock;
struct inode inode[NINODE];
} icache;
void
iinit(int dev)
{
int i = 0;
initlock(&icache.lock, "icache");
for(i = 0; i < NINODE; i++) {
initsleeplock(&icache.inode[i].lock, "inode");
}
readsb(dev, &sb);
if(sb.magic != FSMAGIC)
panic("invalid file system");
}
static struct inode* iget(uint dev, uint inum);
//PAGEBREAK!
// Allocate an inode on device dev.
// Mark it as allocated by giving it type type.
// Returns an unlocked but allocated and referenced inode.
struct inode*
ialloc(uint dev, short type)
{
int inum;
struct buf *bp;
struct dinode *dip;
for(inum = 1; inum < sb.ninodes; inum++){
bp = bread(dev, IBLOCK(inum, sb));
dip = (struct dinode*)bp->data + inum%IPB;
if(dip->type == 0){ // a free inode
memset(dip, 0, sizeof(*dip));
dip->type = type;
log_write(bp); // mark it allocated on the disk
brelse(bp);
return iget(dev, inum);
}
brelse(bp);
}
panic("ialloc: no inodes");
}
// Copy a modified in-memory inode to disk.
// Must be called after every change to an ip->xxx field
// that lives on disk, since i-node cache is write-through.
// Caller must hold ip->lock.
void
iupdate(struct inode *ip)
{
struct buf *bp;
struct dinode *dip;
bp = bread(ip->dev, IBLOCK(ip->inum, sb));
dip = (struct dinode*)bp->data + ip->inum%IPB;
dip->type = ip->type;
dip->major = ip->major;
dip->minor = ip->minor;
dip->nlink = ip->nlink;
dip->size = ip->size;
memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
log_write(bp);
brelse(bp);
}
// Find the inode with number inum on device dev
// and return the in-memory copy. Does not lock
// the inode and does not read it from disk.
static struct inode*
iget(uint dev, uint inum)
{
struct inode *ip, *empty;
acquire(&icache.lock);
// Is the inode already cached?
empty = 0;
for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
ip->ref++;
release(&icache.lock);
return ip;
}
if(empty == 0 && ip->ref == 0) // Remember empty slot.
empty = ip;
}
// Recycle an inode cache entry.
if(empty == 0)
panic("iget: no inodes");
ip = empty;
ip->dev = dev;
ip->inum = inum;
ip->ref = 1;
ip->valid = 0;
release(&icache.lock);
return ip;
}
// Increment reference count for ip.
// Returns ip to enable ip = idup(ip1) idiom.
struct inode*
idup(struct inode *ip)
{
acquire(&icache.lock);
ip->ref++;
release(&icache.lock);
return ip;
}
// Lock the given inode.
// Reads the inode from disk if necessary.
void
ilock(struct inode *ip)
{
struct buf *bp;
struct dinode *dip;
if(ip == 0 || ip->ref < 1)
panic("ilock");
acquiresleep(&ip->lock);
if(ip->valid == 0){
bp = bread(ip->dev, IBLOCK(ip->inum, sb));
dip = (struct dinode*)bp->data + ip->inum%IPB;
ip->type = dip->type;
ip->major = dip->major;
ip->minor = dip->minor;
ip->nlink = dip->nlink;
ip->size = dip->size;
memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
brelse(bp);
ip->valid = 1;
if(ip->type == 0)
panic("ilock: no type");
}
}
// Unlock the given inode.
void
iunlock(struct inode *ip)
{
if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1)
panic("iunlock");
releasesleep(&ip->lock);
}
// Drop a reference to an in-memory inode.
// If that was the last reference, the inode cache entry can
// be recycled.
// If that was the last reference and the inode has no links
// to it, free the inode (and its content) on disk.
// All calls to iput() must be inside a transaction in
// case it has to free the inode.
void
iput(struct inode *ip)
{
acquire(&icache.lock);
if(ip->ref == 1 && ip->valid && ip->nlink == 0){
// inode has no links and no other references: truncate and free.
// ip->ref == 1 means no other process can have ip locked,
// so this acquiresleep() won't block (or deadlock).
acquiresleep(&ip->lock);
release(&icache.lock);
itrunc(ip);
ip->type = 0;
iupdate(ip);
ip->valid = 0;
releasesleep(&ip->lock);
acquire(&icache.lock);
}
ip->ref--;
release(&icache.lock);
}
// Common idiom: unlock, then put.
void
iunlockput(struct inode *ip)
{
iunlock(ip);
iput(ip);
}
//PAGEBREAK!
// Inode content
//
// The content (data) associated with each inode is stored
// in blocks on the disk. The first NDIRECT block numbers
// are listed in ip->addrs[]. The next NINDIRECT blocks are
// listed in block ip->addrs[NDIRECT].
// Return the disk block address of the nth block in inode ip.
// If there is no such block, bmap allocates one.
static uint
bmap(struct inode *ip, uint bn)
{
uint addr, *a;
struct buf *bp;
if(bn < NDIRECT){
if((addr = ip->addrs[bn]) == 0)
ip->addrs[bn] = addr = balloc(ip->dev);
return addr;
}
bn -= NDIRECT;
if(bn < NINDIRECT){
// Load indirect block, allocating if necessary.
if((addr = ip->addrs[NDIRECT]) == 0)
ip->addrs[NDIRECT] = addr = balloc(ip->dev);
bp = bread(ip->dev, addr);
a = (uint*)bp->data;
if((addr = a[bn]) == 0){
a[bn] = addr = balloc(ip->dev);
log_write(bp);
}
brelse(bp);
return addr;
}
panic("bmap: out of range");
}
// Truncate inode (discard contents).
// Only called when the inode has no links
// to it (no directory entries referring to it)
// and has no in-memory reference to it (is
// not an open file or current directory).
static void
itrunc(struct inode *ip)
{
int i, j;
struct buf *bp;
uint *a;
for(i = 0; i < NDIRECT; i++){
if(ip->addrs[i]){
bfree(ip->dev, ip->addrs[i]);
ip->addrs[i] = 0;
}
}
if(ip->addrs[NDIRECT]){
bp = bread(ip->dev, ip->addrs[NDIRECT]);
a = (uint*)bp->data;
for(j = 0; j < NINDIRECT; j++){
if(a[j])
bfree(ip->dev, a[j]);
}
brelse(bp);
bfree(ip->dev, ip->addrs[NDIRECT]);
ip->addrs[NDIRECT] = 0;
}
ip->size = 0;
iupdate(ip);
}
// Copy stat information from inode.
// Caller must hold ip->lock.
void
stati(struct inode *ip, struct stat *st)
{
st->dev = ip->dev;
st->ino = ip->inum;
st->type = ip->type;
st->nlink = ip->nlink;
st->size = ip->size;
}
//PAGEBREAK!
// Read data from inode.
// Caller must hold ip->lock.
// If user_dst==1, then dst is a user virtual address;
// otherwise, dst is a kernel address.
int
readi(struct inode *ip, int user_dst, uint64 dst, uint off, uint n)
{
uint tot, m;
struct buf *bp;
if(ip->type == T_DEV){
if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
return -1;
return devsw[ip->major].read(ip, user_dst, dst, n);
}
if(off > ip->size || off + n < off)
return -1;
if(off + n > ip->size)
n = ip->size - off;
for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
bp = bread(ip->dev, bmap(ip, off/BSIZE));
m = min(n - tot, BSIZE - off%BSIZE);
if(either_copyout(user_dst, dst, bp->data + (off % BSIZE), m) == -1)
break;
brelse(bp);
}
return n;
}
// PAGEBREAK!
// Write data to inode.
// Caller must hold ip->lock.
// If user_src==1, then src is a user virtual address;
// otherwise, src is a kernel address.
int
writei(struct inode *ip, int user_src, uint64 src, uint off, uint n)
{
uint tot, m;
struct buf *bp;
if(ip->type == T_DEV){
if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write){
return -1;
}
return devsw[ip->major].write(ip, user_src, src, n);
}
if(off > ip->size || off + n < off)
return -1;
if(off + n > MAXFILE*BSIZE)
return -1;
for(tot=0; tot<n; tot+=m, off+=m, src+=m){
bp = bread(ip->dev, bmap(ip, off/BSIZE));
m = min(n - tot, BSIZE - off%BSIZE);
if(either_copyin(bp->data + (off % BSIZE), user_src, src, m) == -1)
break;
log_write(bp);
brelse(bp);
}
if(n > 0 && off > ip->size){
ip->size = off;
iupdate(ip);
}
return n;
}
//PAGEBREAK!
// Directories
int
namecmp(const char *s, const char *t)
{
return strncmp(s, t, DIRSIZ);
}
// Look for a directory entry in a directory.
// If found, set *poff to byte offset of entry.
struct inode*
dirlookup(struct inode *dp, char *name, uint *poff)
{
uint off, inum;
struct dirent de;
if(dp->type != T_DIR)
panic("dirlookup not DIR");
for(off = 0; off < dp->size; off += sizeof(de)){
if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
panic("dirlookup read");
if(de.inum == 0)
continue;
if(namecmp(name, de.name) == 0){
// entry matches path element
if(poff)
*poff = off;
inum = de.inum;
return iget(dp->dev, inum);
}
}
return 0;
}
// Write a new directory entry (name, inum) into the directory dp.
int
dirlink(struct inode *dp, char *name, uint inum)
{
int off;
struct dirent de;
struct inode *ip;
// Check that name is not present.
if((ip = dirlookup(dp, name, 0)) != 0){
iput(ip);
return -1;
}
// Look for an empty dirent.
for(off = 0; off < dp->size; off += sizeof(de)){
if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
panic("dirlink read");
if(de.inum == 0)
break;
}
strncpy(de.name, name, DIRSIZ);
de.inum = inum;
if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
panic("dirlink");
return 0;
}
//PAGEBREAK!
// Paths
// Copy the next path element from path into name.
// Return a pointer to the element following the copied one.
// The returned path has no leading slashes,
// so the caller can check *path=='\0' to see if the name is the last one.
// If no name to remove, return 0.
//
// Examples:
// skipelem("a/bb/c", name) = "bb/c", setting name = "a"
// skipelem("///a//bb", name) = "bb", setting name = "a"
// skipelem("a", name) = "", setting name = "a"
// skipelem("", name) = skipelem("////", name) = 0
//
static char*
skipelem(char *path, char *name)
{
char *s;
int len;
while(*path == '/')
path++;
if(*path == 0)
return 0;
s = path;
while(*path != '/' && *path != 0)
path++;
len = path - s;
if(len >= DIRSIZ)
memmove(name, s, DIRSIZ);
else {
memmove(name, s, len);
name[len] = 0;
}
while(*path == '/')
path++;
return path;
}
// Look up and return the inode for a path name.
// If parent != 0, return the inode for the parent and copy the final
// path element into name, which must have room for DIRSIZ bytes.
// Must be called inside a transaction since it calls iput().
static struct inode*
namex(char *path, int nameiparent, char *name)
{
struct inode *ip, *next;
if(*path == '/')
ip = iget(ROOTDEV, ROOTINO);
else
ip = idup(myproc()->cwd);
while((path = skipelem(path, name)) != 0){
ilock(ip);
if(ip->type != T_DIR){
iunlockput(ip);
return 0;
}
if(nameiparent && *path == '\0'){
// Stop one level early.
iunlock(ip);
return ip;
}
if((next = dirlookup(ip, name, 0)) == 0){
iunlockput(ip);
return 0;
}
iunlockput(ip);
ip = next;
}
if(nameiparent){
iput(ip);
return 0;
}
return ip;
}
struct inode*
namei(char *path)
{
char name[DIRSIZ];
return namex(path, 0, name);
}
struct inode*
nameiparent(char *path, char *name)
{
return namex(path, 1, name);
}

60
kernel/fs.h Normal file
View file

@ -0,0 +1,60 @@
// On-disk file system format.
// Both the kernel and user programs use this header file.
#define ROOTINO 1 // root i-number
#define BSIZE 1024 // block size
// Disk layout:
// [ boot block | super block | log | inode blocks |
// free bit map | data blocks]
//
// mkfs computes the super block and builds an initial file system. The
// super block describes the disk layout:
struct superblock {
uint magic; // Must be FSMAGIC
uint size; // Size of file system image (blocks)
uint nblocks; // Number of data blocks
uint ninodes; // Number of inodes.
uint nlog; // Number of log blocks
uint logstart; // Block number of first log block
uint inodestart; // Block number of first inode block
uint bmapstart; // Block number of first free map block
};
#define FSMAGIC 0x10203040
#define NDIRECT 12
#define NINDIRECT (BSIZE / sizeof(uint))
#define MAXFILE (NDIRECT + NINDIRECT)
// On-disk inode structure
struct dinode {
short type; // File type
short major; // Major device number (T_DEV only)
short minor; // Minor device number (T_DEV only)
short nlink; // Number of links to inode in file system
uint size; // Size of file (bytes)
uint addrs[NDIRECT+1]; // Data block addresses
};
// Inodes per block.
#define IPB (BSIZE / sizeof(struct dinode))
// Block containing inode i
#define IBLOCK(i, sb) ((i) / IPB + sb.inodestart)
// Bitmap bits per block
#define BPB (BSIZE*8)
// Block of free map containing bit for block b
#define BBLOCK(b, sb) ((b)/BPB + sb.bmapstart)
// Directory is a file containing a sequence of dirent structures.
#define DIRSIZ 14
struct dirent {
ushort inum;
char name[DIRSIZ];
};

80
kernel/kalloc.c Normal file
View file

@ -0,0 +1,80 @@
// Physical memory allocator, intended to allocate
// memory for user processes, kernel stacks, page table pages,
// and pipe buffers. Allocates 4096-byte pages.
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "riscv.h"
#include "defs.h"
void freerange(void *pa_start, void *pa_end);
extern char end[]; // first address after kernel loaded from ELF file
// defined by the kernel linker script in kernel.ld
struct run {
struct run *next;
};
struct {
struct spinlock lock;
struct run *freelist;
} kmem;
void
kinit()
{
initlock(&kmem.lock, "kmem");
freerange(end, (void*)PHYSTOP);
}
void
freerange(void *pa_start, void *pa_end)
{
char *p;
p = (char*)PGROUNDUP((uint64)pa_start);
for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
kfree(p);
}
//PAGEBREAK: 21
// Free the page of physical memory pointed at by v,
// which normally should have been returned by a
// call to kalloc(). (The exception is when
// initializing the allocator; see kinit above.)
void
kfree(void *pa)
{
struct run *r;
if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
panic("kfree");
// Fill with junk to catch dangling refs.
memset(pa, 1, PGSIZE);
acquire(&kmem.lock);
r = (struct run*)pa;
r->next = kmem.freelist;
kmem.freelist = r;
release(&kmem.lock);
}
// Allocate one 4096-byte page of physical memory.
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
void *
kalloc(void)
{
struct run *r;
acquire(&kmem.lock);
r = kmem.freelist;
if(r)
kmem.freelist = r->next;
release(&kmem.lock);
if(r)
memset((char*)r, 5, PGSIZE); // fill with junk
return (void*)r;
}

31
kernel/kernel.ld Normal file
View file

@ -0,0 +1,31 @@
OUTPUT_ARCH( "riscv" )
ENTRY( _entry )
SECTIONS
{
/*
* ensure that entry.S / _entry is at 0x80000000,
* where qemu's -kernel jumps.
*/
. = 0x80000000;
.text :
{
*(.text)
. = ALIGN(0x1000);
*(trampoline)
}
. = ALIGN(0x1000);
PROVIDE(etext = .);
/*
* make sure end is after data and bss.
*/
.data : {
*(.data)
}
bss : {
*(.bss)
PROVIDE(end = .);
}
}

112
kernel/kernelvec.S Normal file
View file

@ -0,0 +1,112 @@
#
# interrupts and exceptions while in supervisor
# mode come here.
#
# push all registers, call kerneltrap(), restore, return.
#
.globl kerneltrap
.globl kernelvec
.align 4
kernelvec:
addi sp, sp, -256
sd ra, 0(sp)
sd sp, 8(sp)
sd gp, 16(sp)
sd tp, 24(sp)
sd t0, 32(sp)
sd t1, 40(sp)
sd t2, 48(sp)
sd s0, 56(sp)
sd s1, 64(sp)
sd a0, 72(sp)
sd a1, 80(sp)
sd a2, 88(sp)
sd a3, 96(sp)
sd a4, 104(sp)
sd a5, 112(sp)
sd a6, 120(sp)
sd a7, 128(sp)
sd s2, 136(sp)
sd s3, 144(sp)
sd s4, 152(sp)
sd s5, 160(sp)
sd s6, 168(sp)
sd s7, 176(sp)
sd s8, 184(sp)
sd s9, 192(sp)
sd s10, 200(sp)
sd s11, 208(sp)
sd t3, 216(sp)
sd t4, 224(sp)
sd t5, 232(sp)
sd t6, 240(sp)
call kerneltrap
ld ra, 0(sp)
ld sp, 8(sp)
ld gp, 16(sp)
ld tp, 24(sp)
ld t0, 32(sp)
ld t1, 40(sp)
ld t2, 48(sp)
ld s0, 56(sp)
ld s1, 64(sp)
ld a0, 72(sp)
ld a1, 80(sp)
ld a2, 88(sp)
ld a3, 96(sp)
ld a4, 104(sp)
ld a5, 112(sp)
ld a6, 120(sp)
ld a7, 128(sp)
ld s2, 136(sp)
ld s3, 144(sp)
ld s4, 152(sp)
ld s5, 160(sp)
ld s6, 168(sp)
ld s7, 176(sp)
ld s8, 184(sp)
ld s9, 192(sp)
ld s10, 200(sp)
ld s11, 208(sp)
ld t3, 216(sp)
ld t4, 224(sp)
ld t5, 232(sp)
ld t6, 240(sp)
addi sp, sp, 256
sret
#
# machine-mode timer interrupt.
#
.globl machinevec
.align 4
machinevec:
csrrw a0, mscratch, a0
sd a1, 0(a0)
sd a2, 8(a0)
sd a3, 16(a0)
sd a4, 24(a0)
# add another second to mtimecmp0.
ld a1, 32(a0) # CLINT_MTIMECMP(hart)
ld a2, 40(a0) # ticks per second
ld a3, 0(a1)
add a3, a3, a2
sd a3, 0(a1)
# raise a supervisor software interrupt.
li a1, 2
csrw sip, a1
ld a4, 24(a0)
ld a3, 16(a0)
ld a2, 8(a0)
ld a1, 0(a0)
csrrw a0, mscratch, a0
mret

235
kernel/log.c Normal file
View file

@ -0,0 +1,235 @@
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "buf.h"
// Simple logging that allows concurrent FS system calls.
//
// A log transaction contains the updates of multiple FS system
// calls. The logging system only commits when there are
// no FS system calls active. Thus there is never
// any reasoning required about whether a commit might
// write an uncommitted system call's updates to disk.
//
// A system call should call begin_op()/end_op() to mark
// its start and end. Usually begin_op() just increments
// the count of in-progress FS system calls and returns.
// But if it thinks the log is close to running out, it
// sleeps until the last outstanding end_op() commits.
//
// The log is a physical re-do log containing disk blocks.
// The on-disk log format:
// header block, containing block #s for block A, B, C, ...
// block A
// block B
// block C
// ...
// Log appends are synchronous.
// Contents of the header block, used for both the on-disk header block
// and to keep track in memory of logged block# before commit.
struct logheader {
int n;
int block[LOGSIZE];
};
struct log {
struct spinlock lock;
int start;
int size;
int outstanding; // how many FS sys calls are executing.
int committing; // in commit(), please wait.
int dev;
struct logheader lh;
};
struct log log;
static void recover_from_log(void);
static void commit();
void
initlog(int dev)
{
if (sizeof(struct logheader) >= BSIZE)
panic("initlog: too big logheader");
struct superblock sb;
initlock(&log.lock, "log");
readsb(dev, &sb);
log.start = sb.logstart;
log.size = sb.nlog;
log.dev = dev;
recover_from_log();
}
// Copy committed blocks from log to their home location
static void
install_trans(void)
{
int tail;
for (tail = 0; tail < log.lh.n; tail++) {
struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block
struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst
memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst
bwrite(dbuf); // write dst to disk
brelse(lbuf);
brelse(dbuf);
}
}
// Read the log header from disk into the in-memory log header
static void
read_head(void)
{
struct buf *buf = bread(log.dev, log.start);
struct logheader *lh = (struct logheader *) (buf->data);
int i;
log.lh.n = lh->n;
for (i = 0; i < log.lh.n; i++) {
log.lh.block[i] = lh->block[i];
}
brelse(buf);
}
// Write in-memory log header to disk.
// This is the true point at which the
// current transaction commits.
static void
write_head(void)
{
struct buf *buf = bread(log.dev, log.start);
struct logheader *hb = (struct logheader *) (buf->data);
int i;
hb->n = log.lh.n;
for (i = 0; i < log.lh.n; i++) {
hb->block[i] = log.lh.block[i];
}
bwrite(buf);
brelse(buf);
}
static void
recover_from_log(void)
{
read_head();
install_trans(); // if committed, copy from log to disk
log.lh.n = 0;
write_head(); // clear the log
}
// called at the start of each FS system call.
void
begin_op(void)
{
acquire(&log.lock);
while(1){
if(log.committing){
sleep(&log, &log.lock);
} else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){
// this op might exhaust log space; wait for commit.
sleep(&log, &log.lock);
} else {
log.outstanding += 1;
release(&log.lock);
break;
}
}
}
// called at the end of each FS system call.
// commits if this was the last outstanding operation.
void
end_op(void)
{
int do_commit = 0;
acquire(&log.lock);
log.outstanding -= 1;
if(log.committing)
panic("log.committing");
if(log.outstanding == 0){
do_commit = 1;
log.committing = 1;
} else {
// begin_op() may be waiting for log space,
// and decrementing log.outstanding has decreased
// the amount of reserved space.
wakeup(&log);
}
release(&log.lock);
if(do_commit){
// call commit w/o holding locks, since not allowed
// to sleep with locks.
commit();
acquire(&log.lock);
log.committing = 0;
wakeup(&log);
release(&log.lock);
}
}
// Copy modified blocks from cache to log.
static void
write_log(void)
{
int tail;
for (tail = 0; tail < log.lh.n; tail++) {
struct buf *to = bread(log.dev, log.start+tail+1); // log block
struct buf *from = bread(log.dev, log.lh.block[tail]); // cache block
memmove(to->data, from->data, BSIZE);
bwrite(to); // write the log
brelse(from);
brelse(to);
}
}
static void
commit()
{
if (log.lh.n > 0) {
write_log(); // Write modified blocks from cache to log
write_head(); // Write header to disk -- the real commit
install_trans(); // Now install writes to home locations
log.lh.n = 0;
write_head(); // Erase the transaction from the log
}
}
// Caller has modified b->data and is done with the buffer.
// Record the block number and pin in the cache with B_DIRTY.
// commit()/write_log() will do the disk write.
//
// log_write() replaces bwrite(); a typical use is:
// bp = bread(...)
// modify bp->data[]
// log_write(bp)
// brelse(bp)
void
log_write(struct buf *b)
{
int i;
if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1)
panic("too big a transaction");
if (log.outstanding < 1)
panic("log_write outside of trans");
acquire(&log.lock);
for (i = 0; i < log.lh.n; i++) {
if (log.lh.block[i] == b->blockno) // log absorbtion
break;
}
log.lh.block[i] = b->blockno;
if (i == log.lh.n)
log.lh.n++;
b->flags |= B_DIRTY; // prevent eviction
release(&log.lock);
}

42
kernel/main.c Normal file
View file

@ -0,0 +1,42 @@
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "defs.h"
volatile static int started = 0;
// Bootstrap processor starts running C code here.
// Allocate a real stack and switch to it, first
// doing some setup required for memory allocator to work.
void
main()
{
if(cpuid() == 0){
uartinit(); // serial port
consoleinit();
printf("hart %d starting\n", cpuid());
kinit(); // physical page allocator
kvminit(); // create kernel page table
kvminithart(); // turn on paging
procinit(); // process table
trapinit(); // trap vectors
trapinithart(); // install kernel trap vector
plicinit(); // set up interrupt controller
plicinithart(); // ask PLIC for device interrupts
binit(); // buffer cache
fileinit(); // file table
ramdiskinit(); // disk
userinit(); // first user process
started = 1;
} else {
while(started == 0)
;
printf("hart %d starting\n", cpuid());
kvminithart(); // turn on paging
trapinithart(); // install kernel trap vector
plicinithart(); // ask PLIC for device interrupts
}
scheduler();
}

50
kernel/memlayout.h Normal file
View file

@ -0,0 +1,50 @@
// Physical memory layout
// qemu -machine virt is set up like this,
// based on qemu's hw/riscv/virt.c:
//
// 00001000 -- boot ROM, provided by qemu
// 02000000 -- CLINT
// 0C000000 -- PLIC
// 10000000 -- uart0 registers
// 80000000 -- boot ROM jumps here in machine mode
// -kernel loads the kernel here
// 88000000 -- -initrd fs.img ramdisk image.
// unused RAM after 80000000.
// the kernel uses physical memory thus:
// 80000000 -- entry.S, then kernel text and data
// end -- start of kernel page allocation area
// PHYSTOP -- end RAM used by the kernel
// qemu puts UART registers here in physical memory.
#define UART0 0x10000000L
#define UART0_IRQ 10
// local interrupt controller, which contains the timer.
#define CLINT 0x2000000L
#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid))
#define CLINT_MTIME (CLINT + 0xBFF8)
// qemu puts programmable interrupt controller here.
#define PLIC 0x0c000000L
#define PLIC_PRIORITY (PLIC + 0x0)
#define PLIC_PENDING (PLIC + 0x1000)
#define PLIC_MENABLE(hart) (PLIC + 0x2000 + (hart)*0x100)
#define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100)
#define PLIC_MPRIORITY(hart) (PLIC + 0x200000 + (hart)*0x2000)
#define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000)
#define PLIC_MCLAIM(hart) (PLIC + 0x200004 + (hart)*0x2000)
#define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000)
#define RAMDISK 0x88000000L
// the kernel expects there to be RAM
// for use by the kernel and user pages
// from physical address 0x80000000 to PHYSTOP.
#define KERNBASE 0x80000000L
#define PHYSTOP (KERNBASE + 128*1024*1024)
// map the trampoline page to the highest address,
// in both user and kernel space.
#define TRAMPOLINE (MAXVA - PGSIZE)

13
kernel/param.h Normal file
View file

@ -0,0 +1,13 @@
#define NPROC 64 // maximum number of processes
#define NCPU 8 // maximum number of CPUs
#define NOFILE 16 // open files per process
#define NFILE 100 // open files per system
#define NINODE 50 // maximum number of active i-nodes
#define NDEV 10 // maximum major device number
#define ROOTDEV 1 // device number of file system root disk
#define MAXARG 32 // max exec arguments
#define MAXOPBLOCKS 10 // max # of blocks any FS op writes
#define LOGSIZE (MAXOPBLOCKS*3) // max data blocks in on-disk log
#define NBUF (MAXOPBLOCKS*3) // size of disk block cache
#define FSSIZE 1000 // size of file system in blocks
#define MAXPATH 128 // maximum file path name

129
kernel/pipe.c Normal file
View file

@ -0,0 +1,129 @@
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "proc.h"
#include "fs.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "file.h"
#define PIPESIZE 512
struct pipe {
struct spinlock lock;
char data[PIPESIZE];
uint nread; // number of bytes read
uint nwrite; // number of bytes written
int readopen; // read fd is still open
int writeopen; // write fd is still open
};
int
pipealloc(struct file **f0, struct file **f1)
{
struct pipe *p;
p = 0;
*f0 = *f1 = 0;
if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0)
goto bad;
if((p = (struct pipe*)kalloc()) == 0)
goto bad;
p->readopen = 1;
p->writeopen = 1;
p->nwrite = 0;
p->nread = 0;
initlock(&p->lock, "pipe");
(*f0)->type = FD_PIPE;
(*f0)->readable = 1;
(*f0)->writable = 0;
(*f0)->pipe = p;
(*f1)->type = FD_PIPE;
(*f1)->readable = 0;
(*f1)->writable = 1;
(*f1)->pipe = p;
return 0;
//PAGEBREAK: 20
bad:
if(p)
kfree((char*)p);
if(*f0)
fileclose(*f0);
if(*f1)
fileclose(*f1);
return -1;
}
void
pipeclose(struct pipe *p, int writable)
{
acquire(&p->lock);
if(writable){
p->writeopen = 0;
wakeup(&p->nread);
} else {
p->readopen = 0;
wakeup(&p->nwrite);
}
if(p->readopen == 0 && p->writeopen == 0){
release(&p->lock);
kfree((char*)p);
} else
release(&p->lock);
}
//PAGEBREAK: 40
int
pipewrite(struct pipe *p, uint64 addr, int n)
{
int i;
char ch;
struct proc *pr = myproc();
acquire(&p->lock);
for(i = 0; i < n; i++){
while(p->nwrite == p->nread + PIPESIZE){ //DOC: pipewrite-full
if(p->readopen == 0 || myproc()->killed){
release(&p->lock);
return -1;
}
wakeup(&p->nread);
sleep(&p->nwrite, &p->lock); //DOC: pipewrite-sleep
}
if(copyin(pr->pagetable, &ch, addr + i, 1) == -1)
break;
p->data[p->nwrite++ % PIPESIZE] = ch;
}
wakeup(&p->nread); //DOC: pipewrite-wakeup1
release(&p->lock);
return n;
}
int
piperead(struct pipe *p, uint64 addr, int n)
{
int i;
struct proc *pr = myproc();
char ch;
acquire(&p->lock);
while(p->nread == p->nwrite && p->writeopen){ //DOC: pipe-empty
if(myproc()->killed){
release(&p->lock);
return -1;
}
sleep(&p->nread, &p->lock); //DOC: piperead-sleep
}
for(i = 0; i < n; i++){ //DOC: piperead-copy
if(p->nread == p->nwrite)
break;
ch = p->data[p->nread++ % PIPESIZE];
if(copyout(pr->pagetable, addr + i, &ch, 1) == -1)
break;
}
wakeup(&p->nwrite); //DOC: piperead-wakeup
release(&p->lock);
return i;
}

63
kernel/plic.c Normal file
View file

@ -0,0 +1,63 @@
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "defs.h"
//
// the riscv Platform Level Interrupt Controller (PLIC).
//
void
plicinit(void)
{
// set uart's priority to be non-zero (otherwise disabled).
*(uint32*)(PLIC + UART0_IRQ*4) = 1;
}
void
plicinithart(void)
{
int hart = cpuid();
// set uart's enable bit for this hart's S-mode.
//*(uint32*)(PLIC + 0x2080)= (1 << UART0_IRQ);
*(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ);
// set this hart's S-mode priority threshold to 0.
//*(uint32*)(PLIC + 0x201000) = 0;
*(uint32*)PLIC_SPRIORITY(hart) = 0;
}
// return a bitmap of which IRQs are waiting
// to be served.
uint64
plic_pending(void)
{
uint64 mask;
//mask = *(uint32*)(PLIC + 0x1000);
//mask |= (uint64)*(uint32*)(PLIC + 0x1004) << 32;
mask = *(uint64*)PLIC_PENDING;
return mask;
}
// ask the PLIC what interrupt we should serve.
int
plic_claim(void)
{
int hart = cpuid();
//int irq = *(uint32*)(PLIC + 0x201004);
int irq = *(uint32*)PLIC_SCLAIM(hart);
return irq;
}
// tell the PLIC we've served this IRQ.
void
plic_complete(int irq)
{
int hart = cpuid();
//*(uint32*)(PLIC + 0x201004) = irq;
*(uint32*)PLIC_SCLAIM(hart) = irq;
}

591
kernel/proc.c Normal file
View file

@ -0,0 +1,591 @@
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "proc.h"
#include "spinlock.h"
#include "defs.h"
struct {
struct spinlock lock;
struct proc proc[NPROC];
} ptable;
struct cpu cpus[NCPU];
struct proc *initproc;
int nextpid = 1;
extern void forkret(void);
// for returning out of the kernel
extern void sysexit(void);
static void wakeup1(void *chan);
extern char trampout[]; // trampoline.S
void
procinit(void)
{
initlock(&ptable.lock, "ptable");
}
// Must be called with interrupts disabled,
// to prevent race with process being moved
// to a different CPU.
int
cpuid()
{
int id = r_tp();
return id;
}
// Return this core's cpu struct.
// Interrupts must be disabled.
struct cpu*
mycpu(void) {
int id = cpuid();
struct cpu *c = &cpus[id];
return c;
}
// Return the current struct proc *.
struct proc*
myproc(void) {
push_off();
struct cpu *c = mycpu();
struct proc *p = c->proc;
pop_off();
return p;
}
//PAGEBREAK: 32
// Look in the process table for an UNUSED proc.
// If found, change state to EMBRYO and initialize
// state required to run in the kernel.
// Otherwise return 0.
static struct proc*
allocproc(void)
{
struct proc *p;
acquire(&ptable.lock);
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++)
if(p->state == UNUSED)
goto found;
release(&ptable.lock);
return 0;
found:
p->state = EMBRYO;
p->pid = nextpid++;
release(&ptable.lock);
// Allocate a page for the kernel stack.
if((p->kstack = kalloc()) == 0){
p->state = UNUSED;
return 0;
}
// Allocate a trapframe page.
if((p->tf = (struct trapframe *)kalloc()) == 0){
p->state = UNUSED;
return 0;
}
// An empty user page table.
p->pagetable = proc_pagetable(p);
// Set up new context to start executing at forkret,
// which returns to user space.
memset(&p->context, 0, sizeof p->context);
p->context.ra = (uint64)forkret;
p->context.sp = (uint64)p->kstack + PGSIZE;
return p;
}
// Create a page table for a given process,
// with no users pages, but with trampoline pages.
// Called both when creating a process, and
// by exec() when building tentative new memory image,
// which might fail.
pagetable_t
proc_pagetable(struct proc *p)
{
pagetable_t pagetable;
// An empty user page table.
pagetable = uvmcreate();
// map the trampoline code (for system call return)
// at the highest user virtual address.
// only the supervisor uses it, on the way
// to/from user space, so not PTE_U.
mappages(pagetable, TRAMPOLINE, PGSIZE,
(uint64)trampout, PTE_R | PTE_X);
// map the trapframe, for trampoline.S.
mappages(pagetable, (TRAMPOLINE - PGSIZE), PGSIZE,
(uint64)(p->tf), PTE_R | PTE_W);
return pagetable;
}
// Free a process's page table, and free the
// physical memory the page table refers to.
// Called both when a process exits and from
// exec() if it fails.
void
proc_freepagetable(pagetable_t pagetable, uint64 sz)
{
unmappages(pagetable, TRAMPOLINE, PGSIZE, 0);
unmappages(pagetable, TRAMPOLINE-PGSIZE, PGSIZE, 0);
uvmfree(pagetable, sz);
}
// a user program that calls exec("/init")
// od -t xC initcode
uchar initcode[] = {
0x17, 0x05, 0x00, 0x00, 0x13, 0x05, 0x05, 0x02, 0x97, 0x05, 0x00, 0x00, 0x93, 0x85, 0x05, 0x02,
0x9d, 0x48, 0x73, 0x00, 0x00, 0x00, 0x89, 0x48, 0x73, 0x00, 0x00, 0x00, 0xef, 0xf0, 0xbf, 0xff,
0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00
};
//PAGEBREAK: 32
// Set up first user process.
void
userinit(void)
{
struct proc *p;
p = allocproc();
initproc = p;
uvminit(p->pagetable, initcode, sizeof(initcode));
p->sz = PGSIZE;
// prepare for the very first kernel->user.
p->tf->epc = 0;
p->tf->sp = PGSIZE;
safestrcpy(p->name, "initcode", sizeof(p->name));
p->cwd = namei("/");
// this assignment to p->state lets other cores
// run this process. the acquire forces the above
// writes to be visible, and the lock is also needed
// because the assignment might not be atomic.
acquire(&ptable.lock);
p->state = RUNNABLE;
release(&ptable.lock);
}
// Grow current process's memory by n bytes.
// Return 0 on success, -1 on failure.
int
growproc(int n)
{
uint sz;
struct proc *p = myproc();
sz = p->sz;
if(n > 0){
if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0)
return -1;
} else if(n < 0){
if((sz = uvmdealloc(p->pagetable, sz, sz + n)) == 0)
return -1;
}
p->sz = sz;
return 0;
}
// Create a new process, copying p as the parent.
// Sets up child kernel stack to return as if from system call.
int
fork(void)
{
int i, pid;
struct proc *np;
struct proc *p = myproc();
// Allocate process.
if((np = allocproc()) == 0){
return -1;
}
// Copy user memory from parent to child.
uvmcopy(p->pagetable, np->pagetable, p->sz);
np->sz = p->sz;
np->parent = p;
// copy saved user registers.
*(np->tf) = *(p->tf);
// Cause fork to return 0 in the child.
np->tf->a0 = 0;
// increment reference counts on open file descriptors.
for(i = 0; i < NOFILE; i++)
if(p->ofile[i])
np->ofile[i] = filedup(p->ofile[i]);
np->cwd = idup(p->cwd);
safestrcpy(np->name, p->name, sizeof(p->name));
pid = np->pid;
acquire(&ptable.lock);
np->state = RUNNABLE;
release(&ptable.lock);
return pid;
}
// Exit the current process. Does not return.
// An exited process remains in the zombie state
// until its parent calls wait().
void
exit(void)
{
struct proc *p = myproc();
struct proc *pp;
int fd;
if(p == initproc)
panic("init exiting");
// Close all open files.
for(fd = 0; fd < NOFILE; fd++){
if(p->ofile[fd]){
fileclose(p->ofile[fd]);
p->ofile[fd] = 0;
}
}
begin_op();
iput(p->cwd);
end_op();
p->cwd = 0;
acquire(&ptable.lock);
// Parent might be sleeping in wait().
wakeup1(p->parent);
// Pass abandoned children to init.
for(pp = ptable.proc; pp < &ptable.proc[NPROC]; pp++){
if(pp->parent == p){
pp->parent = initproc;
if(pp->state == ZOMBIE)
wakeup1(initproc);
}
}
// Jump into the scheduler, never to return.
p->state = ZOMBIE;
sched();
panic("zombie exit");
}
// Wait for a child process to exit and return its pid.
// Return -1 if this process has no children.
int
wait(void)
{
struct proc *np;
int havekids, pid;
struct proc *p = myproc();
acquire(&ptable.lock);
for(;;){
// Scan through table looking for exited children.
havekids = 0;
for(np = ptable.proc; np < &ptable.proc[NPROC]; np++){
if(np->parent != p)
continue;
havekids = 1;
if(np->state == ZOMBIE){
// Found one.
pid = np->pid;
kfree(np->kstack);
np->kstack = 0;
kfree((void*)np->tf);
np->tf = 0;
proc_freepagetable(np->pagetable, np->sz);
np->pagetable = 0;
np->pid = 0;
np->parent = 0;
np->name[0] = 0;
np->killed = 0;
np->state = UNUSED;
release(&ptable.lock);
return pid;
}
}
// No point waiting if we don't have any children.
if(!havekids || p->killed){
release(&ptable.lock);
return -1;
}
// Wait for children to exit. (See wakeup1 call in proc_exit.)
sleep(p, &ptable.lock); //DOC: wait-sleep
}
}
//PAGEBREAK: 42
// Per-CPU process scheduler.
// Each CPU calls scheduler() after setting itself up.
// Scheduler never returns. It loops, doing:
// - choose a process to run
// - swtch to start running that process
// - eventually that process transfers control
// via swtch back to the scheduler.
void
scheduler(void)
{
struct proc *p;
struct cpu *c = mycpu();
c->proc = 0;
for(;;){
// Enable interrupts on this processor.
intr_on();
// Loop over process table looking for process to run.
acquire(&ptable.lock);
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
if(p->state != RUNNABLE)
continue;
// Switch to chosen process. It is the process's job
// to release ptable.lock and then reacquire it
// before jumping back to us.
c->proc = p;
p->state = RUNNING;
swtch(&c->scheduler, &p->context);
// Process is done running for now.
// It should have changed its p->state before coming back.
c->proc = 0;
}
release(&ptable.lock);
}
}
// Enter scheduler. Must hold only ptable.lock
// and have changed proc->state. Saves and restores
// intena because intena is a property of this
// kernel thread, not this CPU. It should
// be proc->intena and proc->noff, but that would
// break in the few places where a lock is held but
// there's no process.
void
sched(void)
{
int intena;
struct proc *p = myproc();
if(!holding(&ptable.lock))
panic("sched ptable.lock");
if(mycpu()->noff != 1)
panic("sched locks");
if(p->state == RUNNING)
panic("sched running");
if(intr_get())
panic("sched interruptible");
intena = mycpu()->intena;
swtch(&p->context, &mycpu()->scheduler);
mycpu()->intena = intena;
}
// Give up the CPU for one scheduling round.
void
yield(void)
{
acquire(&ptable.lock); //DOC: yieldlock
myproc()->state = RUNNABLE;
sched();
release(&ptable.lock);
}
// A fork child's very first scheduling by scheduler()
// will swtch to forkret.
void
forkret(void)
{
static int first = 1;
// Still holding ptable.lock from scheduler.
release(&ptable.lock);
if (first) {
// Some initialization functions must be run in the context
// of a regular process (e.g., they call sleep), and thus cannot
// be run from main().
first = 0;
iinit(ROOTDEV);
initlog(ROOTDEV);
}
usertrapret();
}
// Atomically release lock and sleep on chan.
// Reacquires lock when awakened.
void
sleep(void *chan, struct spinlock *lk)
{
struct proc *p = myproc();
if(p == 0)
panic("sleep");
if(lk == 0)
panic("sleep without lk");
// Must acquire ptable.lock in order to
// change p->state and then call sched.
// Once we hold ptable.lock, we can be
// guaranteed that we won't miss any wakeup
// (wakeup runs with ptable.lock locked),
// so it's okay to release lk.
if(lk != &ptable.lock){ //DOC: sleeplock0
acquire(&ptable.lock); //DOC: sleeplock1
release(lk);
}
// Go to sleep.
p->chan = chan;
p->state = SLEEPING;
sched();
// Tidy up.
p->chan = 0;
// Reacquire original lock.
if(lk != &ptable.lock){ //DOC: sleeplock2
release(&ptable.lock);
acquire(lk);
}
}
//PAGEBREAK!
// Wake up all processes sleeping on chan.
// The ptable lock must be held.
static void
wakeup1(void *chan)
{
struct proc *p;
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++)
if(p->state == SLEEPING && p->chan == chan)
p->state = RUNNABLE;
}
// Wake up all processes sleeping on chan.
void
wakeup(void *chan)
{
acquire(&ptable.lock);
wakeup1(chan);
release(&ptable.lock);
}
// Kill the process with the given pid.
// Process won't exit until it returns
// to user space (see trap in trap.c).
int
kill(int pid)
{
struct proc *p;
acquire(&ptable.lock);
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
if(p->pid == pid){
p->killed = 1;
// Wake process from sleep if necessary.
if(p->state == SLEEPING)
p->state = RUNNABLE;
release(&ptable.lock);
return 0;
}
}
release(&ptable.lock);
return -1;
}
// Copy to either a user address, or kernel address,
// depending on usr_dst.
// Returns 0 on success, -1 on error.
int
either_copyout(int user_dst, uint64 dst, void *src, uint64 len)
{
struct proc *p = myproc();
if(user_dst){
return copyout(p->pagetable, dst, src, len);
} else {
memmove((char *)dst, src, len);
return 0;
}
}
// Copy from either a user address, or kernel address,
// depending on usr_src.
// Returns 0 on success, -1 on error.
int
either_copyin(void *dst, int user_src, uint64 src, uint64 len)
{
struct proc *p = myproc();
if(user_src){
return copyin(p->pagetable, dst, src, len);
} else {
memmove(dst, (char*)src, len);
return 0;
}
}
// Print a process listing to console. For debugging.
// Runs when user types ^P on console.
// No lock to avoid wedging a stuck machine further.
void
procdump(void)
{
static char *states[] = {
[UNUSED] "unused",
[EMBRYO] "embryo",
[SLEEPING] "sleep ",
[RUNNABLE] "runble",
[RUNNING] "run ",
[ZOMBIE] "zombie"
};
struct proc *p;
char *state;
for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
if(p->state == UNUSED)
continue;
if(p->state >= 0 && p->state < NELEM(states) && states[p->state])
state = states[p->state];
else
state = "???";
printf("%d %s %s", p->pid, state, p->name);
printf("\n");
}
}

104
kernel/proc.h Normal file
View file

@ -0,0 +1,104 @@
// Saved registers for kernel context switches.
struct context {
uint64 ra;
uint64 sp;
// callee-saved
uint64 s0;
uint64 s1;
uint64 s2;
uint64 s3;
uint64 s4;
uint64 s5;
uint64 s6;
uint64 s7;
uint64 s8;
uint64 s9;
uint64 s10;
uint64 s11;
};
// Per-CPU state
struct cpu {
struct proc *proc; // The process running on this cpu or null
struct context scheduler; // swtch() here to enter scheduler
int noff; // Depth of push_off() nesting.
int intena; // Were interrupts enabled before push_off()?
};
extern struct cpu cpus[NCPU];
//PAGEBREAK: 17
// per-process data for the early trap handling code in trampoline.S.
// sits in a page by itself just under the trampoline page in the
// user page table. not specially mapped in the kernel page table.
// the sscratch register points here.
// trampoline.S saves user registers, then restores kernel_sp and
// kernel_satp.
// includes callee-saved registers like s0-s11 because the
// return-to-user path via usertrapret() doesn't return through
// the entire kernel call stack.
struct trapframe {
/* 0 */ uint64 kernel_satp;
/* 8 */ uint64 kernel_sp;
/* 16 */ uint64 kernel_trap; // usertrap()
/* 24 */ uint64 epc; // saved user program counter
/* 32 */ uint64 hartid;
/* 40 */ uint64 ra;
/* 48 */ uint64 sp;
/* 56 */ uint64 gp;
/* 64 */ uint64 tp;
/* 72 */ uint64 t0;
/* 80 */ uint64 t1;
/* 88 */ uint64 t2;
/* 96 */ uint64 s0;
/* 104 */ uint64 s1;
/* 112 */ uint64 a0;
/* 120 */ uint64 a1;
/* 128 */ uint64 a2;
/* 136 */ uint64 a3;
/* 144 */ uint64 a4;
/* 152 */ uint64 a5;
/* 160 */ uint64 a6;
/* 168 */ uint64 a7;
/* 176 */ uint64 s2;
/* 184 */ uint64 s3;
/* 192 */ uint64 s4;
/* 200 */ uint64 s5;
/* 208 */ uint64 s6;
/* 216 */ uint64 s7;
/* 224 */ uint64 s8;
/* 232 */ uint64 s9;
/* 240 */ uint64 s10;
/* 248 */ uint64 s11;
/* 256 */ uint64 t3;
/* 264 */ uint64 t4;
/* 272 */ uint64 t5;
/* 280 */ uint64 t6;
};
enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
// Per-process state
struct proc {
char *kstack; // Bottom of kernel stack for this process
uint64 sz; // Size of process memory (bytes)
pagetable_t pagetable; // Page table
enum procstate state; // Process state
int pid; // Process ID
struct proc *parent; // Parent process
struct trapframe *tf; // data page for trampoline.S
struct context context; // swtch() here to run process
void *chan; // If non-zero, sleeping on chan
int killed; // If non-zero, have been killed
struct file *ofile[NOFILE]; // Open files
struct inode *cwd; // Current directory
char name[16]; // Process name (debugging)
};
// Process memory is laid out contiguously, low addresses first:
// text
// original data and bss
// fixed-size stack
// expandable heap

45
kernel/ramdisk.c Normal file
View file

@ -0,0 +1,45 @@
//
// ramdisk that uses the disk image loaded by qemu -rdinit fs.img
//
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "buf.h"
void
ramdiskinit(void)
{
}
// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID.
// Else if B_VALID is not set, read buf from disk, set B_VALID.
void
ramdiskrw(struct buf *b)
{
if(!holdingsleep(&b->lock))
panic("ramdiskrw: buf not locked");
if((b->flags & (B_VALID|B_DIRTY)) == B_VALID)
panic("ramdiskrw: nothing to do");
if(b->blockno >= FSSIZE)
panic("ramdiskrw: blockno too big");
uint64 diskaddr = b->blockno * BSIZE;
char *addr = (char *)RAMDISK + diskaddr;
if(b->flags & B_DIRTY){
// write
memmove(addr, b->data, BSIZE);
b->flags &= ~B_DIRTY;
} else {
// read
memmove(b->data, addr, BSIZE);
b->flags |= B_VALID;
}
}

338
kernel/riscv.h Normal file
View file

@ -0,0 +1,338 @@
// which hart (core) is this?
static inline uint64
r_mhartid()
{
uint64 x;
asm volatile("csrr %0, mhartid" : "=r" (x) );
return x;
}
// Machine Status Register, mstatus
#define MSTATUS_MPP_MASK (3L << 11)
#define MSTATUS_MPP_M (3L << 11)
#define MSTATUS_MPP_S (1L << 11)
#define MSTATUS_MPP_U (0L << 11)
#define MSTATUS_MIE (1L << 3)
static inline uint64
r_mstatus()
{
uint64 x;
asm volatile("csrr %0, mstatus" : "=r" (x) );
return x;
}
static inline void
w_mstatus(uint64 x)
{
asm volatile("csrw mstatus, %0" : : "r" (x));
}
// machine exception program counter, holds the
// instruction address to which a return from
// exception will go.
static inline void
w_mepc(uint64 x)
{
asm volatile("csrw mepc, %0" : : "r" (x));
}
// Supervisor Status Register, sstatus
#define SSTATUS_SPP (1L << 8) // Previous mode, 1=Supervisor, 0=User
#define SSTATUS_SPIE (1L << 5) // Supervisor Previous Interrupt Enable
#define SSTATUS_UPIE (1L << 4) // User Previous Interrupt Enable
#define SSTATUS_SIE (1L << 1) // Supervisor Interrupt Enable
#define SSTATUS_UIE (1L << 0) // User Interrupt Enable
static inline uint64
r_sstatus()
{
uint64 x;
asm volatile("csrr %0, sstatus" : "=r" (x) );
return x;
}
static inline void
w_sstatus(uint64 x)
{
asm volatile("csrw sstatus, %0" : : "r" (x));
}
// Supervisor Interrupt Pending
static inline uint64
r_sip()
{
uint64 x;
asm volatile("csrr %0, sip" : "=r" (x) );
return x;
}
static inline void
w_sip(uint64 x)
{
asm volatile("csrw sip, %0" : : "r" (x));
}
// Supervisor Interrupt Enable
#define SIE_SEIE (1L << 9) // external
#define SIE_STIE (1L << 5) // timer
#define SIE_SSIE (1L << 1) // software
static inline uint64
r_sie()
{
uint64 x;
asm volatile("csrr %0, sie" : "=r" (x) );
return x;
}
static inline void
w_sie(uint64 x)
{
asm volatile("csrw sie, %0" : : "r" (x));
}
// Machine-mode Interrupt Enable
#define MIE_MEIE (1L << 11) // external
#define MIE_MTIE (1L << 7) // timer
#define MIE_MSIE (1L << 3) // software
static inline uint64
r_mie()
{
uint64 x;
asm volatile("csrr %0, mie" : "=r" (x) );
return x;
}
static inline void
w_mie(uint64 x)
{
asm volatile("csrw mie, %0" : : "r" (x));
}
// machine exception program counter, holds the
// instruction address to which a return from
// exception will go.
static inline void
w_sepc(uint64 x)
{
asm volatile("csrw sepc, %0" : : "r" (x));
}
static inline uint64
r_sepc()
{
uint64 x;
asm volatile("csrr %0, sepc" : "=r" (x) );
return x;
}
// Machine Exception Delegation
static inline uint64
r_medeleg()
{
uint64 x;
asm volatile("csrr %0, medeleg" : "=r" (x) );
return x;
}
static inline void
w_medeleg(uint64 x)
{
asm volatile("csrw medeleg, %0" : : "r" (x));
}
// Machine Interrupt Delegation
static inline uint64
r_mideleg()
{
uint64 x;
asm volatile("csrr %0, mideleg" : "=r" (x) );
return x;
}
static inline void
w_mideleg(uint64 x)
{
asm volatile("csrw mideleg, %0" : : "r" (x));
}
// Supervisor Trap-Vector Base Address
// low two bits are mode.
static inline void
w_stvec(uint64 x)
{
asm volatile("csrw stvec, %0" : : "r" (x));
}
static inline uint64
r_stvec()
{
uint64 x;
asm volatile("csrr %0, stvec" : "=r" (x) );
return x;
}
// Machine-mode interrupt vector
static inline void
w_mtvec(uint64 x)
{
asm volatile("csrw mtvec, %0" : : "r" (x));
}
// use riscv's sv39 page table scheme.
#define SATP_SV39 (8L << 60)
#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12))
// supervisor address translation and protection;
// holds the address of the page table.
static inline void
w_satp(uint64 x)
{
asm volatile("csrw satp, %0" : : "r" (x));
}
static inline uint64
r_satp()
{
uint64 x;
asm volatile("csrr %0, satp" : "=r" (x) );
return x;
}
// Supervisor Scratch register, for early trap handler in trampoline.S.
static inline void
w_sscratch(uint64 x)
{
asm volatile("csrw sscratch, %0" : : "r" (x));
}
static inline void
w_mscratch(uint64 x)
{
asm volatile("csrw mscratch, %0" : : "r" (x));
}
// Supervisor Trap Cause
static inline uint64
r_scause()
{
uint64 x;
asm volatile("csrr %0, scause" : "=r" (x) );
return x;
}
// Supervisor Trap Value
static inline uint64
r_stval()
{
uint64 x;
asm volatile("csrr %0, stval" : "=r" (x) );
return x;
}
// Machine-mode Counter-Enable
static inline void
w_mcounteren(uint64 x)
{
asm volatile("csrw mcounteren, %0" : : "r" (x));
}
static inline uint64
r_mcounteren()
{
uint64 x;
asm volatile("csrr %0, mcounteren" : "=r" (x) );
return x;
}
// machine-mode cycle counter
static inline uint64
r_time()
{
uint64 x;
asm volatile("csrr %0, time" : "=r" (x) );
return x;
}
// enable interrupts
static inline void
intr_on()
{
w_sie(r_sie() | SIE_SEIE | SIE_STIE | SIE_SSIE);
w_sstatus(r_sstatus() | SSTATUS_SIE);
}
// disable interrupts
static inline void
intr_off()
{
w_sstatus(r_sstatus() & ~SSTATUS_SIE);
}
// are interrupts enabled?
static inline int
intr_get()
{
uint64 x = r_sstatus();
return (x & SSTATUS_SIE) != 0;
}
static inline uint64
r_sp()
{
uint64 x;
asm volatile("mv %0, sp" : "=r" (x) );
return x;
}
// read and write tp, the thread pointer, which holds
// this core's hartid (core number), the index into cpus[].
static inline uint64
r_tp()
{
uint64 x;
asm volatile("mv %0, tp" : "=r" (x) );
return x;
}
static inline void
w_tp(uint64 x)
{
asm volatile("mv tp, %0" : : "r" (x));
}
#define PGSIZE 4096 // bytes per page
#define PGSHIFT 12 // bits of offset within a page
#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1))
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))
#define PTE_V (1L << 0) // valid
#define PTE_R (1L << 1)
#define PTE_W (1L << 2)
#define PTE_X (1L << 3)
#define PTE_U (1L << 4) // 1 -> user can access
// shift a physical address to the right place for a PTE.
#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10)
#define PTE2PA(pte) (((pte) >> 10) << 12)
#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U))
// extract the three 9-bit page table indices from a virtual address.
#define PXMASK 0x1FF // 9 bits
#define PXSHIFT(level) (PGSHIFT+(9*(level)))
#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)
// one beyond the highest possible virtual address.
// MAXVA is actually one bit less than the max allowed by
// Sv39, to avoid having to sign-extend virtual addresses
// that have the high bit set.
#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))
typedef uint64 pte_t;
typedef uint64 *pagetable_t; // 512 PTEs

55
kernel/sleeplock.c Normal file
View file

@ -0,0 +1,55 @@
// Sleeping locks
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "proc.h"
#include "spinlock.h"
#include "sleeplock.h"
void
initsleeplock(struct sleeplock *lk, char *name)
{
initlock(&lk->lk, "sleep lock");
lk->name = name;
lk->locked = 0;
lk->pid = 0;
}
void
acquiresleep(struct sleeplock *lk)
{
acquire(&lk->lk);
while (lk->locked) {
sleep(lk, &lk->lk);
}
lk->locked = 1;
lk->pid = myproc()->pid;
release(&lk->lk);
}
void
releasesleep(struct sleeplock *lk)
{
acquire(&lk->lk);
lk->locked = 0;
lk->pid = 0;
wakeup(lk);
release(&lk->lk);
}
int
holdingsleep(struct sleeplock *lk)
{
int r;
acquire(&lk->lk);
r = lk->locked && (lk->pid == myproc()->pid);
release(&lk->lk);
return r;
}

10
kernel/sleeplock.h Normal file
View file

@ -0,0 +1,10 @@
// Long-term locks for processes
struct sleeplock {
uint locked; // Is the lock held?
struct spinlock lk; // spinlock protecting this sleep lock
// For debugging:
char *name; // Name of lock.
int pid; // Process holding lock
};

110
kernel/spinlock.c Normal file
View file

@ -0,0 +1,110 @@
// Mutual exclusion spin locks.
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "riscv.h"
#include "proc.h"
#include "defs.h"
void
initlock(struct spinlock *lk, char *name)
{
lk->name = name;
lk->locked = 0;
lk->cpu = 0;
}
// Acquire the lock.
// Loops (spins) until the lock is acquired.
// Holding a lock for a long time may cause
// other CPUs to waste time spinning to acquire it.
void
acquire(struct spinlock *lk)
{
push_off(); // disable interrupts to avoid deadlock.
if(holding(lk))
panic("acquire");
// The xchg is atomic.
//while(xchg(&lk->locked, 1) != 0)
// ;
while(__sync_lock_test_and_set(&lk->locked, 1) != 0)
;
// Tell the C compiler and the processor to not move loads or stores
// past this point, to ensure that the critical section's memory
// references happen after the lock is acquired.
__sync_synchronize();
// Record info about lock acquisition for holding() and debugging.
lk->cpu = mycpu();
}
// Release the lock.
void
release(struct spinlock *lk)
{
if(!holding(lk))
panic("release");
lk->cpu = 0;
// Tell the C compiler and the processor to not move loads or stores
// past this point, to ensure that all the stores in the critical
// section are visible to other cores before the lock is released.
// Both the C compiler and the hardware may re-order loads and
// stores; __sync_synchronize() tells them both not to.
// On RISC-V, this turns into a fence instruction.
__sync_synchronize();
// Release the lock, equivalent to lk->locked = 0.
// This code can't use a C assignment, since it might
// not be atomic. A real OS would use C atomics here.
// On RISC-V, use an amoswap instruction.
//asm volatile("movl $0, %0" : "+m" (lk->locked) : );
__sync_lock_release(&lk->locked);
pop_off();
}
// Check whether this cpu is holding the lock.
int
holding(struct spinlock *lk)
{
int r;
push_off();
r = (lk->locked && lk->cpu == mycpu());
pop_off();
return r;
}
// push_off/pop_off are like intr_off()/intr_on() except that they are matched:
// it takes two pop_off to undo two push_off. Also, if interrupts
// are initially off, then push_off, pop_off leaves them off.
void
push_off(void)
{
struct cpu *c = mycpu();
int old = intr_get();
intr_off();
if(c->noff == 0)
c->intena = old;
c->noff += 1;
}
void
pop_off(void)
{
struct cpu *c = mycpu();
if(intr_get())
panic("pop_off - interruptible");
c->noff -= 1;
if(c->noff < 0)
panic("pop_off");
if(c->noff == 0 && c->intena)
intr_on();
}

9
kernel/spinlock.h Normal file
View file

@ -0,0 +1,9 @@
// Mutual exclusion lock.
struct spinlock {
uint locked; // Is the lock held?
// For debugging:
char *name; // Name of lock.
struct cpu *cpu; // The cpu holding the lock.
};

56
kernel/start.c Normal file
View file

@ -0,0 +1,56 @@
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "defs.h"
void main();
// entry.S needs one stack per CPU.
__attribute__ ((aligned (16))) char stack0[4096 * NCPU];
// scratch area for timer interrupt, one per CPU.
uint64 mscratch0[NCPU * 32];
// assembly code in kernelvec for machine-mode timer interrupt.
extern void machinevec();
// entry.S jumps here in machine mode on stack0.
void
mstart()
{
// set M Previous Privilege mode to Supervisor, for mret.
unsigned long x = r_mstatus();
x &= ~MSTATUS_MPP_MASK;
x |= MSTATUS_MPP_S;
w_mstatus(x);
// set M Exception Program Counter to main, for mret.
// requires gcc -mcmodel=medany
w_mepc((uint64)main);
// disable paging for now.
w_satp(0);
// delegate all interrupts and exceptions to supervisor mode.
w_medeleg(0xffff);
w_mideleg(0xffff);
// set up to receive timer interrupts in machine mode,
// for pre-emptive switching and (on hart 0) to drive time.
int id = r_mhartid();
uint64 *scratch = &mscratch0[32 * id];
*(uint64*)CLINT_MTIMECMP(id) = *(uint64*)CLINT_MTIME + 10000;
scratch[4] = CLINT_MTIMECMP(id);
scratch[5] = 10000000;
w_mscratch((uint64)scratch);
w_mtvec((uint64)machinevec);
w_mstatus(r_mstatus() | MSTATUS_MIE);
w_mie(r_mie() | MIE_MTIE);
// keep each CPU's hartid in its tp register, for cpuid().
w_tp(id);
// call main() in supervisor mode.
asm volatile("mret");
}

11
kernel/stat.h Normal file
View file

@ -0,0 +1,11 @@
#define T_DIR 1 // Directory
#define T_FILE 2 // File
#define T_DEV 3 // Device
struct stat {
short type; // Type of file
int dev; // File system's disk device
uint ino; // Inode number
short nlink; // Number of links to file
uint size; // Size of file in bytes
};

104
kernel/string.c Normal file
View file

@ -0,0 +1,104 @@
#include "types.h"
void*
memset(void *dst, int c, uint n)
{
char *cdst = (char *) dst;
int i;
for(i = 0; i < n; i++){
cdst[i] = c;
}
return dst;
}
int
memcmp(const void *v1, const void *v2, uint n)
{
const uchar *s1, *s2;
s1 = v1;
s2 = v2;
while(n-- > 0){
if(*s1 != *s2)
return *s1 - *s2;
s1++, s2++;
}
return 0;
}
void*
memmove(void *dst, const void *src, uint n)
{
const char *s;
char *d;
s = src;
d = dst;
if(s < d && s + n > d){
s += n;
d += n;
while(n-- > 0)
*--d = *--s;
} else
while(n-- > 0)
*d++ = *s++;
return dst;
}
// memcpy exists to placate GCC. Use memmove.
void*
memcpy(void *dst, const void *src, uint n)
{
return memmove(dst, src, n);
}
int
strncmp(const char *p, const char *q, uint n)
{
while(n > 0 && *p && *p == *q)
n--, p++, q++;
if(n == 0)
return 0;
return (uchar)*p - (uchar)*q;
}
char*
strncpy(char *s, const char *t, int n)
{
char *os;
os = s;
while(n-- > 0 && (*s++ = *t++) != 0)
;
while(n-- > 0)
*s++ = 0;
return os;
}
// Like strncpy but guaranteed to NUL-terminate.
char*
safestrcpy(char *s, const char *t, int n)
{
char *os;
os = s;
if(n <= 0)
return os;
while(--n > 0 && (*s++ = *t++) != 0)
;
*s = 0;
return os;
}
int
strlen(const char *s)
{
int n;
for(n = 0; s[n]; n++)
;
return n;
}

42
kernel/swtch.S Normal file
View file

@ -0,0 +1,42 @@
# Context switch
#
# void swtch(struct context *old, struct context *new);
#
# Save current registers in old. Load from new.
.globl swtch
swtch:
sd ra, 0(a0)
sd sp, 8(a0)
sd s0, 16(a0)
sd s1, 24(a0)
sd s2, 32(a0)
sd s3, 40(a0)
sd s4, 48(a0)
sd s5, 56(a0)
sd s6, 64(a0)
sd s7, 72(a0)
sd s8, 80(a0)
sd s9, 88(a0)
sd s10, 96(a0)
sd s11, 104(a0)
ld ra, 0(a1)
ld sp, 8(a1)
ld s0, 16(a1)
ld s1, 24(a1)
ld s2, 32(a1)
ld s3, 40(a1)
ld s4, 48(a1)
ld s5, 56(a1)
ld s6, 64(a1)
ld s7, 72(a1)
ld s8, 80(a1)
ld s9, 88(a1)
ld s10, 96(a1)
ld s11, 104(a1)
ret

191
kernel/syscall.c Normal file
View file

@ -0,0 +1,191 @@
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "proc.h"
#include "syscall.h"
#include "defs.h"
// User code makes a system call with INT T_SYSCALL.
// System call number in %eax.
// Arguments on the stack, from the user call to the C
// library system call function. The saved user %esp points
// to a saved program counter, and then the first argument.
// Fetch the int at addr from the current process.
int
fetchint(uint64 addr, int *ip)
{
struct proc *p = myproc();
if(addr >= p->sz || addr+4 > p->sz)
return -1;
if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0)
return -1;
return 0;
}
// Fetch the uint64 at addr from the current process.
int
fetchaddr(uint64 addr, uint64 *ip)
{
struct proc *p = myproc();
if(addr >= p->sz || addr+sizeof(uint64) > p->sz)
return -1;
if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0)
return -1;
return 0;
}
// Fetch the nul-terminated string at addr from the current process.
// Doesn't actually copy the string - just sets *pp to point at it.
// Returns length of string, not including nul, or -1 for error.
int
fetchstr(uint64 addr, char *buf, int max)
{
struct proc *p = myproc();
int err = copyinstr(p->pagetable, buf, addr, max);
if(err < 0)
return err;
return strlen(buf);
}
static uint64
fetcharg(int n)
{
struct proc *p = myproc();
switch (n) {
case 0:
return p->tf->a0;
case 1:
return p->tf->a1;
case 2:
return p->tf->a2;
case 3:
return p->tf->a3;
case 4:
return p->tf->a4;
case 5:
return p->tf->a5;
}
panic("fetcharg");
return -1;
}
// Fetch the nth 32-bit system call argument.
int
argint(int n, int *ip)
{
*ip = fetcharg(n);
return 0;
}
int
argaddr(int n, uint64 *ip)
{
*ip = fetcharg(n);
return 0;
}
// Fetch the nth word-sized system call argument as a pointer
// to a block of memory of size bytes. Check that the pointer
// lies within the process address space.
int
argptr(int n, uint64 *pp, int size)
{
uint64 i;
struct proc *p = myproc();
if(argaddr(n, &i) < 0)
return -1;
if(size < 0 || (uint)i >= p->sz || (uint)i+size > p->sz)
return -1;
*pp = i;
return 0;
}
// Fetch the nth word-sized system call argument as a null-terminated string.
// Copies into buf, at most max.
// Returns string length if OK (including nul), -1 if error.
int
argstr(int n, char *buf, int max)
{
uint64 addr;
if(argaddr(n, &addr) < 0)
return -1;
return fetchstr(addr, buf, max);
}
extern int sys_chdir(void);
extern int sys_close(void);
extern int sys_dup(void);
extern int sys_exec(void);
extern int sys_exit(void);
extern int sys_fork(void);
extern int sys_fstat(void);
extern int sys_getpid(void);
extern int sys_kill(void);
extern int sys_link(void);
extern int sys_mkdir(void);
extern int sys_mknod(void);
extern int sys_open(void);
extern int sys_pipe(void);
extern int sys_read(void);
extern int sys_sbrk(void);
extern int sys_sleep(void);
extern int sys_unlink(void);
extern int sys_wait(void);
extern int sys_write(void);
extern int sys_uptime(void);
static int (*syscalls[])(void) = {
[SYS_fork] sys_fork,
[SYS_exit] sys_exit,
[SYS_wait] sys_wait,
[SYS_pipe] sys_pipe,
[SYS_read] sys_read,
[SYS_kill] sys_kill,
[SYS_exec] sys_exec,
[SYS_fstat] sys_fstat,
[SYS_chdir] sys_chdir,
[SYS_dup] sys_dup,
[SYS_getpid] sys_getpid,
[SYS_sbrk] sys_sbrk,
[SYS_sleep] sys_sleep,
[SYS_uptime] sys_uptime,
[SYS_open] sys_open,
[SYS_write] sys_write,
[SYS_mknod] sys_mknod,
[SYS_unlink] sys_unlink,
[SYS_link] sys_link,
[SYS_mkdir] sys_mkdir,
[SYS_close] sys_close,
};
static void
dosyscall(void)
{
int num;
struct proc *p = myproc();
num = p->tf->a7;
if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
p->tf->a0 = syscalls[num]();
} else {
printf("%d %s: unknown sys call %d\n",
p->pid, p->name, num);
p->tf->a0 = -1;
}
}
void
syscall()
{
if(myproc()->killed)
exit();
dosyscall();
if(myproc()->killed)
exit();
return;
}

22
kernel/syscall.h Normal file
View file

@ -0,0 +1,22 @@
// System call numbers
#define SYS_fork 1
#define SYS_exit 2
#define SYS_wait 3
#define SYS_pipe 4
#define SYS_read 5
#define SYS_kill 6
#define SYS_exec 7
#define SYS_fstat 8
#define SYS_chdir 9
#define SYS_dup 10
#define SYS_getpid 11
#define SYS_sbrk 12
#define SYS_sleep 13
#define SYS_uptime 14
#define SYS_open 15
#define SYS_write 16
#define SYS_mknod 17
#define SYS_unlink 18
#define SYS_link 19
#define SYS_mkdir 20
#define SYS_close 21

465
kernel/sysfile.c Normal file
View file

@ -0,0 +1,465 @@
//
// File-system system calls.
// Mostly argument checking, since we don't trust
// user code, and calls into file.c and fs.c.
//
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "stat.h"
#include "proc.h"
#include "fs.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "file.h"
#include "fcntl.h"
// Fetch the nth word-sized system call argument as a file descriptor
// and return both the descriptor and the corresponding struct file.
static int
argfd(int n, int *pfd, struct file **pf)
{
int fd;
struct file *f;
if(argint(n, &fd) < 0)
return -1;
if(fd < 0 || fd >= NOFILE || (f=myproc()->ofile[fd]) == 0)
return -1;
if(pfd)
*pfd = fd;
if(pf)
*pf = f;
return 0;
}
// Allocate a file descriptor for the given file.
// Takes over file reference from caller on success.
static int
fdalloc(struct file *f)
{
int fd;
struct proc *p = myproc();
for(fd = 0; fd < NOFILE; fd++){
if(p->ofile[fd] == 0){
p->ofile[fd] = f;
return fd;
}
}
return -1;
}
int
sys_dup(void)
{
struct file *f;
int fd;
if(argfd(0, 0, &f) < 0)
return -1;
if((fd=fdalloc(f)) < 0)
return -1;
filedup(f);
return fd;
}
int
sys_read(void)
{
struct file *f;
int n;
uint64 p;
if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0)
return -1;
return fileread(f, p, n);
}
int
sys_write(void)
{
struct file *f;
int n;
uint64 p;
if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0)
return -1;
return filewrite(f, p, n);
}
int
sys_close(void)
{
int fd;
struct file *f;
if(argfd(0, &fd, &f) < 0)
return -1;
myproc()->ofile[fd] = 0;
fileclose(f);
return 0;
}
int
sys_fstat(void)
{
struct file *f;
uint64 st; // user pointer to struct stat
if(argfd(0, 0, &f) < 0 || argptr(1, &st, sizeof(struct stat)) < 0)
return -1;
return filestat(f, st);
}
// Create the path new as a link to the same inode as old.
int
sys_link(void)
{
char name[DIRSIZ], new[MAXPATH], old[MAXPATH];
struct inode *dp, *ip;
if(argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0)
return -1;
begin_op();
if((ip = namei(old)) == 0){
end_op();
return -1;
}
ilock(ip);
if(ip->type == T_DIR){
iunlockput(ip);
end_op();
return -1;
}
ip->nlink++;
iupdate(ip);
iunlock(ip);
if((dp = nameiparent(new, name)) == 0)
goto bad;
ilock(dp);
if(dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0){
iunlockput(dp);
goto bad;
}
iunlockput(dp);
iput(ip);
end_op();
return 0;
bad:
ilock(ip);
ip->nlink--;
iupdate(ip);
iunlockput(ip);
end_op();
return -1;
}
// Is the directory dp empty except for "." and ".." ?
static int
isdirempty(struct inode *dp)
{
int off;
struct dirent de;
for(off=2*sizeof(de); off<dp->size; off+=sizeof(de)){
if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
panic("isdirempty: readi");
if(de.inum != 0)
return 0;
}
return 1;
}
//PAGEBREAK!
int
sys_unlink(void)
{
struct inode *ip, *dp;
struct dirent de;
char name[DIRSIZ], path[MAXPATH];
uint off;
if(argstr(0, path, MAXPATH) < 0)
return -1;
begin_op();
if((dp = nameiparent(path, name)) == 0){
end_op();
return -1;
}
ilock(dp);
// Cannot unlink "." or "..".
if(namecmp(name, ".") == 0 || namecmp(name, "..") == 0)
goto bad;
if((ip = dirlookup(dp, name, &off)) == 0)
goto bad;
ilock(ip);
if(ip->nlink < 1)
panic("unlink: nlink < 1");
if(ip->type == T_DIR && !isdirempty(ip)){
iunlockput(ip);
goto bad;
}
memset(&de, 0, sizeof(de));
if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
panic("unlink: writei");
if(ip->type == T_DIR){
dp->nlink--;
iupdate(dp);
}
iunlockput(dp);
ip->nlink--;
iupdate(ip);
iunlockput(ip);
end_op();
return 0;
bad:
iunlockput(dp);
end_op();
return -1;
}
static struct inode*
create(char *path, short type, short major, short minor)
{
uint off;
struct inode *ip, *dp;
char name[DIRSIZ];
if((dp = nameiparent(path, name)) == 0)
return 0;
ilock(dp);
if((ip = dirlookup(dp, name, &off)) != 0){
iunlockput(dp);
ilock(ip);
if(type == T_FILE && ip->type == T_FILE)
return ip;
iunlockput(ip);
return 0;
}
if((ip = ialloc(dp->dev, type)) == 0)
panic("create: ialloc");
ilock(ip);
ip->major = major;
ip->minor = minor;
ip->nlink = 1;
iupdate(ip);
if(type == T_DIR){ // Create . and .. entries.
dp->nlink++; // for ".."
iupdate(dp);
// No ip->nlink++ for ".": avoid cyclic ref count.
if(dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0)
panic("create dots");
}
if(dirlink(dp, name, ip->inum) < 0)
panic("create: dirlink");
iunlockput(dp);
return ip;
}
int
sys_open(void)
{
char path[MAXPATH];
int fd, omode;
struct file *f;
struct inode *ip;
if(argstr(0, path, MAXPATH) < 0 || argint(1, &omode) < 0)
return -1;
begin_op();
if(omode & O_CREATE){
ip = create(path, T_FILE, 0, 0);
if(ip == 0){
end_op();
return -1;
}
} else {
if((ip = namei(path)) == 0){
end_op();
return -1;
}
ilock(ip);
if(ip->type == T_DIR && omode != O_RDONLY){
iunlockput(ip);
end_op();
return -1;
}
}
if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){
if(f)
fileclose(f);
iunlockput(ip);
end_op();
return -1;
}
iunlock(ip);
end_op();
f->type = FD_INODE;
f->ip = ip;
f->off = 0;
f->readable = !(omode & O_WRONLY);
f->writable = (omode & O_WRONLY) || (omode & O_RDWR);
return fd;
}
int
sys_mkdir(void)
{
char path[MAXPATH];
struct inode *ip;
begin_op();
if(argstr(0, path, MAXPATH) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){
end_op();
return -1;
}
iunlockput(ip);
end_op();
return 0;
}
int
sys_mknod(void)
{
struct inode *ip;
char path[MAXPATH];
int major, minor;
begin_op();
if((argstr(0, path, MAXPATH)) < 0 ||
argint(1, &major) < 0 ||
argint(2, &minor) < 0 ||
(ip = create(path, T_DEV, major, minor)) == 0){
end_op();
return -1;
}
iunlockput(ip);
end_op();
return 0;
}
int
sys_chdir(void)
{
char path[MAXPATH];
struct inode *ip;
struct proc *p = myproc();
begin_op();
if(argstr(0, path, MAXPATH) < 0 || (ip = namei(path)) == 0){
end_op();
return -1;
}
ilock(ip);
if(ip->type != T_DIR){
iunlockput(ip);
end_op();
return -1;
}
iunlock(ip);
iput(p->cwd);
end_op();
p->cwd = ip;
return 0;
}
int
sys_exec(void)
{
char path[MAXPATH], *argv[MAXARG];
int i;
uint64 uargv, uarg;
if(argstr(0, path, MAXPATH) < 0 || argaddr(1, &uargv) < 0){
return -1;
}
memset(argv, 0, sizeof(argv));
for(i=0;; i++){
if(i >= NELEM(argv)){
return -1;
}
if(fetchaddr(uargv+sizeof(uint64)*i, (uint64*)&uarg) < 0){
return -1;
}
if(uarg == 0){
argv[i] = 0;
break;
}
argv[i] = kalloc();
if(argv[i] == 0)
panic("sys_exec kalloc");
if(fetchstr(uarg, argv[i], PGSIZE) < 0){
return -1;
}
}
int ret = exec(path, argv);
for(i = 0; i < NELEM(argv) && argv[i] != 0; i++)
kfree(argv[i]);
return ret;
}
int
sys_pipe(void)
{
uint64 fdarray; // user pointer to array of two integers
struct file *rf, *wf;
int fd0, fd1;
struct proc *p = myproc();
if(argptr(0, &fdarray, 2*sizeof(int)) < 0)
return -1;
if(pipealloc(&rf, &wf) < 0)
return -1;
fd0 = -1;
if((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0){
if(fd0 >= 0)
p->ofile[fd0] = 0;
fileclose(rf);
fileclose(wf);
return -1;
}
if(copyout(p->pagetable, fdarray, (char*)&fd0, sizeof(fd0)) < 0 ||
copyout(p->pagetable, fdarray+sizeof(fd0), (char *)&fd1, sizeof(fd1)) < 0){
p->ofile[fd0] = 0;
p->ofile[fd1] = 0;
fileclose(rf);
fileclose(wf);
return -1;
}
return 0;
}

90
kernel/sysproc.c Normal file
View file

@ -0,0 +1,90 @@
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "date.h"
#include "param.h"
#include "memlayout.h"
#include "proc.h"
int
sys_exit(void)
{
exit();
return 0; // not reached
}
int
sys_getpid(void)
{
return myproc()->pid;
}
int
sys_fork(void)
{
return fork();
}
int
sys_wait(void)
{
return wait();
}
int
sys_sbrk(void)
{
int addr;
int n;
if(argint(0, &n) < 0)
return -1;
addr = myproc()->sz;
if(growproc(n) < 0)
return -1;
return addr;
}
int
sys_sleep(void)
{
int n;
uint ticks0;
if(argint(0, &n) < 0)
return -1;
acquire(&tickslock);
ticks0 = ticks;
while(ticks - ticks0 < n){
if(myproc()->killed){
release(&tickslock);
return -1;
}
sleep(&ticks, &tickslock);
}
release(&tickslock);
return 0;
}
int
sys_kill(void)
{
int pid;
if(argint(0, &pid) < 0)
return -1;
return kill(pid);
}
// return how many clock tick interrupts have occurred
// since start.
int
sys_uptime(void)
{
uint xticks;
acquire(&tickslock);
xticks = ticks;
release(&tickslock);
return xticks;
}

137
kernel/trampoline.S Normal file
View file

@ -0,0 +1,137 @@
#
# code to switch between user and kernel space.
#
# this code is mapped at the same virtual address
# in user and kernel space so that it can switch
# page tables.
#
# kernel.ld causes trampout to be aligned
# to a page boundary.
#
.globl usertrap
.section trampoline
.globl trampout
trampout:
# switch from kernel to user.
# usertrapret() calls here.
# a0: p->tf in user page table
# a1: new value for satp, for user page table
# switch to user page table
csrw satp, a1
# put the saved user a0 in sscratch, so we
# can swap it with our a0 (p->tf) in the last step.
ld t0, 112(a0)
csrw sscratch, t0
# restore all but a0 from p->tf
ld ra, 40(a0)
ld sp, 48(a0)
ld gp, 56(a0)
ld tp, 64(a0)
ld t0, 72(a0)
ld t1, 80(a0)
ld t2, 88(a0)
ld s0, 96(a0)
ld s1, 104(a0)
ld a1, 120(a0)
ld a2, 128(a0)
ld a3, 136(a0)
ld a4, 144(a0)
ld a5, 152(a0)
ld a6, 160(a0)
ld a7, 168(a0)
ld s2, 176(a0)
ld s3, 184(a0)
ld s4, 192(a0)
ld s5, 200(a0)
ld s6, 208(a0)
ld s7, 216(a0)
ld s8, 224(a0)
ld s9, 232(a0)
ld s10, 240(a0)
ld s11, 248(a0)
ld t3, 256(a0)
ld t4, 264(a0)
ld t5, 272(a0)
ld t6, 280(a0)
# restore user a0, and save p->tf
csrrw a0, sscratch, a0
# return to user mode and user pc.
# caller has set up sstatus and sepc.
sret
.align 4
.globl trampin
trampin:
#
# trap.c set stvec to point here, so
# user interrupts and exceptions start here,
# in supervisor mode, but with a
# user page table.
#
# sscratch points to where the process's p->tf is
# mapped into user space (TRAMPOLINE - 4096).
#
# swap a0 and sscratch
# so that a0 is p->tf
csrrw a0, sscratch, a0
# save the user registers in p->tf
sd ra, 40(a0)
sd sp, 48(a0)
sd gp, 56(a0)
sd tp, 64(a0)
sd t0, 72(a0)
sd t1, 80(a0)
sd t2, 88(a0)
sd s0, 96(a0)
sd s1, 104(a0)
sd a1, 120(a0)
sd a2, 128(a0)
sd a3, 136(a0)
sd a4, 144(a0)
sd a5, 152(a0)
sd a6, 160(a0)
sd a7, 168(a0)
sd s2, 176(a0)
sd s3, 184(a0)
sd s4, 192(a0)
sd s5, 200(a0)
sd s6, 208(a0)
sd s7, 216(a0)
sd s8, 224(a0)
sd s9, 232(a0)
sd s10, 240(a0)
sd s11, 248(a0)
sd t3, 256(a0)
sd t4, 264(a0)
sd t5, 272(a0)
sd t6, 280(a0)
# save the user a0 in p->tf->a0
csrr t0, sscratch
sd t0, 112(a0)
# restore kernel stack pointer from p->tf->kernel_sp
ld sp, 8(a0)
# make tp hold the current hartid, from p->tf->hartid
ld tp, 32(a0)
# remember the address of usertrap(), p->tf->kernel_trap
ld t0, 16(a0)
# restore kernel page table from p->tf->kernel_satp
ld t1, 0(a0)
csrw satp, t1
# a0 is no longer valid, since the kernel page
# table does not specially map p->td.
# jump to usertrap(), which does not return
jr t0

184
kernel/trap.c Normal file
View file

@ -0,0 +1,184 @@
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "proc.h"
#include "spinlock.h"
#include "defs.h"
struct spinlock tickslock;
uint ticks;
extern char trampout[], trampin[];
// in kernelvec.S, calls kerneltrap().
void kernelvec();
extern int devintr();
void
trapinit(void)
{
initlock(&tickslock, "time");
}
// set up to take exceptions and traps while in the kernel.
void
trapinithart(void)
{
w_stvec((uint64)kernelvec);
}
//
// handle an interrupt, exception, or system call from user space.
// called from trampoline.S
//
void
usertrap(void)
{
int which_dev = 0;
if((r_sstatus() & SSTATUS_SPP) != 0)
panic("usertrap: not from user mode");
// send interrupts and exceptions to kerneltrap(),
// since we're now in the kernel.
w_stvec((uint64)kernelvec);
struct proc *p = myproc();
// save user program counter.
p->tf->epc = r_sepc();
intr_on();
if(r_scause() == 8){
// system call
// sepc points to the ecall instruction,
// but we want to return to the next instruction.
p->tf->epc += 4;
syscall();
} else if((which_dev = devintr()) != 0){
// ok
} else {
printf("usertrap(): unexpected scause 0x%p pid=%d\n", r_scause(), p->pid);
printf(" sepc=%p stval=%p\n", r_sepc(), r_stval());
p->killed = 1;
}
if(p->killed)
exit();
// give up the CPU if this is a timer interrupt.
if(which_dev == 2)
yield();
usertrapret();
}
//
// return to user space
//
void
usertrapret(void)
{
struct proc *p = myproc();
// turn off interrupts, since we're switching
// now from kerneltrap() to usertrap().
intr_off();
// send interrupts and exceptions to trampoline.S
w_stvec(TRAMPOLINE + (trampin - trampout));
// set up values that trampoline.S will need when
// the process next re-enters the kernel.
p->tf->kernel_satp = r_satp();
p->tf->kernel_sp = (uint64)p->kstack + PGSIZE;
p->tf->kernel_trap = (uint64)usertrap;
p->tf->hartid = r_tp();
// set up the registers that trampoline.S's sret will use
// to get to user space.
// set S Previous Privilege mode to User.
unsigned long x = r_sstatus();
x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode
x |= SSTATUS_SPIE; // enable interrupts in user mode
w_sstatus(x);
// set S Exception Program Counter to the saved user pc.
w_sepc(p->tf->epc);
// tell trampline.S the user page table to switch to.
uint64 satp = MAKE_SATP(p->pagetable);
// jump to trampoline.S at the top of memory, which
// switches to the user page table, restores user registers,
// and switches to user mode with sret.
((void (*)(uint64,uint64))TRAMPOLINE)(TRAMPOLINE - PGSIZE, satp);
}
// interrupts and exceptions from kernel code go here,
// on whatever the current kernel stack is.
// must be 4-byte aligned to fit in stvec.
void
kerneltrap()
{
uint64 sstatus = r_sstatus();
uint64 scause = r_scause();
if((sstatus & SSTATUS_SPP) == 0)
panic("kerneltrap: not from supervisor mode");
if(intr_get() != 0)
panic("kerneltrap: interrupts enabled");
if(devintr() == 0){
printf("scause 0x%p\n", scause);
printf("sepc=%p stval=%p\n", r_sepc(), r_stval());
panic("kerneltrap");
}
}
// check if it's an external interrupt or software interrupt,
// and handle it.
// returns 2 if timer interrupt,
// 1 if other device,
// 0 if not recognized.
int
devintr()
{
uint64 scause = r_scause();
if((scause & 0x8000000000000000L) &&
(scause & 0xff) == 9){
// supervisor external interrupt, via PLIC.
int irq = plic_claim();
if(irq == UART0_IRQ){
uartintr();
}
plic_complete(irq);
return 1;
} else if(scause == 0x8000000000000001){
// software interrupt from a machine-mode timer interrupt.
if(cpuid() == 0){
acquire(&tickslock);
ticks++;
wakeup(&ticks);
release(&tickslock);
}
// acknowledge.
w_sip(r_sip() & ~2);
return 2;
} else {
return 0;
}
}

10
kernel/types.h Normal file
View file

@ -0,0 +1,10 @@
typedef unsigned int uint;
typedef unsigned short ushort;
typedef unsigned char uchar;
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned long uint64;
typedef uint64 pde_t;

75
kernel/uart.c Normal file
View file

@ -0,0 +1,75 @@
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "proc.h"
#include "spinlock.h"
#include "defs.h"
//
// qemu -machine virt has a 16550a UART
// qemu/hw/riscv/virt.c
// http://byterunner.com/16550.html
//
// caller should lock.
//
// address of one of the registers
#define R(reg) ((volatile unsigned char *)(UART0 + reg))
void
uartinit(void)
{
// disable interrupts -- IER
*R(1) = 0x00;
// special mode to set baud rate
*R(3) = 0x80;
// LSB for baud rate of 38.4K
*R(0) = 0x03;
// MSB for baud rate of 38.4K
*R(1) = 0x00;
// leave set-baud mode,
// and set word length to 8 bits, no parity.
*R(3) = 0x03;
// reset and enable FIFOs -- FCR.
*R(2) = 0x07;
// enable receive interrupts -- IER.
*R(1) = 0x01;
}
void
uartputc(int c)
{
// wait for Transmit Holding Empty to be set in LSR.
while((*R(5) & (1 << 5)) == 0)
;
*R(0) = c;
}
int
uartgetc(void)
{
if(*R(5) & 0x01){
// input data is ready.
return *R(0);
} else {
return -1;
}
}
void
uartintr(void)
{
while(1){
int c = uartgetc();
if(c == -1)
break;
consoleintr(c);
}
}

389
kernel/vm.c Normal file
View file

@ -0,0 +1,389 @@
#include "param.h"
#include "types.h"
#include "memlayout.h"
#include "elf.h"
#include "riscv.h"
#include "defs.h"
#include "fs.h"
/*
* the kernel's page table.
*/
pagetable_t kernel_pagetable;
extern char etext[]; // kernel.ld sets this to end of kernel code.
extern char trampout[]; // trampoline.S
/*
* create a direct-map page table for the kernel and
* turn on paging. called early, in supervisor mode.
* the page allocator is already initialized.
*/
void
kvminit()
{
kernel_pagetable = (pagetable_t) kalloc();
memset(kernel_pagetable, 0, PGSIZE);
// uart registers
mappages(kernel_pagetable, UART0, PGSIZE,
UART0, PTE_R | PTE_W);
// CLINT
mappages(kernel_pagetable, CLINT, 0x10000,
CLINT, PTE_R | PTE_W);
// PLIC
mappages(kernel_pagetable, PLIC, 0x4000000,
PLIC, PTE_R | PTE_W);
// map kernel text executable and read-only.
mappages(kernel_pagetable, KERNBASE, (uint64)etext-KERNBASE,
KERNBASE, PTE_R | PTE_X);
// map kernel data and the physical RAM we'll make use of.
mappages(kernel_pagetable, (uint64)etext, PHYSTOP-(uint64)etext,
(uint64)etext, PTE_R | PTE_W);
// map the qemu -initrd fs.img ramdisk
mappages(kernel_pagetable, RAMDISK, FSSIZE * BSIZE,
RAMDISK, PTE_R | PTE_W);
// map the trampoline for trap entry/exit to
// the highest virtual address in the kernel.
mappages(kernel_pagetable, TRAMPOLINE, PGSIZE,
(uint64)trampout, PTE_R | PTE_X);
}
// Switch h/w page table register to the kernel's page table,
// and enable paging.
void
kvminithart()
{
w_satp(MAKE_SATP(kernel_pagetable));
}
// Return the address of the PTE in page table pagetable
// that corresponds to virtual address va. If alloc!=0,
// create any required page table pages.
//
// The risc-v Sv39 scheme has three levels of page table
// pages. A page table page contains 512 64-bit PTEs.
// A 64-bit virtual address is split into five fields:
// 39..63 -- must be zero.
// 30..38 -- 9 bits of level-2 index.
// 21..39 -- 9 bits of level-1 index.
// 12..20 -- 9 bits of level-0 index.
// 0..12 -- 12 bits of byte offset within the page.
static pte_t *
walk(pagetable_t pagetable, uint64 va, int alloc)
{
if(va >= MAXVA)
panic("walk");
for(int level = 2; level > 0; level--) {
pte_t *pte = &pagetable[PX(level, va)];
if(*pte & PTE_V) {
pagetable = (pagetable_t)PTE2PA(*pte);
} else {
if(!alloc || (pagetable = (pde_t*)kalloc()) == 0)
return 0;
memset(pagetable, 0, PGSIZE);
*pte = PA2PTE(pagetable) | PTE_V;
}
}
return &pagetable[PX(0, va)];
}
// Look up a virtual address, return the physical address,
// Can only be used to look up user pages.
// or 0 if not mapped.
uint64
walkaddr(pagetable_t pagetable, uint64 va)
{
pte_t *pte;
uint64 pa;
pte = walk(pagetable, va, 0);
if(pte == 0)
return 0;
if((*pte & PTE_V) == 0)
return 0;
if((*pte & PTE_U) == 0)
return 0;
pa = PTE2PA(*pte);
return pa;
}
// Create PTEs for virtual addresses starting at va that refer to
// physical addresses starting at pa. va and size might not
// be page-aligned.
void
mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
{
uint64 a, last;
pte_t *pte;
a = PGROUNDDOWN(va);
last = PGROUNDDOWN(va + size - 1);
for(;;){
if((pte = walk(pagetable, a, 1)) == 0)
panic("mappages: walk");
if(*pte & PTE_V)
panic("remap");
*pte = PA2PTE(pa) | perm | PTE_V;
if(a == last)
break;
a += PGSIZE;
pa += PGSIZE;
}
}
// Remove mappings from a page table. The mappings in
// the given range must exist. Optionally free the
// physical memory.
void
unmappages(pagetable_t pagetable, uint64 va, uint64 size, int do_free)
{
uint64 a, last;
pte_t *pte;
uint64 pa;
a = PGROUNDDOWN(va);
last = PGROUNDDOWN(va + size - 1);
for(;;){
if((pte = walk(pagetable, a, 0)) == 0)
panic("unmappages: walk");
if((*pte & PTE_V) == 0){
printf("va=%p pte=%p\n", a, *pte);
panic("unmappages: not mapped");
}
if(PTE_FLAGS(*pte) == PTE_V)
panic("unmappages: not a leaf");
if(do_free){
pa = PTE2PA(*pte);
kfree((void*)pa);
}
*pte = 0;
if(a == last)
break;
a += PGSIZE;
pa += PGSIZE;
}
}
// create an empty user page table.
pagetable_t
uvmcreate()
{
pagetable_t pagetable;
pagetable = (pagetable_t) kalloc();
if(pagetable == 0)
panic("uvmcreate: out of memory");
memset(pagetable, 0, PGSIZE);
return pagetable;
}
// Load the user initcode into address 0 of pagetable,
// for the very first process.
// sz must be less than a page.
void
uvminit(pagetable_t pagetable, uchar *src, uint sz)
{
char *mem;
if(sz >= PGSIZE)
panic("inituvm: more than a page");
mem = kalloc();
memset(mem, 0, PGSIZE);
mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U);
memmove(mem, src, sz);
}
// Allocate PTEs and physical memory to grow process from oldsz to
// newsz, which need not be page aligned. Returns new size or 0 on error.
uint64
uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
char *mem;
uint64 a;
if(newsz < oldsz)
return oldsz;
oldsz = PGROUNDUP(oldsz);
a = oldsz;
for(; a < newsz; a += PGSIZE){
mem = kalloc();
if(mem == 0){
uvmdealloc(pagetable, a, oldsz);
return 0;
}
memset(mem, 0, PGSIZE);
mappages(pagetable, a, PGSIZE, (uint64)mem, PTE_W|PTE_X|PTE_R|PTE_U);
}
return newsz;
}
// Deallocate user pages to bring the process size from oldsz to
// newsz. oldsz and newsz need not be page-aligned, nor does newsz
// need to be less than oldsz. oldsz can be larger than the actual
// process size. Returns the new process size.
uint64
uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
if(newsz >= oldsz)
return oldsz;
unmappages(pagetable, newsz, oldsz - newsz, 1);
return newsz;
}
// Recursively free page table pages.
// All leaf mappings must already have been removed.
static void
freewalk(pagetable_t pagetable)
{
// there are 2^9 = 512 PTEs in a page table.
for(int i = 0; i < 512; i++){
pte_t pte = pagetable[i];
if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){
// this PTE points to a lower-level page table.
uint64 child = PTE2PA(pte);
freewalk((pagetable_t)child);
pagetable[i] = 0;
} else if(pte & PTE_V){
panic("freewalk: leaf");
}
}
kfree((void*)pagetable);
}
// Free user memory pages,
// then free page table pages.
void
uvmfree(pagetable_t pagetable, uint64 sz)
{
unmappages(pagetable, 0, sz, 1);
freewalk(pagetable);
}
// Given a parent process's page table, copy
// its memory into a child's page table.
// Copies both the page table and the
// physical memory.
void
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
pte_t *pte;
uint64 pa, i;
uint flags;
char *mem;
for(i = 0; i < sz; i += PGSIZE){
if((pte = walk(old, i, 0)) == 0)
panic("copyuvm: pte should exist");
if((*pte & PTE_V) == 0)
panic("copyuvm: page not present");
pa = PTE2PA(*pte);
flags = PTE_FLAGS(*pte);
if((mem = kalloc()) == 0)
panic("uvmcopy: kalloc failed");
memmove(mem, (char*)pa, PGSIZE);
mappages(new, i, PGSIZE, (uint64)mem, flags);
}
}
// Copy from kernel to user.
// Copy len bytes from src to virtual address dstva in a given page table.
// Return 0 on success, -1 on error.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
uint64 n, va0, pa0;
while(len > 0){
va0 = (uint)PGROUNDDOWN(dstva);
pa0 = walkaddr(pagetable, va0);
if(pa0 == 0)
return -1;
n = PGSIZE - (dstva - va0);
if(n > len)
n = len;
memmove((void *)(pa0 + (dstva - va0)), src, n);
len -= n;
src += n;
dstva = va0 + PGSIZE;
}
return 0;
}
// Copy from user to kernel.
// Copy len bytes to dst from virtual address srcva in a given page table.
// Return 0 on success, -1 on error.
int
copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
{
uint64 n, va0, pa0;
while(len > 0){
va0 = (uint)PGROUNDDOWN(srcva);
pa0 = walkaddr(pagetable, va0);
if(pa0 == 0)
return -1;
n = PGSIZE - (srcva - va0);
if(n > len)
n = len;
memmove(dst, (void *)(pa0 + (srcva - va0)), n);
len -= n;
dst += n;
srcva = va0 + PGSIZE;
}
return 0;
}
// Copy a null-terminated string from user to kernel.
// Copy bytes to dst from virtual address srcva in a given page table,
// until a '\0', or max.
// Return 0 on success, -1 on error.
int
copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
{
uint64 n, va0, pa0;
int got_null = 0;
while(got_null == 0 && max > 0){
va0 = (uint)PGROUNDDOWN(srcva);
pa0 = walkaddr(pagetable, va0);
if(pa0 == 0)
return -1;
n = PGSIZE - (srcva - va0);
if(n > max)
n = max;
char *p = (char *) (pa0 + (srcva - va0));
while(n > 0){
if(*p == '\0'){
*dst = '\0';
got_null = 1;
break;
} else {
*dst = *p;
}
--n;
--max;
p++;
dst++;
}
srcva = va0 + PGSIZE;
}
if(got_null){
return 0;
} else {
return -1;
}
}