diff --git a/.clang-format b/.clang-format
index a4fbf68..1edc97c 100644
--- a/.clang-format
+++ b/.clang-format
@@ -2,8 +2,9 @@ BasedOnStyle: LLVM
 IndentWidth: 4 # Use 4 spaces for indentation
 TabWidth: 4 # Tab width is also 4 spaces
 UseTab: Never # Always use spaces instead of tabs
-ColumnLimit: 80 # Wrap lines after 80 characters
+ColumnLimit: 120 # Wrap lines after 120 characters
 AllowShortLoopsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: false
 AlwaysBreakTemplateDeclarations: true
 BreakConstructorInitializers: BeforeComma
 AlignConsecutiveDeclarations:
@@ -14,3 +15,4 @@ AlignConsecutiveDeclarations:
   AlignFunctionPointers: false
   PadOperators: false
 AlignConsecutiveMacros: true
+AllowShortCaseLabelsOnASingleLine: true
diff --git a/Makefile b/Makefile
index 3d29dc2..99d8dfc 100644
--- a/Makefile
+++ b/Makefile
@@ -28,7 +28,7 @@ CFLAGS += -fno-omit-frame-pointer # More reliable backtraces in GDB
 
 all: kernel.elf
 
-kernel.elf: entry.o start.o lib/string.o lib/proc.o lib/spinlock.o lib/proc.o lib/uart.o lib/panic.o kern/kalloc.o lib/memory.o
+kernel.elf: entry.o start.o lib/string.o lib/proc.o lib/uart.o lib/panic.o kern/kalloc.o lib/memory.o kern/ispinlock.o lib/spinlock.o
 	@echo LD $@
 	@$(LD) $(LDFLAGS) -o $@ $^
 
@@ -48,3 +48,5 @@ clean:
 	rm -f *.o *.elf *.d lib/*.o lib/*.d
 
 -include *.d
+
+.PHONY: all
diff --git a/kern/ispinlock.c b/kern/ispinlock.c
new file mode 100644
index 0000000..e2a9e79
--- /dev/null
+++ b/kern/ispinlock.c
@@ -0,0 +1,45 @@
+#include "ispinlock.h"
+
+void spinlock_init(spinlock_t *l) {
+    l->v = 0;
+}
+
+bool spin_trylock(spinlock_t *l) {
+    uint32_t old;
+    // old = xchg_acquire(&l->v, 1) using AMO
+    __asm__ volatile("amoswap.w.aq %0, %2, (%1)\n" : "=&r"(old) : "r"(&l->v), "r"(1u) : "memory");
+    return old == 0;
+}
+
+void spin_unlock(spinlock_t *l) {
+    // Release: store 0 with .rl ordering.
+    uint32_t dummy;
+    __asm__ volatile("amoswap.w.rl %0, %2, (%1)\n" : "=&r"(dummy) : "r"(&l->v), "r"(0u) : "memory");
+}
+
+// Optional: tiny pause/backoff (works even if Zihintpause isn't present).
+// See: https://github.com/riscv/riscv-isa-manual/blob/main/src/zihintpause.adoc
+void cpu_relax(void) {
+#if defined(__riscv_zihintpause)
+    __asm__ volatile("pause");
+#else
+    __asm__ volatile("nop");
+#endif
+}
+
+// Test-and-test-and-set acquire with polite spinning + exponential backoff.
+void spin_lock(spinlock_t *l) {
+    unsigned backoff = 1;
+    for (;;) {
+        if (spin_trylock(l))
+            return;
+
+        // Contended: spin on plain loads (no AMO) until it looks free.
+        while (__atomic_load_n(&l->v, __ATOMIC_RELAXED) != 0) {
+            for (unsigned i = 0; i < backoff; ++i) cpu_relax();
+            if (backoff < 1u << 12)
+                backoff <<= 1;
+        }
+        // Try again; loop.
+    }
+}
diff --git a/kern/ispinlock.h b/kern/ispinlock.h
new file mode 100644
index 0000000..e0303d6
--- /dev/null
+++ b/kern/ispinlock.h
@@ -0,0 +1,12 @@
+#pragma once
+#include <types.h>
+
+typedef struct {
+    volatile uint32_t v; // 0 = unlocked, 1 = locked
+} spinlock_t;
+
+void spinlock_init(spinlock_t *l);
+bool spin_trylock(spinlock_t *l);
+void spin_unlock(spinlock_t *l);
+void cpu_relax(void);
+void spin_lock(spinlock_t *l);
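For reference, a minimal sketch of how the new lock is meant to be used (hypothetical example, not part of this diff; the counter_* names are invented and <types.h> is assumed to provide the fixed-width typedefs added later in this change):

#include "ispinlock.h"

static spinlock_t counter_lock;
static uint64_t counter;

void counter_init(void) {
    spinlock_init(&counter_lock);
    counter = 0;
}

// Any hart may call this; the AMO-based lock serializes the increment.
void counter_bump(void) {
    spin_lock(&counter_lock);
    counter++;
    spin_unlock(&counter_lock);
}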
diff --git a/kern/kalloc.c b/kern/kalloc.c
index 1a034bd..d3d254c 100644
--- a/kern/kalloc.c
+++ b/kern/kalloc.c
@@ -1,8 +1,8 @@
+#include <ispinlock.h>
 #include
 #include
 #include
 #include
-#include <spinlock.h>
 #include
 #include
 
@@ -23,12 +23,12 @@ struct Run {
 
 /** Kernel memory allocator.
  */
 struct {
-    struct Spinlock lock;
-    struct Run *freelist;
+    spinlock_t lock;
+    struct Run *freelist;
 } kmem;
 
 void kalloc_init() {
-    initlock(&kmem.lock, "kmem");
+    spinlock_init(&kmem.lock);
     freerange(kernel_end, (void *)PHYSTOP);
 }
@@ -43,8 +43,7 @@ void kfree(void *pa) {
 
     // Assert that the page is aligned to a page boundary and that it's
     // correctly sized
-    if (((u64)pa % PGSIZE) != 0 || (char *)pa < kernel_end ||
-        (u64)pa >= PHYSTOP)
+    if (((u64)pa % PGSIZE) != 0 || (char *)pa < kernel_end || (u64)pa >= PHYSTOP)
         panic("kfree");
 
     // Fill with junk to catch dangling refs.
@@ -52,23 +51,23 @@ void kfree(void *pa) {
 
     r = (struct Run *)pa;
 
-    acquire(&kmem.lock);
+    spin_lock(&kmem.lock);
     r->next = kmem.freelist;
     kmem.freelist = r;
-    release(&kmem.lock);
+    spin_unlock(&kmem.lock);
 }
 
 void *kalloc(void) {
     struct Run *r;
 
-    acquire(&kmem.lock);
+    spin_lock(&kmem.lock);
     r = kmem.freelist;
     if (r)
         kmem.freelist = r->next;
-    release(&kmem.lock);
+    spin_unlock(&kmem.lock);
 
     if (r)
         memset((char *)r, 5, PGSIZE); // fill with junk
diff --git a/kern/kprint.c b/kern/kprint.c
new file mode 100644
index 0000000..01bbac3
--- /dev/null
+++ b/kern/kprint.c
@@ -0,0 +1,105 @@
+#include <stdarg.h>
+#include
+
+static void append_char(char **buf, size_t *remaining, char c) {
+    if (*remaining > 1) { // Leave space for null terminator
+        **buf = c;
+        (*buf)++;
+        (*remaining)--;
+    }
+}
+
+static void append_str(char **buf, size_t *remaining, const char *str) {
+    while (*str) {
+        append_char(buf, remaining, *str++);
+    }
+}
+
+static void append_int(char **buf, size_t *remaining, int value, int base) {
+    char tmp[32];
+    const char *digits = "0123456789abcdef";
+    bool neg = false;
+    unsigned int uvalue = (unsigned int)value;
+    int i = 0;
+
+    if (base == 10 && value < 0) {
+        neg = true;
+        uvalue = -(unsigned int)value; // negate in unsigned space; safe even for INT_MIN
+    }
+
+    do {
+        tmp[i++] = digits[uvalue % base];
+        uvalue /= base;
+    } while (uvalue && i < (int)sizeof(tmp));
+
+    if (neg && i < (int)sizeof(tmp)) {
+        tmp[i++] = '-';
+    }
+
+    while (i--) {
+        append_char(buf, remaining, tmp[i]);
+    }
+}
+
+int kvsnprintf(char *buf, size_t size, const char *fmt, va_list args) {
+    char *p = buf;
+    size_t remaining = size;
+
+    while (*fmt) {
+        if (*fmt != '%') {
+            append_char(&p, &remaining, *fmt++);
+            continue;
+        }
+        fmt++; // skip '%'
+        if (*fmt == '\0') {
+            break; // stray '%' at the end of the format; don't read past it
+        }
+
+        switch (*fmt) {
+        case 's':
+            append_str(&p, &remaining, va_arg(args, const char *));
+            break;
+        case 'd':
+            append_int(&p, &remaining, va_arg(args, int), 10);
+            break;
+        case 'x':
+            append_int(&p, &remaining, va_arg(args, unsigned int), 16);
+            break;
+        case 'c':
+            append_char(&p, &remaining, (char)va_arg(args, int));
+            break;
+        case '%':
+            append_char(&p, &remaining, '%');
+            break;
+        default:
+            append_char(&p, &remaining, '?');
+            break;
+        }
+        fmt++;
+    }
+
+    if (size > 0) {
+        *p = '\0'; // append_char always leaves room, so this write is in bounds
+    }
+
+    return (int)(p - buf);
+}
+
+int ksnprintf(char *buf, size_t size, const char *fmt, ...) {
+    va_list args;
+    va_start(args, fmt);
+    int ret = kvsnprintf(buf, size, fmt, args);
+    va_end(args);
+    return ret;
+}
+
+void kprintf(const char *fmt, ...) {
+    char buffer[256];
+    va_list args;
+    va_start(args, fmt);
+    kvsnprintf(buffer, sizeof(buffer), fmt, args);
+    va_end(args);
+
+    // Now send buffer to the console/serial output
+    console_write(buffer);
+}
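As a usage sketch for the new kprint API (hypothetical, not part of the diff; report_page is invented and <types.h> is assumed to supply size_t and u64):

#include <types.h> // assumed to provide size_t / u64, per this change

int ksnprintf(char *buf, size_t size, const char *fmt, ...);
void kprintf(const char *fmt, ...);

// Hypothetical helper: format into a bounded buffer, then emit it whole.
void report_page(void *pa, int hart) {
    char line[64];

    // kvsnprintf understands only %s, %d, %x, %c and %%; the output is
    // NUL-terminated even when truncated to sizeof(line).
    ksnprintf(line, sizeof(line), "hart %d: page at %x\n", hart, (unsigned int)(u64)pa);
    kprintf("%s", line);
}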
diff --git a/kern/libkern/string.c b/kern/libkern/string.c
new file mode 100644
index 0000000..99a0ea2
--- /dev/null
+++ b/kern/libkern/string.c
@@ -0,0 +1,127 @@
+#include <string.h>
+
+void *memcpy(void *s1, const void *s2, size_t n) {
+    const char *f = s2;
+    char *t = s1;
+
+    while (n-- > 0) *t++ = *f++;
+    return s1;
+}
+
+void *memmove(void *s1, const void *s2, size_t n) {
+    const char *f = s2;
+    char *t = s1;
+
+    if (f < t) {
+        f += n;
+        t += n;
+        while (n-- > 0) *--t = *--f;
+    } else
+        while (n-- > 0) *t++ = *f++;
+    return s1;
+}
+
+void *memset(void *dest, int c, size_t n) {
+    unsigned char *s = dest;
+    size_t k;
+
+    /* Fill head and tail with minimal branching. Each
+     * conditional ensures that all the subsequently used
+     * offsets are well-defined and in the dest region. */
+
+    if (!n)
+        return dest;
+    s[0] = c;
+    s[n - 1] = c;
+    if (n <= 2)
+        return dest;
+    s[1] = c;
+    s[2] = c;
+    s[n - 2] = c;
+    s[n - 3] = c;
+    if (n <= 6)
+        return dest;
+    s[3] = c;
+    s[n - 4] = c;
+    if (n <= 8)
+        return dest;
+
+    /* Advance pointer to align it at a 4-byte boundary,
+     * and truncate n to a multiple of 4. The previous code
+     * already took care of any head/tail that get cut off
+     * by the alignment. */
+
+    k = -(uintptr_t)s & 3;
+    s += k;
+    n -= k;
+    n &= -4;
+
+#ifdef __GNUC__
+    typedef uint32_t __attribute__((__may_alias__)) u32;
+    typedef uint64_t __attribute__((__may_alias__)) u64;
+
+    u32 c32 = ((u32)-1) / 255 * (unsigned char)c;
+
+    /* In preparation to copy 32 bytes at a time, aligned on
+     * an 8-byte boundary, fill head/tail up to 28 bytes each.
+     * As in the initial byte-based head/tail fill, each
+     * conditional below ensures that the subsequent offsets
+     * are valid (e.g. !(n<=24) implies n>=28). */
+
+    *(u32 *)(s + 0) = c32;
+    *(u32 *)(s + n - 4) = c32;
+    if (n <= 8)
+        return dest;
+    *(u32 *)(s + 4) = c32;
+    *(u32 *)(s + 8) = c32;
+    *(u32 *)(s + n - 12) = c32;
+    *(u32 *)(s + n - 8) = c32;
+    if (n <= 24)
+        return dest;
+    *(u32 *)(s + 12) = c32;
+    *(u32 *)(s + 16) = c32;
+    *(u32 *)(s + 20) = c32;
+    *(u32 *)(s + 24) = c32;
+    *(u32 *)(s + n - 28) = c32;
+    *(u32 *)(s + n - 24) = c32;
+    *(u32 *)(s + n - 20) = c32;
+    *(u32 *)(s + n - 16) = c32;
+
+    /* Align to a multiple of 8 so we can fill 64 bits at a time,
+     * and avoid writing the same bytes twice as much as is
+     * practical without introducing additional branching. */
+
+    k = 24 + ((uintptr_t)s & 4);
+    s += k;
+    n -= k;
+
+    /* If this loop is reached, 28 tail bytes have already been
+     * filled, so any remainder when n drops below 32 can be
+     * safely ignored. */
+
+    u64 c64 = c32 | ((u64)c32 << 32);
+    for (; n >= 32; n -= 32, s += 32) {
+        *(u64 *)(s + 0) = c64;
+        *(u64 *)(s + 8) = c64;
+        *(u64 *)(s + 16) = c64;
+        *(u64 *)(s + 24) = c64;
+    }
#else
+    /* Pure C fallback with no aliasing violations.
+     */
+    for (; n; n--, s++) *s = c;
+#endif
+
+    return dest;
+}
+
+int memcmp(const void *s1, const void *s2, size_t n) {
+    if (n != 0) {
+        const unsigned char *p1 = s1, *p2 = s2;
+
+        do {
+            if (*p1++ != *p2++)
+                return (*--p1 - *--p2);
+        } while (--n != 0);
+    }
+    return (0);
+}
diff --git a/kern/libkern/string.h b/kern/libkern/string.h
new file mode 100644
index 0000000..01d3853
--- /dev/null
+++ b/kern/libkern/string.h
@@ -0,0 +1,7 @@
+#pragma once
+#include <types.h>
+
+void *memcpy(void *s1, const void *s2, size_t n);
+void *memmove(void *s1, const void *s2, size_t n);
+void *memset(void *dest, int c, size_t n);
+int memcmp(const void *s1, const void *s2, size_t n);
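A small self-check sketch for these routines (hypothetical harness, not part of the diff; kassert and the use of panic are assumptions, and the libkern header is assumed to be on the include path):

#include <string.h>

void panic(const char *msg);
#define kassert(x) do { if (!(x)) panic(#x); } while (0)

void string_selfcheck(void) {
    char a[16], b[16];

    // memset must fill every byte with the low 8 bits of c.
    memset(a, 0xab, sizeof(a));
    memset(b, 0xab, sizeof(b));
    kassert(memcmp(a, b, sizeof(a)) == 0);

    // Overlapping copy: memmove must handle dst > src correctly.
    memcpy(a, "0123456789", 11);
    memmove(a + 2, a, 8); // shifts "01234567" right by two
    kassert(memcmp(a, "0101234567", 10) == 0);
}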
diff --git a/kern/proc.h b/kern/proc.h
new file mode 100644
index 0000000..b594fc9
--- /dev/null
+++ b/kern/proc.h
@@ -0,0 +1,78 @@
+#pragma once
+#include <types.h>
+
+typedef enum {
+    UNUSED,
+    USED,
+    SLEEPING,
+    RUNNABLE,
+    RUNNING,
+    ZOMBIE,
+} ProcessState;
+
+/** Saved registers for kernel context switches. */
+struct Context {
+    uint64_t ra;
+    uint64_t sp;
+
+    // callee-saved
+    uint64_t s0;
+    uint64_t s1;
+    uint64_t s2;
+    uint64_t s3;
+    uint64_t s4;
+    uint64_t s5;
+    uint64_t s6;
+    uint64_t s7;
+    uint64_t s8;
+    uint64_t s9;
+    uint64_t s10;
+    uint64_t s11;
+};
+
+/** Per-CPU state. */
+struct Cpu {
+    struct Process *proc;   // The process running on this cpu, or null.
+    struct Context context; // swtch() here to enter scheduler().
+    int noff;               // Depth of push_off() nesting.
+    int intena;             // Were interrupts enabled before push_off()?
+};
+
+typedef struct {
+    /* 0 */ uint64_t kernel_satp;    // kernel page table
+    /* 8 */ uint64_t kernel_sp;      // top of process's kernel stack
+    /* 16 */ uint64_t kernel_trap;   // usertrap()
+    /* 24 */ uint64_t epc;           // saved user program counter
+    /* 32 */ uint64_t kernel_hartid; // saved kernel tp
+    /* 40 */ uint64_t ra;
+    /* 48 */ uint64_t sp;
+    /* 56 */ uint64_t gp;
+    /* 64 */ uint64_t tp;
+    /* 72 */ uint64_t t0;
+    /* 80 */ uint64_t t1;
+    /* 88 */ uint64_t t2;
+    /* 96 */ uint64_t s0;
+    /* 104 */ uint64_t s1;
+    /* 112 */ uint64_t a0;
+    /* 120 */ uint64_t a1;
+    /* 128 */ uint64_t a2;
+    /* 136 */ uint64_t a3;
+    /* 144 */ uint64_t a4;
+    /* 152 */ uint64_t a5;
+    /* 160 */ uint64_t a6;
+    /* 168 */ uint64_t a7;
+    /* 176 */ uint64_t s2;
+    /* 184 */ uint64_t s3;
+    /* 192 */ uint64_t s4;
+    /* 200 */ uint64_t s5;
+    /* 208 */ uint64_t s6;
+    /* 216 */ uint64_t s7;
+    /* 224 */ uint64_t s8;
+    /* 232 */ uint64_t s9;
+    /* 240 */ uint64_t s10;
+    /* 248 */ uint64_t s11;
+    /* 256 */ uint64_t t3;
+    /* 264 */ uint64_t t4;
+    /* 272 */ uint64_t t5;
+    /* 280 */ uint64_t t6;
+} TrapFrame_t;
diff --git a/start.c b/start.c
index d9ca8ef..7f72faf 100644
--- a/start.c
+++ b/start.c
@@ -1,9 +1,9 @@
 #include
+#include <ispinlock.h>
 #include
 #include
 #include
 #include
-#include <spinlock.h>
 #include
 #include
 
@@ -16,8 +16,8 @@
 char stack0[4096 * NCPU] __attribute__((aligned(16)));
 
 /* Keep this here and sync on it until we have synchronized printf */
-struct Spinlock sl = {0};
-volatile int greeted = 0;
+spinlock_t sl = {0};
+volatile int hold = 1;
 
 /* This is where entry.S drops us off. All cores land here */
 void start() {
@@ -29,24 +29,22 @@ void start() {
     // cpu (struct Cpu).
     write_tp(id);
 
-    acquire(&sl);
-
-    if (!greeted) {
-        uart_puts("Hello Neptune!\n");
-        greeted = 1;
-    }
-
-    uart_puts("Hart number: ");
-    uart_putc(id + '0');
-    uart_putc('\n');
-
-    release(&sl);
-
     if (id == 0) {
         /* Here we will do a bunch of initialization steps */
         kalloc_init();
+        uart_puts("Hello Neptune!\n");
+        spinlock_init(&sl);
+        hold = 0;
     }
 
+    while (hold);
+
+    spin_lock(&sl);
+    uart_puts("Hart number: ");
+    uart_putc(id + '0');
+    uart_putc('\n');
+    spin_unlock(&sl);
+
     // We should not arrive here, but if we do, hang in a while on wfi.
     while (1) __asm__ volatile("wfi"); // (Wait For Interrupt)
 }
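One note on the new boot handshake: volatile keeps the compiler from re-reading a cached hold, but by itself it establishes no inter-hart memory ordering. A sketch of the same gate with an explicit acquire/release pairing, using the standard GCC/Clang __atomic builtins already relied on in ispinlock.c (the gate names are hypothetical, not part of this diff):

#include "ispinlock.h" // for cpu_relax()

// Hypothetical boot gate with explicit ordering.
static volatile int boot_hold = 1;

// Hart 0: release-store publishes all prior init (kalloc_init, etc.).
static inline void boot_gate_open(void) {
    __atomic_store_n(&boot_hold, 0, __ATOMIC_RELEASE);
}

// Secondary harts: the acquire-load pairs with the release above.
static inline void boot_gate_wait(void) {
    while (__atomic_load_n(&boot_hold, __ATOMIC_ACQUIRE))
        cpu_relax();
}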
diff --git a/types.h b/types.h
index cd25ff8..69e74d4 100644
--- a/types.h
+++ b/types.h
@@ -4,4 +4,15 @@ typedef unsigned char u8;
 typedef unsigned short u16;
 typedef unsigned int u32;
 typedef unsigned long u64;
-typedef u64 size_t;
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long uint64_t;
+typedef uint64_t size_t;
+
+typedef uint64_t uintptr_t;
+
+typedef u8 bool;
+#define true 1
+#define false 0
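Since several files now depend on these widths, a compile-time sanity check is cheap (hypothetical snippet, assumes a C11 compiler with _Static_assert and an rv64 target):

#include <types.h>

_Static_assert(sizeof(uint8_t) == 1, "uint8_t must be 1 byte");
_Static_assert(sizeof(uint16_t) == 2, "uint16_t must be 2 bytes");
_Static_assert(sizeof(uint32_t) == 4, "uint32_t must be 4 bytes");
_Static_assert(sizeof(uint64_t) == 8, "uint64_t must be 8 bytes");
_Static_assert(sizeof(uintptr_t) == sizeof(void *), "uintptr_t must hold a pointer");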