diff --git a/abstract-machine/.gitignore b/abstract-machine/.gitignore new file mode 100644 index 0000000..84c3ed2 --- /dev/null +++ b/abstract-machine/.gitignore @@ -0,0 +1,19 @@ +* +!*/ +!*.h +!*.c +!*.cc +!*.S +!*.ld +!*.sh +!*.py +!*.mk +!Makefile +!README +!LICENSE +.* +_* +*~ +build/ +!.gitignore +.vscode \ No newline at end of file diff --git a/abstract-machine/LICENSE b/abstract-machine/LICENSE new file mode 100644 index 0000000..7ff476d --- /dev/null +++ b/abstract-machine/LICENSE @@ -0,0 +1,22 @@ +The AbstractMachine software is: + +Copyright (c) 2018-2021 Yanyan Jiang and Zihao Yu + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/abstract-machine/Makefile b/abstract-machine/Makefile
new file mode 100644
index 0000000..3a5d60a
--- /dev/null
+++ b/abstract-machine/Makefile
@@ -0,0 +1,163 @@
+# Makefile for AbstractMachine Kernels and Libraries
+
+### *Get a more readable version of this Makefile* by `make html` (requires python-markdown)
+html:
+	sed 's/^\([^#]\)/    \1/g' Makefile | markdown_py > Makefile.html
+.PHONY: html
+
+## 1. Basic Setup and Checks
+
+### Default to create a bare-metal kernel image
+ifeq ($(MAKECMDGOALS),)
+  MAKECMDGOALS = image
+  .DEFAULT_GOAL = image
+endif
+
+### Override checks when every goal is one of `clean`, `clean-all`, `html` (whole-word match, not substring)
+ifneq ($(filter-out clean clean-all html,$(MAKECMDGOALS)),)
+
+### Print build info message
+$(info # Building $(NAME)-$(MAKECMDGOALS) [$(ARCH)])
+
+### Check: environment variable `$AM_HOME` looks sane
+ifeq ($(wildcard $(AM_HOME)/am/include/am.h),)
+  $(error $$AM_HOME must be an AbstractMachine repo)
+endif
+
+### Check: environment variable `$ARCH` must be in the supported list (one `scripts/*.mk` file per supported arch)
+ARCHS := $(sort $(basename $(notdir $(wildcard $(AM_HOME)/scripts/*.mk))))
+ifeq ($(filter $(ARCHS), $(ARCH)), )
+  $(error Expected $$ARCH in {$(ARCHS)}, Got "$(ARCH)")
+endif
+
+### Extract instruction set architecture (`ISA`) and platform from `$ARCH`. Example: `ARCH=x86_64-qemu -> ISA=x86_64; PLATFORM=qemu`
+ARCH_SPLIT = $(subst -, ,$(ARCH))
+ISA        = $(word 1,$(ARCH_SPLIT))
+PLATFORM   = $(word 2,$(ARCH_SPLIT))
+
+### Check if there is something to build
+ifeq ($(flavor SRCS), undefined)
+  $(error Nothing to build)
+endif
+
+### Checks end here
+endif
+
+## 2. 
General Compilation Targets
+
+### Create the destination directory (`build/$ARCH`); `WORK_DIR` is expanded once (`:=`) so `pwd` is not re-run on every use
+WORK_DIR  := $(shell pwd)
+DST_DIR   = $(WORK_DIR)/build/$(ARCH)
+$(shell mkdir -p $(DST_DIR))
+
+### Compilation targets (a binary image or archive)
+IMAGE_REL = build/$(NAME)-$(ARCH)
+IMAGE     = $(abspath $(IMAGE_REL))
+ARCHIVE   = $(WORK_DIR)/build/$(NAME)-$(ARCH).a
+
+### Collect the files to be linked: object files (`.o`) and libraries (`.a`); `LIBS` must use `:=` because lazy evaluation (`=`) causes infinite recursion
+OBJS      = $(addprefix $(DST_DIR)/, $(addsuffix .o, $(basename $(SRCS))))
+LIBS     := $(sort $(LIBS) am klib)
+LINKAGE   = $(OBJS) \
+  $(addsuffix -$(ARCH).a, $(join \
+    $(addsuffix /build/, $(addprefix $(AM_HOME)/, $(LIBS))), \
+    $(LIBS) ))
+
+## 3. General Compilation Flags
+
+### (Cross) compilers, e.g., mips-linux-gnu-g++
+AS        = $(CROSS_COMPILE)gcc
+CC        = $(CROSS_COMPILE)gcc
+CXX       = $(CROSS_COMPILE)g++
+LD        = $(CROSS_COMPILE)ld
+AR        = $(CROSS_COMPILE)ar
+OBJDUMP   = $(CROSS_COMPILE)objdump
+OBJCOPY   = $(CROSS_COMPILE)objcopy
+READELF   = $(CROSS_COMPILE)readelf
+
+### Compilation flags
+INC_PATH += $(WORK_DIR)/include $(addsuffix /include/, $(addprefix $(AM_HOME)/, $(LIBS)))
+INCFLAGS += $(addprefix -I, $(INC_PATH))
+
+ARCH_H := arch/$(ARCH).h
+CFLAGS   += -O2 -MMD -Wall -Werror $(INCFLAGS) \
+            -D__ISA__=\"$(ISA)\" -D__ISA_$(shell echo $(ISA) | tr a-z A-Z)__ \
+            -D__ARCH__=$(ARCH) -D__ARCH_$(shell echo $(ARCH) | tr a-z A-Z | tr - _) \
+            -D__PLATFORM__=$(PLATFORM) -D__PLATFORM_$(shell echo $(PLATFORM) | tr a-z A-Z | tr - _) \
+            -DARCH_H=\"$(ARCH_H)\" \
+            -fno-asynchronous-unwind-tables -fno-builtin -fno-stack-protector \
+            -Wno-main -U_FORTIFY_SOURCE
+CXXFLAGS +=  $(CFLAGS) -ffreestanding -fno-rtti -fno-exceptions
+ASFLAGS  += -MMD $(INCFLAGS)
+LDFLAGS  += -z noexecstack
+
+## 4. 
Arch-Specific Configurations
+
+### Pull in the per-architecture settings (e.g., `scripts/x86_64-qemu.mk`); `-include` skips a missing file silently
+-include $(AM_HOME)/scripts/$(ARCH).mk
+
+### If `$(CC)` cannot be located, clear the prefix so the native gcc/binutils are used instead
+ifeq ($(wildcard $(shell which $(CC))),)
+  $(info #  $(CC) not found; fall back to default gcc and binutils)
+  CROSS_COMPILE :=
+endif
+
+## 5. Compilation Rules
+
+### Rule (compile): one `.c` source -> one `.o` object (gcc)
+$(DST_DIR)/%.o: %.c
+	@mkdir -p $(@D) && echo + CC $<
+	@$(CC) -std=gnu11 $(CFLAGS) -c -o $@ $(realpath $<)
+
+### Rule (compile): one `.cc` source -> one `.o` object (g++)
+$(DST_DIR)/%.o: %.cc
+	@mkdir -p $(@D) && echo + CXX $<
+	@$(CXX) -std=c++17 $(CXXFLAGS) -c -o $@ $(realpath $<)
+
+### Rule (compile): one `.cpp` source -> one `.o` object (g++)
+$(DST_DIR)/%.o: %.cpp
+	@mkdir -p $(@D) && echo + CXX $<
+	@$(CXX) -std=c++17 $(CXXFLAGS) -c -o $@ $(realpath $<)
+
+### Rule (compile): one `.S` source -> one `.o` object (gcc preprocesses, then invokes as)
+$(DST_DIR)/%.o: %.S
+	@mkdir -p $(@D) && echo + AS $<
+	@$(AS) $(ASFLAGS) -c -o $@ $(realpath $<)
+
+### Rule (recursive make): run the `archive` goal inside each library directory (am, klib, ...)
+$(LIBS): %:
+	@$(MAKE) -s -C $(AM_HOME)/$* archive
+
+### Rule (link): objects (`*.o`) plus libraries (`*.a`) -> `IMAGE.elf`, the ELF binary that is later packed into the image (ld)
+$(IMAGE).elf: $(OBJS) am $(LIBS)
+	@echo + LD "->" $(IMAGE_REL).elf
+	@$(LD) $(LDFLAGS) -o $@ --start-group $(LINKAGE) --end-group
+
+### Rule (archive): bundle the objects (`*.o`) into `ARCHIVE.a` (ar)
+$(ARCHIVE): $(OBJS)
+	@echo + AR "->" $(shell realpath $@ --relative-to .)
+	@$(AR) rcs $@ $(OBJS)
+
+### Rule (`#include` dependencies): read the `.d` files gcc writes because of `-MMD`
+-include $(addprefix $(DST_DIR)/, $(addsuffix .d, $(basename $(SRCS))))
+
+## 6. 
Miscellaneous
+
+### Build order control
+image: image-dep
+archive: $(ARCHIVE)
+image-dep: $(OBJS) am $(LIBS)
+	@echo \# Creating image [$(ARCH)]
+.PHONY: image image-dep archive run $(LIBS)
+
+### Clean a single project (remove `build/`)
+clean:
+	rm -rf Makefile.html $(WORK_DIR)/build/
+.PHONY: clean
+
+### Clean all sub-projects: every directory at depth >= 2 holding a `Makefile` (errors ignored); `:=` so `find` runs only once
+CLEAN_ALL := $(dir $(shell find . -mindepth 2 -name Makefile))
+clean-all: $(CLEAN_ALL) clean
+$(CLEAN_ALL):
+	-@$(MAKE) -s -C $@ clean
+.PHONY: clean-all $(CLEAN_ALL)
diff --git a/abstract-machine/README b/abstract-machine/README
new file mode 100644
index 0000000..2e0392a
--- /dev/null
+++ b/abstract-machine/README
@@ -0,0 +1,13 @@
+AbstractMachine is a minimal, modularized, and machine-independent
+abstraction layer of the computer hardware:
+
+* physical memory and direct execution (The "Turing Machine");
+* basic model for input and output devices (I/O Extension);
+* interrupt/exception and processor context management (Context Extension);
+* virtual memory and protection (Virtual Memory Extension);
+* multiprocessing (Multiprocessing Extension).
+
+CONTACTS
+
+Bug reports and suggestions go to Yanyan Jiang (jyy@nju.edu.cn) and Zihao
+Yu (yuzihao@ict.ac.cn).
diff --git a/abstract-machine/am/Makefile b/abstract-machine/am/Makefile
new file mode 100644
index 0000000..48de559
--- /dev/null
+++ b/abstract-machine/am/Makefile
@@ -0,0 +1,5 @@
+NAME     := am
+SRCS      = $(addprefix src/, $(AM_SRCS))
+INC_PATH += $(AM_HOME)/am/src
+
+include $(AM_HOME)/Makefile
diff --git a/abstract-machine/am/include/am.h b/abstract-machine/am/include/am.h
new file mode 100644
index 0000000..1ba8b56
--- /dev/null
+++ b/abstract-machine/am/include/am.h
@@ -0,0 +1,81 @@
+#ifndef AM_H__
+#define AM_H__
+
+#include
+#include
+#include
+#include ARCH_H // this macro is defined in $CFLAGS
+                // examples: "arch/x86-qemu.h", "arch/native.h", ... 
+ +// Memory protection flags +#define MMAP_NONE 0x00000000 // no access +#define MMAP_READ 0x00000001 // can read +#define MMAP_WRITE 0x00000002 // can write + +// Memory area for [@start, @end) +typedef struct { + void *start, *end; +} Area; + +// Arch-dependent processor context +typedef struct Context Context; + +// An event of type @event, caused by @cause of pointer @ref +typedef struct { + enum { + EVENT_NULL = 0, + EVENT_YIELD, EVENT_SYSCALL, EVENT_PAGEFAULT, EVENT_ERROR, + EVENT_IRQ_TIMER, EVENT_IRQ_IODEV, + } event; + uintptr_t cause, ref; + const char *msg; +} Event; + +// A protected address space with user memory @area +// and arch-dependent @ptr +typedef struct { + int pgsize; + Area area; + void *ptr; +} AddrSpace; + +#ifdef __cplusplus +extern "C" { +#endif + +// ----------------------- TRM: Turing Machine ----------------------- +extern Area heap; +void putch (char ch); +void halt (int code) __attribute__((__noreturn__)); + +// -------------------- IOE: Input/Output Devices -------------------- +bool ioe_init (void); +void ioe_read (int reg, void *buf); +void ioe_write (int reg, void *buf); +#include "amdev.h" + +// ---------- CTE: Interrupt Handling and Context Switching ---------- +bool cte_init (Context *(*handler)(Event ev, Context *ctx)); +void yield (void); +bool ienabled (void); +void iset (bool enable); +Context *kcontext (Area kstack, void (*entry)(void *), void *arg); + +// ----------------------- VME: Virtual Memory ----------------------- +bool vme_init (void *(*pgalloc)(int), void (*pgfree)(void *)); +void protect (AddrSpace *as); +void unprotect (AddrSpace *as); +void map (AddrSpace *as, void *vaddr, void *paddr, int prot); +Context *ucontext (AddrSpace *as, Area kstack, void *entry); + +// ---------------------- MPE: Multi-Processing ---------------------- +bool mpe_init (void (*entry)()); +int cpu_count (void); +int cpu_current (void); +int atomic_xchg (int *addr, int newval); + +#ifdef __cplusplus +} +#endif + +#endif diff --git 
a/abstract-machine/am/include/amdev.h b/abstract-machine/am/include/amdev.h new file mode 100644 index 0000000..e1cdc48 --- /dev/null +++ b/abstract-machine/am/include/amdev.h @@ -0,0 +1,74 @@ +#ifndef __AMDEV_H__ +#define __AMDEV_H__ + +// **MAY SUBJECT TO CHANGE IN THE FUTURE** + +#define AM_DEVREG(id, reg, perm, ...) \ + enum { AM_##reg = (id) }; \ + typedef struct { __VA_ARGS__; } AM_##reg##_T; + +AM_DEVREG( 1, UART_CONFIG, RD, bool present); +AM_DEVREG( 2, UART_TX, WR, char data); +AM_DEVREG( 3, UART_RX, RD, char data); +AM_DEVREG( 4, TIMER_CONFIG, RD, bool present, has_rtc); +AM_DEVREG( 5, TIMER_RTC, RD, int year, month, day, hour, minute, second); +AM_DEVREG( 6, TIMER_UPTIME, RD, uint64_t us); +AM_DEVREG( 7, INPUT_CONFIG, RD, bool present); +AM_DEVREG( 8, INPUT_KEYBRD, RD, bool keydown; int keycode); +AM_DEVREG( 9, GPU_CONFIG, RD, bool present, has_accel; int width, height, vmemsz); +AM_DEVREG(10, GPU_STATUS, RD, bool ready); +AM_DEVREG(11, GPU_FBDRAW, WR, int x, y; void *pixels; int w, h; bool sync); +AM_DEVREG(12, GPU_MEMCPY, WR, uint32_t dest; void *src; int size); +AM_DEVREG(13, GPU_RENDER, WR, uint32_t root); +AM_DEVREG(14, AUDIO_CONFIG, RD, bool present; int bufsize); +AM_DEVREG(15, AUDIO_CTRL, WR, int freq, channels, samples); +AM_DEVREG(16, AUDIO_STATUS, RD, int count); +AM_DEVREG(17, AUDIO_PLAY, WR, Area buf); +AM_DEVREG(18, DISK_CONFIG, RD, bool present; int blksz, blkcnt); +AM_DEVREG(19, DISK_STATUS, RD, bool ready); +AM_DEVREG(20, DISK_BLKIO, WR, bool write; void *buf; int blkno, blkcnt); +AM_DEVREG(21, NET_CONFIG, RD, bool present); +AM_DEVREG(22, NET_STATUS, RD, int rx_len, tx_len); +AM_DEVREG(23, NET_TX, WR, Area buf); +AM_DEVREG(24, NET_RX, WR, Area buf); + +// Input + +#define AM_KEYS(_) \ + _(ESCAPE) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) _(F8) _(F9) _(F10) _(F11) _(F12) \ + _(GRAVE) _(1) _(2) _(3) _(4) _(5) _(6) _(7) _(8) _(9) _(0) _(MINUS) _(EQUALS) _(BACKSPACE) \ + _(TAB) _(Q) _(W) _(E) _(R) _(T) _(Y) _(U) _(I) _(O) _(P) 
_(LEFTBRACKET) _(RIGHTBRACKET) _(BACKSLASH) \ + _(CAPSLOCK) _(A) _(S) _(D) _(F) _(G) _(H) _(J) _(K) _(L) _(SEMICOLON) _(APOSTROPHE) _(RETURN) \ + _(LSHIFT) _(Z) _(X) _(C) _(V) _(B) _(N) _(M) _(COMMA) _(PERIOD) _(SLASH) _(RSHIFT) \ + _(LCTRL) _(APPLICATION) _(LALT) _(SPACE) _(RALT) _(RCTRL) \ + _(UP) _(DOWN) _(LEFT) _(RIGHT) _(INSERT) _(DELETE) _(HOME) _(END) _(PAGEUP) _(PAGEDOWN) + +#define AM_KEY_NAMES(key) AM_KEY_##key, +enum { + AM_KEY_NONE = 0, + AM_KEYS(AM_KEY_NAMES) +}; + +// GPU + +#define AM_GPU_TEXTURE 1 +#define AM_GPU_SUBTREE 2 +#define AM_GPU_NULL 0xffffffff + +typedef uint32_t gpuptr_t; + +struct gpu_texturedesc { + uint16_t w, h; + gpuptr_t pixels; +} __attribute__((packed)); + +struct gpu_canvas { + uint16_t type, w, h, x1, y1, w1, h1; + gpuptr_t sibling; + union { + gpuptr_t child; + struct gpu_texturedesc texture; + }; +} __attribute__((packed)); + +#endif diff --git a/abstract-machine/am/include/arch/loongarch32r-nemu.h b/abstract-machine/am/include/arch/loongarch32r-nemu.h new file mode 100644 index 0000000..b0a753b --- /dev/null +++ b/abstract-machine/am/include/arch/loongarch32r-nemu.h @@ -0,0 +1,16 @@ +#ifndef __ARCH_H__ +#define __ARCH_H__ + +struct Context { + // TODO: fix the order of these members to match trap.S + uintptr_t gpr[32], era, estat, prmd; + void *pdir; +}; + +#define GPR1 gpr[11] // a7 +#define GPR2 gpr[0] +#define GPR3 gpr[0] +#define GPR4 gpr[0] +#define GPRx gpr[0] + +#endif diff --git a/abstract-machine/am/include/arch/mips32-nemu.h b/abstract-machine/am/include/arch/mips32-nemu.h new file mode 100644 index 0000000..2aad38c --- /dev/null +++ b/abstract-machine/am/include/arch/mips32-nemu.h @@ -0,0 +1,16 @@ +#ifndef __ARCH_H__ +#define __ARCH_H__ + +struct Context { + // TODO: fix the order of these members to match trap.S + uintptr_t hi, gpr[32], epc, cause, lo, status; + void *pdir; +}; + +#define GPR1 gpr[2] // v0 +#define GPR2 gpr[0] +#define GPR3 gpr[0] +#define GPR4 gpr[0] +#define GPRx gpr[0] + +#endif diff --git 
a/abstract-machine/am/include/arch/native.h b/abstract-machine/am/include/arch/native.h new file mode 100644 index 0000000..8b94efd --- /dev/null +++ b/abstract-machine/am/include/arch/native.h @@ -0,0 +1,26 @@ +#ifndef ARCH_H__ +#define ARCH_H__ + +#ifndef __USE_GNU +# define __USE_GNU +#endif + +#include + +struct Context { + uintptr_t ksp; + void *vm_head; + ucontext_t uc; + // skip the red zone of the stack frame, see the amd64 ABI manual for details + uint8_t redzone[128]; +}; + +#define GPR1 uc.uc_mcontext.gregs[REG_RDI] +#define GPR2 uc.uc_mcontext.gregs[REG_RSI] +#define GPR3 uc.uc_mcontext.gregs[REG_RDX] +#define GPR4 uc.uc_mcontext.gregs[REG_RCX] +#define GPRx uc.uc_mcontext.gregs[REG_RAX] + +#undef __USE_GNU + +#endif diff --git a/abstract-machine/am/include/arch/riscv.h b/abstract-machine/am/include/arch/riscv.h new file mode 100644 index 0000000..9709050 --- /dev/null +++ b/abstract-machine/am/include/arch/riscv.h @@ -0,0 +1,27 @@ +#ifndef ARCH_H__ +#define ARCH_H__ + +#ifdef __riscv_e +#define NR_REGS 16 +#else +#define NR_REGS 32 +#endif + +struct Context { + // TODO: fix the order of these members to match trap.S + uintptr_t mepc, mcause, gpr[NR_REGS], mstatus; + void *pdir; +}; + +#ifdef __riscv_e +#define GPR1 gpr[15] // a5 +#else +#define GPR1 gpr[17] // a7 +#endif + +#define GPR2 gpr[0] +#define GPR3 gpr[0] +#define GPR4 gpr[0] +#define GPRx gpr[0] + +#endif diff --git a/abstract-machine/am/include/arch/x86-nemu.h b/abstract-machine/am/include/arch/x86-nemu.h new file mode 100644 index 0000000..a4e8176 --- /dev/null +++ b/abstract-machine/am/include/arch/x86-nemu.h @@ -0,0 +1,17 @@ +#ifndef ARCH_H__ +#define ARCH_H__ + +struct Context { + // TODO: fix the order of these members to match trap.S + uintptr_t esi, ebx, eax, eip, edx, eflags, ecx, cs, esp, edi, ebp; + void *cr3; + int irq; +}; + +#define GPR1 eax +#define GPR2 eip +#define GPR3 eip +#define GPR4 eip +#define GPRx eip + +#endif diff --git a/abstract-machine/am/include/arch/x86-qemu.h 
b/abstract-machine/am/include/arch/x86-qemu.h new file mode 100644 index 0000000..e4e2a1e --- /dev/null +++ b/abstract-machine/am/include/arch/x86-qemu.h @@ -0,0 +1,17 @@ +#ifndef ARCH_H__ +#define ARCH_H__ + +struct Context { + void *cr3; + uint32_t ds, eax, ebx, ecx, edx, + esp0, esi, edi, ebp, + eip, cs, eflags, esp, ss3; +}; + +#define GPR1 eax +#define GPR2 ebx +#define GPR3 ecx +#define GPR4 edx +#define GPRx eax + +#endif diff --git a/abstract-machine/am/include/arch/x86_64-qemu.h b/abstract-machine/am/include/arch/x86_64-qemu.h new file mode 100644 index 0000000..3e9746e --- /dev/null +++ b/abstract-machine/am/include/arch/x86_64-qemu.h @@ -0,0 +1,21 @@ +#ifndef ARCH_H__ +#define ARCH_H__ + +struct Context { + void *cr3; + uint64_t rax, rbx, rcx, rdx, + rbp, rsi, rdi, + r8, r9, r10, r11, + r12, r13, r14, r15, + rip, cs, rflags, + rsp, ss, rsp0; +}; + + +#define GPR1 rdi +#define GPR2 rsi +#define GPR3 rdx +#define GPR4 rcx +#define GPRx rax + +#endif diff --git a/abstract-machine/am/src/loongarch/loongarch32r.h b/abstract-machine/am/src/loongarch/loongarch32r.h new file mode 100644 index 0000000..49147b7 --- /dev/null +++ b/abstract-machine/am/src/loongarch/loongarch32r.h @@ -0,0 +1,24 @@ +#ifndef LOONGARCH32R_H__ +#define LOONGARCH32R_H__ + +#include + +static inline uint8_t inb(uintptr_t addr) { return *(volatile uint8_t *)addr; } +static inline uint16_t inw(uintptr_t addr) { return *(volatile uint16_t *)addr; } +static inline uint32_t inl(uintptr_t addr) { return *(volatile uint32_t *)addr; } + +static inline void outb(uintptr_t addr, uint8_t data) { *(volatile uint8_t *)addr = data; } +static inline void outw(uintptr_t addr, uint16_t data) { *(volatile uint16_t *)addr = data; } +static inline void outl(uintptr_t addr, uint32_t data) { *(volatile uint32_t *)addr = data; } + +#define PTE_V 0x1 +#define PTE_D 0x2 + +// Page directory and page table constants +#define PTXSHFT 12 // Offset of PTX in a linear address +#define PDXSHFT 22 // Offset of PDX in a 
linear address + +#define PDX(va) (((uint32_t)(va) >> PDXSHFT) & 0x3ff) +#define PTX(va) (((uint32_t)(va) >> PTXSHFT) & 0x3ff) + +#endif diff --git a/abstract-machine/am/src/loongarch/nemu/cte.c b/abstract-machine/am/src/loongarch/nemu/cte.c new file mode 100644 index 0000000..ca82de9 --- /dev/null +++ b/abstract-machine/am/src/loongarch/nemu/cte.c @@ -0,0 +1,47 @@ +#include +#include +#include + +static Context* (*user_handler)(Event, Context*) = NULL; + +Context* __am_irq_handle(Context *c) { + if (user_handler) { + Event ev = {0}; + uintptr_t ecode = 0; + switch (ccode) { + default: ev.event = EVENT_ERROR; break; + } + + c = user_handler(ev, c); + assert(c != NULL); + } + + return c; +} + +extern void __am_asm_trap(void); + +bool cte_init(Context*(*handler)(Event, Context*)) { + // initialize exception entry + asm volatile("csrwr %0, 0xc" : : "r"(__am_asm_trap)); // 0xc = eentry + + // register event handler + user_handler = handler; + + return true; +} + +Context *kcontext(Area kstack, void (*entry)(void *), void *arg) { + return NULL; +} + +void yield() { + asm volatile("li.w $a7, -1; syscall 0"); +} + +bool ienabled() { + return false; +} + +void iset(bool enable) { +} diff --git a/abstract-machine/am/src/loongarch/nemu/start.S b/abstract-machine/am/src/loongarch/nemu/start.S new file mode 100644 index 0000000..56341a2 --- /dev/null +++ b/abstract-machine/am/src/loongarch/nemu/start.S @@ -0,0 +1,8 @@ +.section entry, "ax" +.globl _start +.type _start, @function + +_start: + move $fp, $zero + la.local $sp, _stack_pointer + bl _trm_init diff --git a/abstract-machine/am/src/loongarch/nemu/trap.S b/abstract-machine/am/src/loongarch/nemu/trap.S new file mode 100644 index 0000000..e02f7fc --- /dev/null +++ b/abstract-machine/am/src/loongarch/nemu/trap.S @@ -0,0 +1,50 @@ +#define concat_temp(x, y) x ## y +#define concat(x, y) concat_temp(x, y) +#define MAP(c, f) c(f) + +#define REGS(f) \ + f( 1) f( 2) f( 4) f( 5) f( 6) f( 7) f( 8) f( 9) \ +f(10) f(11) f(12) f(13) 
f(14) f(15) f(16) f(17) f(18) f(19) \ +f(20) f(21) f(22) f(23) f(24) f(25) f(26) f(27) f(28) f(29) \ +f(30) f(31) + +#define PUSH(n) st.w $concat(r, n), $sp, (n * 4); +#define POP(n) ld.w $concat(r, n), $sp, (n * 4); + +#define CONTEXT_SIZE ((32 + 3) * 4) +#define OFFSET_SP ( 3 * 4) +#define OFFSET_ESTAT (32 * 4) +#define OFFSET_PRMD (33 * 4) +#define OFFSET_ERA (34 * 4) + +#define CSR_ESTAT 0x5 +#define CSR_PRMD 0x1 +#define CSR_ERA 0x6 + +.align 6 +.globl __am_asm_trap +__am_asm_trap: + addi.w $sp, $sp, -CONTEXT_SIZE + + MAP(REGS, PUSH) + + csrrd $t0, CSR_ESTAT + csrrd $t1, CSR_PRMD + csrrd $t2, CSR_ERA + + st.w $t0, $sp, OFFSET_ESTAT + st.w $t1, $sp, OFFSET_PRMD + st.w $t2, $sp, OFFSET_ERA + + move $a0, $sp + bl __am_irq_handle + + ld.w $t1, $sp, OFFSET_PRMD + ld.w $t2, $sp, OFFSET_ERA + csrwr $t1, CSR_PRMD + csrwr $t2, CSR_ERA + + MAP(REGS, POP) + + addi.w $sp, $sp, CONTEXT_SIZE + ertn diff --git a/abstract-machine/am/src/loongarch/nemu/vme.c b/abstract-machine/am/src/loongarch/nemu/vme.c new file mode 100644 index 0000000..88c0211 --- /dev/null +++ b/abstract-machine/am/src/loongarch/nemu/vme.c @@ -0,0 +1,42 @@ +#include +#include + +#define USER_SPACE RANGE(0x40000000, 0x80000000) + +static void* (*pgalloc_usr)(int) = NULL; +static void (*pgfree_usr)(void*) = NULL; +static int vme_enable = 0; +static PTE *cur_pdir = NULL; + +bool vme_init(void* (*pgalloc_f)(int), void (*pgfree_f)(void*)) { + pgalloc_usr = pgalloc_f; + pgfree_usr = pgfree_f; + vme_enable = 1; + return true; +} + +void protect(AddrSpace *as) { + as->ptr = (PTE*)(pgalloc_usr(PGSIZE)); + as->pgsize = PGSIZE; + as->area = USER_SPACE; +} + +void unprotect(AddrSpace *as) { +} + +void __am_get_cur_as(Context *c) { + c->pdir = (vme_enable ? 
cur_pdir : NULL); +} + +void __am_switch(Context *c) { + if (vme_enable && c->pdir != NULL) { + cur_pdir = c->pdir; + } +} + +void map(AddrSpace *as, void *va, void *pa, int prot) { +} + +Context *ucontext(AddrSpace *as, Area kstack, void *entry) { + return NULL; +} diff --git a/abstract-machine/am/src/mips/mips32.h b/abstract-machine/am/src/mips/mips32.h new file mode 100644 index 0000000..a2ce84b --- /dev/null +++ b/abstract-machine/am/src/mips/mips32.h @@ -0,0 +1,24 @@ +#ifndef MIPS32_H__ +#define MIPS32_H__ + +#include + +static inline uint8_t inb(uintptr_t addr) { return *(volatile uint8_t *)addr; } +static inline uint16_t inw(uintptr_t addr) { return *(volatile uint16_t *)addr; } +static inline uint32_t inl(uintptr_t addr) { return *(volatile uint32_t *)addr; } + +static inline void outb(uintptr_t addr, uint8_t data) { *(volatile uint8_t *)addr = data; } +static inline void outw(uintptr_t addr, uint16_t data) { *(volatile uint16_t *)addr = data; } +static inline void outl(uintptr_t addr, uint32_t data) { *(volatile uint32_t *)addr = data; } + +#define PTE_V 0x2 +#define PTE_D 0x4 + +// Page directory and page table constants +#define PTXSHFT 12 // Offset of PTX in a linear address +#define PDXSHFT 22 // Offset of PDX in a linear address + +#define PDX(va) (((uint32_t)(va) >> PDXSHFT) & 0x3ff) +#define PTX(va) (((uint32_t)(va) >> PTXSHFT) & 0x3ff) + +#endif diff --git a/abstract-machine/am/src/mips/nemu/cte.c b/abstract-machine/am/src/mips/nemu/cte.c new file mode 100644 index 0000000..d18a540 --- /dev/null +++ b/abstract-machine/am/src/mips/nemu/cte.c @@ -0,0 +1,54 @@ +#include +#include +#include + +static Context* (*user_handler)(Event, Context*) = NULL; + +Context* __am_irq_handle(Context *c) { + if (user_handler) { + Event ev = {0}; + uint32_t ex_code = 0; + switch (ex_code) { + default: ev.event = EVENT_ERROR; break; + } + + c = user_handler(ev, c); + assert(c != NULL); + } + + return c; +} + +extern void __am_asm_trap(void); + +#define EX_ENTRY 
0x80000180 + +bool cte_init(Context*(*handler)(Event, Context*)) { + // initialize exception entry + const uint32_t j_opcode = 0x08000000; + uint32_t instr = j_opcode | (((uint32_t)__am_asm_trap >> 2) & 0x3ffffff); + *(uint32_t *)EX_ENTRY = instr; + *(uint32_t *)(EX_ENTRY + 4) = 0; // delay slot + *(uint32_t *)0x80000000 = instr; // TLB refill exception + *(uint32_t *)(0x80000000 + 4) = 0; // delay slot + + // register event handler + user_handler = handler; + + return true; +} + +Context *kcontext(Area kstack, void (*entry)(void *), void *arg) { + return NULL; +} + +void yield() { + asm volatile("syscall 1"); +} + +bool ienabled() { + return false; +} + +void iset(bool enable) { +} diff --git a/abstract-machine/am/src/mips/nemu/start.S b/abstract-machine/am/src/mips/nemu/start.S new file mode 100644 index 0000000..6ce9f15 --- /dev/null +++ b/abstract-machine/am/src/mips/nemu/start.S @@ -0,0 +1,10 @@ +.section entry, "ax" +.globl _start +.type _start, @function + +_start: + move $fp, $zero + la $sp, _stack_pointer + jal _trm_init + +.fill 0x200 diff --git a/abstract-machine/am/src/mips/nemu/trap.S b/abstract-machine/am/src/mips/nemu/trap.S new file mode 100644 index 0000000..daa101f --- /dev/null +++ b/abstract-machine/am/src/mips/nemu/trap.S @@ -0,0 +1,71 @@ + +#define MAP(c, f) c(f) + +#define REGS(f) \ + f( 1) f( 2) f( 3) f( 4) f( 5) f( 6) f( 7) f( 8) f( 9) \ +f(10) f(11) f(12) f(13) f(14) f(15) f(16) f(17) f(18) f(19) \ +f(20) f(21) f(22) f(23) f(24) f(25) f(28) \ +f(30) f(31) + +#define PUSH(n) sw $n, (n * 4)($sp); +#define POP(n) lw $n, (n * 4)($sp); + +#define CONTEXT_SIZE ((31 + 6) * 4) +#define OFFSET_SP (29 * 4) +#define OFFSET_LO (32 * 4) +#define OFFSET_HI (33 * 4) +#define OFFSET_CAUSE (34 * 4) +#define OFFSET_STATUS (35 * 4) +#define OFFSET_EPC (36 * 4) + +#define CP0_STATUS 12 +#define CP0_CAUSE 13 +#define CP0_EPC 14 + + +.set noat +.globl __am_asm_trap +__am_asm_trap: + move $k1, $sp + addiu $sp, $sp, -CONTEXT_SIZE + + MAP(REGS, PUSH) + + sw $k1, 
OFFSET_SP($sp) + + mflo $t0 + mfhi $t1 + mfc0 $t2, $CP0_CAUSE + mfc0 $t3, $CP0_STATUS + mfc0 $t4, $CP0_EPC + sw $t0, OFFSET_LO($sp) + sw $t1, OFFSET_HI($sp) + sw $t2, OFFSET_CAUSE($sp) + sw $t3, OFFSET_STATUS($sp) + sw $t4, OFFSET_EPC($sp) + + # allow nested exception + li $a0, ~0x3 + and $t3, $t3, $a0 # clear status.exl and status.ie + mtc0 $t3, $CP0_STATUS + + move $a0, $sp + jal __am_irq_handle + + lw $t0, OFFSET_LO($sp) + lw $t1, OFFSET_HI($sp) + lw $t3, OFFSET_STATUS($sp) + lw $t4, OFFSET_EPC($sp) + + # set status.exl + ori $t3, $t3, 0x2 + + mtlo $t0 + mthi $t1 + mtc0 $t3, $CP0_STATUS + mtc0 $t4, $CP0_EPC + + MAP(REGS, POP) + + addiu $sp, $sp, CONTEXT_SIZE + eret diff --git a/abstract-machine/am/src/mips/nemu/vme.c b/abstract-machine/am/src/mips/nemu/vme.c new file mode 100644 index 0000000..88c0211 --- /dev/null +++ b/abstract-machine/am/src/mips/nemu/vme.c @@ -0,0 +1,42 @@ +#include +#include + +#define USER_SPACE RANGE(0x40000000, 0x80000000) + +static void* (*pgalloc_usr)(int) = NULL; +static void (*pgfree_usr)(void*) = NULL; +static int vme_enable = 0; +static PTE *cur_pdir = NULL; + +bool vme_init(void* (*pgalloc_f)(int), void (*pgfree_f)(void*)) { + pgalloc_usr = pgalloc_f; + pgfree_usr = pgfree_f; + vme_enable = 1; + return true; +} + +void protect(AddrSpace *as) { + as->ptr = (PTE*)(pgalloc_usr(PGSIZE)); + as->pgsize = PGSIZE; + as->area = USER_SPACE; +} + +void unprotect(AddrSpace *as) { +} + +void __am_get_cur_as(Context *c) { + c->pdir = (vme_enable ? 
cur_pdir : NULL); +} + +void __am_switch(Context *c) { + if (vme_enable && c->pdir != NULL) { + cur_pdir = c->pdir; + } +} + +void map(AddrSpace *as, void *va, void *pa, int prot) { +} + +Context *ucontext(AddrSpace *as, Area kstack, void *entry) { + return NULL; +} diff --git a/abstract-machine/am/src/native/cte.c b/abstract-machine/am/src/native/cte.c new file mode 100644 index 0000000..58ef781 --- /dev/null +++ b/abstract-machine/am/src/native/cte.c @@ -0,0 +1,199 @@ +#include +#include +#include "platform.h" + +#define TIMER_HZ 100 +#define SYSCALL_INSTR_LEN 7 + +static Context* (*user_handler)(Event, Context*) = NULL; + +void __am_kcontext_start(); +void __am_switch(Context *c); +int __am_in_userspace(void *addr); +void __am_pmem_protect(); +void __am_pmem_unprotect(); + +void __am_panic_on_return() { panic("should not reach here\n"); } + +static void irq_handle(Context *c) { + c->vm_head = thiscpu->vm_head; + c->ksp = thiscpu->ksp; + + if (thiscpu->ev.event == EVENT_ERROR) { + uintptr_t rip = c->uc.uc_mcontext.gregs[REG_RIP]; + printf("Unhandle signal '%s' at rip = %p, badaddr = %p, cause = 0x%x\n", + thiscpu->ev.msg, rip, thiscpu->ev.ref, thiscpu->ev.cause); + assert(0); + } + c = user_handler(thiscpu->ev, c); + assert(c != NULL); + + __am_switch(c); + + // magic call to restore context + void (*p)(Context *c) = (void *)(uintptr_t)0x100008; + p(c); + __am_panic_on_return(); +} + +static void setup_stack(uintptr_t event, ucontext_t *uc) { + void *rip = (void *)uc->uc_mcontext.gregs[REG_RIP]; + extern uint8_t _start, _etext; + int trap_from_user = __am_in_userspace(rip); + int signal_safe = IN_RANGE(rip, RANGE(&_start, &_etext)) || trap_from_user || + // Hack here: "+13" points to the instruction after syscall. This is the + // instruction which will trigger the pending signal if interrupt is enabled. 
+ (rip == (void *)&sigprocmask + 13); + + if (((event == EVENT_IRQ_IODEV) || (event == EVENT_IRQ_TIMER)) && !signal_safe) { + // Shared libraries contain code which are not reenterable. + // If the signal comes when executing code in shared libraries, + // the signal handler can not call any function which is not signal-safe, + // else the behavior is undefined (may be dead lock). + // To handle this, we just refuse to handle the signal and return directly + // to pretend missing the interrupt. + // See man 7 signal-safety for more information. + return; + } + + if (trap_from_user) __am_pmem_unprotect(); + + // skip the instructions causing SIGSEGV for syscall + if (event == EVENT_SYSCALL) { rip += SYSCALL_INSTR_LEN; } + uc->uc_mcontext.gregs[REG_RIP] = (uintptr_t)rip; + + // switch to kernel stack if we were previously in user space + uintptr_t rsp = trap_from_user ? thiscpu->ksp : uc->uc_mcontext.gregs[REG_RSP]; + rsp -= sizeof(Context); + // keep (rsp + 8) % 16 == 0 to support SSE + if ((rsp + 8) % 16 != 0) rsp -= 8; + Context *c = (void *)rsp; + + // save the context on the stack + c->uc = *uc; + + // disable interrupt + __am_get_intr_sigmask(&uc->uc_sigmask); + + // call irq_handle after returning from the signal handler + uc->uc_mcontext.gregs[REG_RDI] = (uintptr_t)c; + uc->uc_mcontext.gregs[REG_RIP] = (uintptr_t)irq_handle; + uc->uc_mcontext.gregs[REG_RSP] = (uintptr_t)c; +} + +static void iret(ucontext_t *uc) { + Context *c = (void *)uc->uc_mcontext.gregs[REG_RDI]; + // restore the context + *uc = c->uc; + thiscpu->ksp = c->ksp; + if (__am_in_userspace((void *)uc->uc_mcontext.gregs[REG_RIP])) __am_pmem_protect(); +} + +static void sig_handler(int sig, siginfo_t *info, void *ucontext) { + thiscpu->ev = (Event) {0}; + thiscpu->ev.event = EVENT_ERROR; + switch (sig) { + case SIGUSR1: thiscpu->ev.event = EVENT_IRQ_IODEV; break; + case SIGUSR2: thiscpu->ev.event = EVENT_YIELD; break; + case SIGVTALRM: thiscpu->ev.event = EVENT_IRQ_TIMER; break; + case SIGSEGV: + 
if (info->si_code == SEGV_ACCERR) { + switch ((uintptr_t)info->si_addr) { + case 0x100000: thiscpu->ev.event = EVENT_SYSCALL; break; + case 0x100008: iret(ucontext); return; + } + } + if (__am_in_userspace(info->si_addr)) { + assert(thiscpu->ev.event == EVENT_ERROR); + thiscpu->ev.event = EVENT_PAGEFAULT; + switch (info->si_code) { + case SEGV_MAPERR: thiscpu->ev.cause = MMAP_READ; break; + // we do not support mapped user pages with MMAP_NONE + case SEGV_ACCERR: thiscpu->ev.cause = MMAP_WRITE; break; + default: assert(0); + } + thiscpu->ev.ref = (uintptr_t)info->si_addr; + } + break; + } + + if (thiscpu->ev.event == EVENT_ERROR) { + thiscpu->ev.ref = (uintptr_t)info->si_addr; + thiscpu->ev.cause = (uintptr_t)info->si_code; + thiscpu->ev.msg = strsignal(sig); + } + setup_stack(thiscpu->ev.event, ucontext); +} + +// signal handlers are inherited across fork() +static void install_signal_handler() { + struct sigaction s; + memset(&s, 0, sizeof(s)); + s.sa_sigaction = sig_handler; + s.sa_flags = SA_SIGINFO | SA_RESTART | SA_ONSTACK; + __am_get_intr_sigmask(&s.sa_mask); + + int ret = sigaction(SIGVTALRM, &s, NULL); + assert(ret == 0); + ret = sigaction(SIGUSR1, &s, NULL); + assert(ret == 0); + ret = sigaction(SIGUSR2, &s, NULL); + assert(ret == 0); + ret = sigaction(SIGSEGV, &s, NULL); + assert(ret == 0); +} + +// setitimer() are inherited across fork(), should be called again from children +void __am_init_timer_irq() { + iset(0); + + struct itimerval it = {}; + it.it_value.tv_sec = 0; + it.it_value.tv_usec = 1000000 / TIMER_HZ; + it.it_interval = it.it_value; + int ret = setitimer(ITIMER_VIRTUAL, &it, NULL); + assert(ret == 0); +} + +bool cte_init(Context*(*handler)(Event, Context*)) { + user_handler = handler; + + install_signal_handler(); + __am_init_timer_irq(); + return true; +} + +Context* kcontext(Area kstack, void (*entry)(void *), void *arg) { + Context *c = (Context*)kstack.end - 1; + + __am_get_example_uc(c); + c->uc.uc_mcontext.gregs[REG_RIP] = 
(uintptr_t)__am_kcontext_start; + c->uc.uc_mcontext.gregs[REG_RSP] = (uintptr_t)kstack.end; + + int ret = sigemptyset(&(c->uc.uc_sigmask)); // enable interrupt + assert(ret == 0); + + c->vm_head = NULL; + + c->GPR1 = (uintptr_t)arg; + c->GPR2 = (uintptr_t)entry; + return c; +} + +void yield() { + raise(SIGUSR2); +} + +bool ienabled() { + sigset_t set; + int ret = sigprocmask(0, NULL, &set); + assert(ret == 0); + return __am_is_sigmask_sti(&set); +} + +void iset(bool enable) { + extern sigset_t __am_intr_sigmask; + // NOTE: sigprocmask does not supported in multithreading + int ret = sigprocmask(enable ? SIG_UNBLOCK : SIG_BLOCK, &__am_intr_sigmask, NULL); + assert(ret == 0); +} diff --git a/abstract-machine/am/src/native/ioe.c b/abstract-machine/am/src/native/ioe.c new file mode 100644 index 0000000..0cd02c7 --- /dev/null +++ b/abstract-machine/am/src/native/ioe.c @@ -0,0 +1,79 @@ +#include +#include + +bool __am_has_ioe = false; +static bool ioe_init_done = false; + +void __am_timer_init(); +void __am_gpu_init(); +void __am_input_init(); +void __am_audio_init(); +void __am_disk_init(); +void __am_input_config(AM_INPUT_CONFIG_T *); +void __am_timer_config(AM_TIMER_CONFIG_T *); +void __am_timer_rtc(AM_TIMER_RTC_T *); +void __am_timer_uptime(AM_TIMER_UPTIME_T *); +void __am_input_keybrd(AM_INPUT_KEYBRD_T *); +void __am_gpu_config(AM_GPU_CONFIG_T *); +void __am_gpu_status(AM_GPU_STATUS_T *); +void __am_gpu_fbdraw(AM_GPU_FBDRAW_T *); +void __am_audio_config(AM_AUDIO_CONFIG_T *); +void __am_audio_ctrl(AM_AUDIO_CTRL_T *); +void __am_audio_status(AM_AUDIO_STATUS_T *); +void __am_audio_play(AM_AUDIO_PLAY_T *); +void __am_disk_config(AM_DISK_CONFIG_T *cfg); +void __am_disk_status(AM_DISK_STATUS_T *stat); +void __am_disk_blkio(AM_DISK_BLKIO_T *io); +static void __am_uart_config(AM_UART_CONFIG_T *cfg) { cfg->present = false; } +static void __am_net_config (AM_NET_CONFIG_T *cfg) { cfg->present = false; } + +typedef void (*handler_t)(void *buf); +static void *lut[128] = { + 
[AM_TIMER_CONFIG] = __am_timer_config, + [AM_TIMER_RTC ] = __am_timer_rtc, + [AM_TIMER_UPTIME] = __am_timer_uptime, + [AM_INPUT_CONFIG] = __am_input_config, + [AM_INPUT_KEYBRD] = __am_input_keybrd, + [AM_GPU_CONFIG ] = __am_gpu_config, + [AM_GPU_FBDRAW ] = __am_gpu_fbdraw, + [AM_GPU_STATUS ] = __am_gpu_status, + [AM_UART_CONFIG ] = __am_uart_config, + [AM_AUDIO_CONFIG] = __am_audio_config, + [AM_AUDIO_CTRL ] = __am_audio_ctrl, + [AM_AUDIO_STATUS] = __am_audio_status, + [AM_AUDIO_PLAY ] = __am_audio_play, + [AM_DISK_CONFIG ] = __am_disk_config, + [AM_DISK_STATUS ] = __am_disk_status, + [AM_DISK_BLKIO ] = __am_disk_blkio, + [AM_NET_CONFIG ] = __am_net_config, +}; + +bool ioe_init() { + panic_on(cpu_current() != 0, "call ioe_init() in other CPUs"); + panic_on(ioe_init_done, "double-initialization"); + __am_has_ioe = true; + return true; +} + +static void fail(void *buf) { panic("access nonexist register"); } + +void __am_ioe_init() { + for (int i = 0; i < LENGTH(lut); i++) + if (!lut[i]) lut[i] = fail; + __am_timer_init(); + __am_gpu_init(); + __am_input_init(); + __am_audio_init(); + __am_disk_init(); + ioe_init_done = true; +} + +static void do_io(int reg, void *buf) { + if (!ioe_init_done) { + __am_ioe_init(); + } + ((handler_t)lut[reg])(buf); +} + +void ioe_read (int reg, void *buf) { do_io(reg, buf); } +void ioe_write(int reg, void *buf) { do_io(reg, buf); } diff --git a/abstract-machine/am/src/native/ioe/audio.c b/abstract-machine/am/src/native/ioe/audio.c new file mode 100644 index 0000000..de19552 --- /dev/null +++ b/abstract-machine/am/src/native/ioe/audio.c @@ -0,0 +1,72 @@ +#define _GNU_SOURCE +#include +#include +#include +#include + +static int rfd = -1, wfd = -1; +static volatile int count = 0; + +void __am_audio_init() { + int fds[2]; + int ret = pipe2(fds, O_NONBLOCK); + assert(ret == 0); + rfd = fds[0]; + wfd = fds[1]; +} + +static void audio_play(void *userdata, uint8_t *stream, int len) { + int nread = len; + if (count < len) nread = count; + int b 
= 0; + while (b < nread) { + int n = read(rfd, stream + b, nread - b); // append after the bytes already received + if (n > 0) b += n; + } + + count -= nread; + if (len > nread) { + memset(stream + nread, 0, len - nread); + } +} + +static void audio_write(uint8_t *buf, int len) { // queue all `len` bytes of `buf` into the non-blocking pipe + int nwrite = 0; + while (nwrite < len) { + int n = write(wfd, buf + nwrite, len - nwrite); // resume at the first unsent byte after a short write + if (n == -1) n = 0; // nothing was accepted (e.g. pipe full); retry + count += n; + nwrite += n; + } +} + +void __am_audio_ctrl(AM_AUDIO_CTRL_T *ctrl) { + SDL_AudioSpec s = {}; + s.freq = ctrl->freq; + s.format = AUDIO_S16SYS; + s.channels = ctrl->channels; + s.samples = ctrl->samples; + s.callback = audio_play; + s.userdata = NULL; + + count = 0; + int ret = SDL_InitSubSystem(SDL_INIT_AUDIO); + if (ret == 0) { + SDL_OpenAudio(&s, NULL); + SDL_PauseAudio(0); + } +} + +void __am_audio_status(AM_AUDIO_STATUS_T *stat) { + stat->count = count; +} + +void __am_audio_play(AM_AUDIO_PLAY_T *ctl) { + int len = ctl->buf.end - ctl->buf.start; + audio_write(ctl->buf.start, len); +} + +void __am_audio_config(AM_AUDIO_CONFIG_T *cfg) { + cfg->present = true; + cfg->bufsize = fcntl(rfd, F_GETPIPE_SZ); +} diff --git a/abstract-machine/am/src/native/ioe/disk.c b/abstract-machine/am/src/native/ioe/disk.c new file mode 100644 index 0000000..7939a44 --- /dev/null +++ b/abstract-machine/am/src/native/ioe/disk.c @@ -0,0 +1,41 @@ +#include +#include +#include +#include + +#define BLKSZ 512 + +static int disk_size = 0; +static FILE *fp = NULL; + +void __am_disk_init() { + const char *diskimg = getenv("diskimg"); + if (diskimg) { + fp = fopen(diskimg, "r+"); + if (fp) { + fseek(fp, 0, SEEK_END); + disk_size = (ftell(fp) + 511) / 512; + rewind(fp); + } + } +} + +void __am_disk_config(AM_DISK_CONFIG_T *cfg) { + cfg->present = (fp != NULL); + cfg->blksz = BLKSZ; + cfg->blkcnt = disk_size; +} + +void __am_disk_status(AM_DISK_STATUS_T *stat) { + stat->ready = 1; +} + +void __am_disk_blkio(AM_DISK_BLKIO_T *io) { + if (fp) { + fseek(fp, io->blkno * BLKSZ, SEEK_SET); + int ret; + if (io->write) ret = fwrite(io->buf, io->blkcnt * BLKSZ, 1, fp); 
+ else ret = fread(io->buf, io->blkcnt * BLKSZ, 1, fp); + assert(ret == 1); + } +} diff --git a/abstract-machine/am/src/native/ioe/gpu.c b/abstract-machine/am/src/native/ioe/gpu.c new file mode 100644 index 0000000..0309d50 --- /dev/null +++ b/abstract-machine/am/src/native/ioe/gpu.c @@ -0,0 +1,66 @@ +#include +#include +#include + +//#define MODE_800x600 +#ifdef MODE_800x600 +# define W 800 +# define H 600 +#else +# define W 400 +# define H 300 +#endif + +#define FPS 60 + +#define RMASK 0x00ff0000 +#define GMASK 0x0000ff00 +#define BMASK 0x000000ff +#define AMASK 0x00000000 + +static SDL_Window *window = NULL; +static SDL_Surface *surface = NULL; + +static Uint32 texture_sync(Uint32 interval, void *param) { + SDL_BlitScaled(surface, NULL, SDL_GetWindowSurface(window), NULL); + SDL_UpdateWindowSurface(window); + return interval; +} + +void __am_gpu_init() { + SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER); + window = SDL_CreateWindow("Native Application", + SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, +#ifdef MODE_800x600 + W, H, +#else + W * 2, H * 2, +#endif + SDL_WINDOW_OPENGL); + surface = SDL_CreateRGBSurface(SDL_SWSURFACE, W, H, 32, + RMASK, GMASK, BMASK, AMASK); + SDL_AddTimer(1000 / FPS, texture_sync, NULL); +} + +void __am_gpu_config(AM_GPU_CONFIG_T *cfg) { + *cfg = (AM_GPU_CONFIG_T) { + .present = true, .has_accel = false, + .width = W, .height = H, + .vmemsz = 0 + }; +} + +void __am_gpu_status(AM_GPU_STATUS_T *stat) { + stat->ready = true; +} + +void __am_gpu_fbdraw(AM_GPU_FBDRAW_T *ctl) { + int x = ctl->x, y = ctl->y, w = ctl->w, h = ctl->h; + if (w == 0 || h == 0) return; + feclearexcept(-1); + SDL_Surface *s = SDL_CreateRGBSurfaceFrom(ctl->pixels, w, h, 32, w * sizeof(uint32_t), + RMASK, GMASK, BMASK, AMASK); + SDL_Rect rect = { .x = x, .y = y }; + SDL_BlitSurface(s, NULL, surface, &rect); + SDL_FreeSurface(s); +} diff --git a/abstract-machine/am/src/native/ioe/input.c b/abstract-machine/am/src/native/ioe/input.c new file mode 100644 index 
0000000..17c7f34 --- /dev/null +++ b/abstract-machine/am/src/native/ioe/input.c @@ -0,0 +1,63 @@ +#include +#include + +#define KEYDOWN_MASK 0x8000 + +#define KEY_QUEUE_LEN 1024 +static int key_queue[KEY_QUEUE_LEN] = {}; +static int key_f = 0, key_r = 0; +static SDL_mutex *key_queue_lock = NULL; + +#define XX(k) [SDL_SCANCODE_##k] = AM_KEY_##k, +static int keymap[256] = { + AM_KEYS(XX) +}; + +static int event_thread(void *args) { // SDL event loop: queues mapped key events, then calls __am_send_kbd_intr() + SDL_Event event; + while (1) { + SDL_WaitEvent(&event); + switch (event.type) { + case SDL_QUIT: halt(0); + case SDL_KEYDOWN: + case SDL_KEYUP: { + SDL_Keysym k = event.key.keysym; + int keydown = event.key.type == SDL_KEYDOWN; + int scancode = k.scancode; + if (scancode >= 0 && scancode < (int)(sizeof(keymap) / sizeof(keymap[0])) && keymap[scancode] != 0) { // SDL scancodes may exceed the keymap table; ignore those instead of reading out of bounds + int am_code = keymap[scancode] | (keydown ? KEYDOWN_MASK : 0); + SDL_LockMutex(key_queue_lock); + key_queue[key_r] = am_code; + key_r = (key_r + 1) % KEY_QUEUE_LEN; + SDL_UnlockMutex(key_queue_lock); + void __am_send_kbd_intr(); + __am_send_kbd_intr(); + } + break; + } + } + } +} + +void __am_input_init() { + key_queue_lock = SDL_CreateMutex(); + SDL_CreateThread(event_thread, "event thread", NULL); +} + +void __am_input_config(AM_INPUT_CONFIG_T *cfg) { + cfg->present = true; +} + +void __am_input_keybrd(AM_INPUT_KEYBRD_T *kbd) { + int k = AM_KEY_NONE; + + SDL_LockMutex(key_queue_lock); + if (key_f != key_r) { + k = key_queue[key_f]; + key_f = (key_f + 1) % KEY_QUEUE_LEN; + } + SDL_UnlockMutex(key_queue_lock); + + kbd->keydown = (k & KEYDOWN_MASK ? 
true : false); + kbd->keycode = k & ~KEYDOWN_MASK; +} diff --git a/abstract-machine/am/src/native/ioe/timer.c b/abstract-machine/am/src/native/ioe/timer.c new file mode 100644 index 0000000..fae45b1 --- /dev/null +++ b/abstract-machine/am/src/native/ioe/timer.c @@ -0,0 +1,32 @@ +#include +#include +#include + +static struct timeval boot_time = {}; + +void __am_timer_config(AM_TIMER_CONFIG_T *cfg) { + cfg->present = cfg->has_rtc = true; +} + +void __am_timer_rtc(AM_TIMER_RTC_T *rtc) { + time_t t = time(NULL); + struct tm *tm = localtime(&t); + rtc->second = tm->tm_sec; + rtc->minute = tm->tm_min; + rtc->hour = tm->tm_hour; + rtc->day = tm->tm_mday; + rtc->month = tm->tm_mon + 1; + rtc->year = tm->tm_year + 1900; +} + +void __am_timer_uptime(AM_TIMER_UPTIME_T *uptime) { // microseconds elapsed since __am_timer_init() recorded boot_time + struct timeval now; + gettimeofday(&now, NULL); + long seconds = now.tv_sec - boot_time.tv_sec; + long useconds = now.tv_usec - boot_time.tv_usec; + uptime->us = seconds * 1000000 + useconds; // exact difference; useconds may be negative and is offset by seconds +} + +void __am_timer_init() { + gettimeofday(&boot_time, NULL); +} diff --git a/abstract-machine/am/src/native/mpe.c b/abstract-machine/am/src/native/mpe.c new file mode 100644 index 0000000..a25c0fe --- /dev/null +++ b/abstract-machine/am/src/native/mpe.c @@ -0,0 +1,51 @@ +#include +#include "platform.h" + +int __am_mpe_init = 0; +extern bool __am_has_ioe; +void __am_ioe_init(); + +bool mpe_init(void (*entry)()) { + __am_mpe_init = 1; + + int sync_pipe[2]; + int ret = pipe(sync_pipe); assert(ret == 0); // keep the call outside assert() so it still runs if assertions are compiled out + + for (int i = 1; i < cpu_count(); i++) { + if (fork() == 0) { + char ch; + ret = read(sync_pipe[0], &ch, 1); assert(ret == 1); // child blocks here until the parent writes its '+' + assert(ch == '+'); + close(sync_pipe[0]); close(sync_pipe[1]); + + thiscpu->cpuid = i; + __am_init_timer_irq(); + entry(); + } + } + + if (__am_has_ioe) { + __am_ioe_init(); + } + + for (int i = 1; i < cpu_count(); i++) { + ret = write(sync_pipe[1], "+", 1); assert(ret == 1); // release one waiting child per byte + } + close(sync_pipe[0]); close(sync_pipe[1]); + + entry(); + panic("MP entry should not return\n"); +} + +int cpu_count() { + extern 
int __am_ncpu; + return __am_ncpu; +} + +int cpu_current() { + return thiscpu->cpuid; +} + +int atomic_xchg(int *addr, int newval) { + return atomic_exchange((int *)addr, newval); +} diff --git a/abstract-machine/am/src/native/platform.c b/abstract-machine/am/src/native/platform.c new file mode 100644 index 0000000..02f8941 --- /dev/null +++ b/abstract-machine/am/src/native/platform.c @@ -0,0 +1,230 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include "platform.h" + +#define MAX_CPU 16 +#define TRAP_PAGE_START (void *)0x100000 +#define PMEM_START (void *)0x1000000 // for nanos-lite with vme disabled +#define PMEM_SIZE (128 * 1024 * 1024) // 128MB +static int pmem_fd = 0; +static void *pmem = NULL; +static ucontext_t uc_example = {}; +static void *(*memcpy_libc)(void *, const void *, size_t) = NULL; +sigset_t __am_intr_sigmask = {}; +__am_cpu_t *__am_cpu_struct = NULL; +int __am_ncpu = 0; +int __am_pgsize = 0; + +static void save_context_handler(int sig, siginfo_t *info, void *ucontext) { + memcpy_libc(&uc_example, ucontext, sizeof(uc_example)); +} + +static void save_example_context() { + // getcontext() does not save segment registers. In the signal + // handler, restoring a context previously saved by getcontext() + // will trigger segmentation fault because of the invalid segment + // registers. So we save the example context during signal handling + // to get a context with everything valid. 
+ struct sigaction s; + void *(*memset_libc)(void *, int, size_t) = dlsym(RTLD_NEXT, "memset"); + memset_libc(&s, 0, sizeof(s)); + s.sa_sigaction = save_context_handler; + s.sa_flags = SA_SIGINFO; + int ret = sigaction(SIGUSR1, &s, NULL); + assert(ret == 0); + + raise(SIGUSR1); + + s.sa_flags = 0; + s.sa_handler = SIG_DFL; + ret = sigaction(SIGUSR1, &s, NULL); + assert(ret == 0); +} + +static void setup_sigaltstack() { + assert(sizeof(thiscpu->sigstack) >= SIGSTKSZ); + stack_t ss; + ss.ss_sp = thiscpu->sigstack; + ss.ss_size = sizeof(thiscpu->sigstack); + ss.ss_flags = 0; + int ret = sigaltstack(&ss, NULL); + assert(ret == 0); +} + +int main(const char *args); + +static void init_platform() __attribute__((constructor)); +static void init_platform() { + // create memory object and set up mapping to simulate the physical memory + pmem_fd = memfd_create("pmem", 0); + assert(pmem_fd != -1); + // use dynamic linking to avoid linking to the same function in RT-Thread + int (*ftruncate_libc)(int, off_t) = dlsym(RTLD_NEXT, "ftruncate"); + assert(ftruncate_libc != NULL); + int ret2 = ftruncate_libc(pmem_fd, PMEM_SIZE); + assert(ret2 == 0); + + pmem = mmap(PMEM_START, PMEM_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_SHARED | MAP_FIXED, pmem_fd, 0); + assert(pmem != (void *)-1); + + // allocate private per-cpu structure + thiscpu = mmap(NULL, sizeof(*thiscpu), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + assert(thiscpu != (void *)-1); + thiscpu->cpuid = 0; + thiscpu->vm_head = NULL; + + // create trap page to receive syscall and yield by SIGSEGV + int sys_pgsz = sysconf(_SC_PAGESIZE); + void *ret = mmap(TRAP_PAGE_START, sys_pgsz, PROT_NONE, + MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + assert(ret != (void *)-1); + + // save the address of memcpy() in glibc, since it may be linked with klib + memcpy_libc = dlsym(RTLD_NEXT, "memcpy"); + assert(memcpy_libc != NULL); + + // remap writable sections as MAP_SHARED + Elf64_Phdr *phdr = (void 
*)getauxval(AT_PHDR); + int phnum = (int)getauxval(AT_PHNUM); + int i; + for (i = 0; i < phnum; i ++) { + if (phdr[i].p_type == PT_LOAD && (phdr[i].p_flags & PF_W)) { + // allocate temporary memory + extern char end; + void *vaddr = (void *)&end - phdr[i].p_memsz; + uintptr_t pad = (uintptr_t)vaddr & 0xfff; + void *vaddr_align = vaddr - pad; + uintptr_t size = phdr[i].p_memsz + pad; + void *temp_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + assert(temp_mem != (void *)-1); + + // save data and bss sections + memcpy_libc(temp_mem, vaddr_align, size); + + // save the address of mmap() which will be used after munamp(), + // since calling the library functions requires accessing GOT, which will be unmapped + void *(*mmap_libc)(void *, size_t, int, int, int, off_t) = dlsym(RTLD_NEXT, "mmap"); + assert(mmap_libc != NULL); + // load the address of memcpy() on stack, which can still be accessed + // after the data section is unmapped + void *(*volatile memcpy_libc_temp)(void *, const void *, size_t) = memcpy_libc; + + // unmap the data and bss sections + ret2 = munmap(vaddr_align, size); + assert(ret2 == 0); + + // map the sections again with MAP_SHARED, which will be shared across fork() + ret = mmap_libc(vaddr_align, size, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, -1, 0); + assert(ret == vaddr_align); + + // restore the data in the sections + memcpy_libc_temp(vaddr_align, temp_mem, size); + + // unmap the temporary memory + ret2 = munmap(temp_mem, size); + assert(ret2 == 0); + } + } + + // set up the AM heap + heap = RANGE(pmem, pmem + PMEM_SIZE); + + // initialize sigmask for interrupts + ret2 = sigemptyset(&__am_intr_sigmask); + assert(ret2 == 0); + ret2 = sigaddset(&__am_intr_sigmask, SIGVTALRM); + assert(ret2 == 0); + ret2 = sigaddset(&__am_intr_sigmask, SIGUSR1); + assert(ret2 == 0); + + // setup alternative signal stack + setup_sigaltstack(); + + // save the context template + 
save_example_context(); + uc_example.uc_mcontext.fpregs = NULL; // clear the FPU context + __am_get_intr_sigmask(&uc_example.uc_sigmask); + + // disable interrupts by default + iset(0); + + // set ncpu + const char *smp = getenv("smp"); + __am_ncpu = smp ? atoi(smp) : 1; + assert(0 < __am_ncpu && __am_ncpu <= MAX_CPU); + + // set pgsize + const char *pgsize = getenv("pgsize"); + __am_pgsize = pgsize ? atoi(pgsize) : sys_pgsz; + assert(__am_pgsize > 0 && __am_pgsize % sys_pgsz == 0); + + // set stdout unbuffered + setbuf(stdout, NULL); + + const char *args = getenv("mainargs"); + halt(main(args ? args : "")); // call main here! +} + +void __am_exit_platform(int code) { + // let Linux clean up other resource + extern int __am_mpe_init; + if (__am_mpe_init && cpu_count() > 1) kill(0, SIGKILL); + exit(code); +} + +void __am_pmem_map(void *va, void *pa, int prot) { + // translate AM prot to mmap prot + int mmap_prot = PROT_NONE; + // we do not support executable bit, so mark + // all readable pages executable as well + if (prot & MMAP_READ) mmap_prot |= PROT_READ | PROT_EXEC; + if (prot & MMAP_WRITE) mmap_prot |= PROT_WRITE; + void *ret = mmap(va, __am_pgsize, mmap_prot, + MAP_SHARED | MAP_FIXED, pmem_fd, (uintptr_t)(pa - pmem)); + assert(ret != (void *)-1); +} + +void __am_pmem_unmap(void *va) { + int ret = munmap(va, __am_pgsize); + assert(ret == 0); +} + +void __am_get_example_uc(Context *r) { + memcpy_libc(&r->uc, &uc_example, sizeof(uc_example)); +} + +void __am_get_intr_sigmask(sigset_t *s) { + memcpy_libc(s, &__am_intr_sigmask, sizeof(__am_intr_sigmask)); +} + +int __am_is_sigmask_sti(sigset_t *s) { + return !sigismember(s, SIGVTALRM); +} + +void __am_send_kbd_intr() { + kill(getpid(), SIGUSR1); +} + +void __am_pmem_protect() { +// int ret = mprotect(PMEM_START, PMEM_SIZE, PROT_NONE); +// assert(ret == 0); +} + +void __am_pmem_unprotect() { +// int ret = mprotect(PMEM_START, PMEM_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC); +// assert(ret == 0); +} + +// This dummy 
function will be called in trm.c. +// The purpose of this dummy function is to let the linker add this file to the object +// file set. Without it, the init_platform() constructor will not be linked. +void __am_platform_dummy() { +} diff --git a/abstract-machine/am/src/native/platform.h b/abstract-machine/am/src/native/platform.h new file mode 100644 index 0000000..64e775a --- /dev/null +++ b/abstract-machine/am/src/native/platform.h @@ -0,0 +1,28 @@ +#ifndef __PLATFORM_H__ +#define __PLATFORM_H__ + +#include +#include +#include +#include +#include + +void __am_get_example_uc(Context *r); +void __am_get_intr_sigmask(sigset_t *s); +int __am_is_sigmask_sti(sigset_t *s); +void __am_init_timer_irq(); +void __am_pmem_map(void *va, void *pa, int prot); +void __am_pmem_unmap(void *va); + +// per-cpu structure +typedef struct { + void *vm_head; + uintptr_t ksp; + int cpuid; + Event ev; // similar to cause register in mips/riscv + uint8_t sigstack[32768]; +} __am_cpu_t; +extern __am_cpu_t *__am_cpu_struct; +#define thiscpu __am_cpu_struct + +#endif diff --git a/abstract-machine/am/src/native/trap.S b/abstract-machine/am/src/native/trap.S new file mode 100644 index 0000000..ac9107a --- /dev/null +++ b/abstract-machine/am/src/native/trap.S @@ -0,0 +1,10 @@ +.global __am_kcontext_start +__am_kcontext_start: + // rdi = arg, rsi = entry + + // (rsp + 8) should be a multiple of 16 when + // control is transferred to the function entry point. 
+ // See amd64 ABI manual for more details + andq $0xfffffffffffffff0, %rsp + call *%rsi + call __am_panic_on_return diff --git a/abstract-machine/am/src/native/trm.c b/abstract-machine/am/src/native/trm.c new file mode 100644 index 0000000..95c9295 --- /dev/null +++ b/abstract-machine/am/src/native/trm.c @@ -0,0 +1,30 @@ +#include +#include +#include + +void __am_platform_dummy(); +void __am_exit_platform(int code); + +void trm_init() { + __am_platform_dummy(); +} + +void putch(char ch) { + putchar(ch); +} + +void halt(int code) { + const char *fmt = "Exit code = 40h\n"; + for (const char *p = fmt; *p; p++) { + char ch = *p; + if (ch == '0' || ch == '4') { + ch = "0123456789abcdef"[(code >> (ch - '0')) & 0xf]; + } + putch(ch); + } + __am_exit_platform(code); + putstr("Should not reach here!\n"); + while (1); +} + +Area heap = {}; diff --git a/abstract-machine/am/src/native/vme.c b/abstract-machine/am/src/native/vme.c new file mode 100644 index 0000000..5b622cd --- /dev/null +++ b/abstract-machine/am/src/native/vme.c @@ -0,0 +1,141 @@ +#define _GNU_SOURCE +#include +#include "platform.h" + +#define USER_SPACE RANGE(0x40000000, 0xc0000000) + +typedef struct PageMap { + void *va; + void *pa; + struct PageMap *next; + int prot; + int is_mapped; + char key[32]; // used for hsearch_r() +} PageMap; + +typedef struct VMHead { + PageMap *head; + struct hsearch_data hash; + int nr_page; +} VMHead; + +#define list_foreach(p, head) \ + for (p = (PageMap *)(head); p != NULL; p = p->next) + +extern int __am_pgsize; +static int vme_enable = 0; +static void* (*pgalloc)(int) = NULL; +static void (*pgfree)(void *) = NULL; + +bool vme_init(void* (*pgalloc_f)(int), void (*pgfree_f)(void*)) { + pgalloc = pgalloc_f; + pgfree = pgfree_f; + vme_enable = 1; + return true; +} + +void protect(AddrSpace *as) { + assert(as != NULL); + VMHead *h = pgalloc(__am_pgsize); // used as head of the list + assert(h != NULL); + memset(h, 0, sizeof(*h)); + int max_pg = (USER_SPACE.end - 
USER_SPACE.start) / __am_pgsize; + int ret = hcreate_r(max_pg, &h->hash); + assert(ret != 0); + + as->ptr = h; + as->pgsize = __am_pgsize; + as->area = USER_SPACE; +} + +void unprotect(AddrSpace *as) { +} + +void __am_switch(Context *c) { + if (!vme_enable) return; + + VMHead *head = c->vm_head; + VMHead *now_head = thiscpu->vm_head; + if (head == now_head) goto end; + + PageMap *pp; + if (now_head != NULL) { + // munmap all mappings + list_foreach(pp, now_head->head) { + if (pp->is_mapped) { + __am_pmem_unmap(pp->va); + pp->is_mapped = false; + } + } + } + + if (head != NULL) { + // mmap all mappings + list_foreach(pp, head->head) { + assert(IN_RANGE(pp->va, USER_SPACE)); + __am_pmem_map(pp->va, pp->pa, pp->prot); + pp->is_mapped = true; + } + } + +end: + thiscpu->vm_head = head; +} + +void map(AddrSpace *as, void *va, void *pa, int prot) { + assert(IN_RANGE(va, USER_SPACE)); + assert((uintptr_t)va % __am_pgsize == 0); + assert((uintptr_t)pa % __am_pgsize == 0); + assert(as != NULL); + PageMap *pp = NULL; + VMHead *vm_head = as->ptr; + assert(vm_head != NULL); + char buf[32]; + snprintf(buf, 32, "%x", va); + ENTRY item = { .key = buf }; + ENTRY *item_find; + hsearch_r(item, FIND, &item_find, &vm_head->hash); + if (item_find == NULL) { + pp = pgalloc(__am_pgsize); // this will waste memory, any better idea? 
+ snprintf(pp->key, 32, "%x", va); + item.key = pp->key; + item.data = pp; + int ret = hsearch_r(item, ENTER, &item_find, &vm_head->hash); + assert(ret != 0); + vm_head->nr_page ++; + pp->next = vm_head->head; // link a new node exactly once; relinking on remap would make the list cyclic + vm_head->head = pp; + } else { + pp = item_find->data; // va already recorded: node is already on the list, only its fields are refreshed below + } + pp->va = va; + pp->pa = pa; + pp->prot = prot; + pp->is_mapped = false; + + if (vm_head == thiscpu->vm_head) { + // enforce the map immediately + __am_pmem_map(pp->va, pp->pa, pp->prot); + pp->is_mapped = true; + } +} + +Context* ucontext(AddrSpace *as, Area kstack, void *entry) { + Context *c = (Context*)kstack.end - 1; + + __am_get_example_uc(c); + c->uc.uc_mcontext.gregs[REG_RIP] = (uintptr_t)entry; + c->uc.uc_mcontext.gregs[REG_RSP] = (uintptr_t)USER_SPACE.end; + + int ret = sigemptyset(&(c->uc.uc_sigmask)); // enable interrupt + assert(ret == 0); + c->vm_head = as->ptr; + + c->ksp = (uintptr_t)kstack.end; + + return c; +} + +int __am_in_userspace(void *addr) { + return vme_enable && thiscpu->vm_head != NULL && IN_RANGE(addr, USER_SPACE); +} diff --git a/abstract-machine/am/src/platform/dummy/cte.c b/abstract-machine/am/src/platform/dummy/cte.c new file mode 100644 index 0000000..c3094ab --- /dev/null +++ b/abstract-machine/am/src/platform/dummy/cte.c @@ -0,0 +1,19 @@ +#include + +bool cte_init(Context*(*handler)(Event, Context*)) { + return false; +} + +Context *kcontext(Area kstack, void (*entry)(void *), void *arg) { + return NULL; +} + +void yield() { +} + +bool ienabled() { + return false; +} + +void iset(bool enable) { +} diff --git a/abstract-machine/am/src/platform/dummy/ioe.c b/abstract-machine/am/src/platform/dummy/ioe.c new file mode 100644 index 0000000..369ab7e --- /dev/null +++ b/abstract-machine/am/src/platform/dummy/ioe.c @@ -0,0 +1,11 @@ +#include +#include + +static void fail(void *buf) { panic("access nonexist register"); } + +bool ioe_init() { + return false; +} + +void ioe_read (int reg, void *buf) { fail(buf); } +void ioe_write(int reg, void *buf) { fail(buf); } diff 
--git a/abstract-machine/am/src/platform/dummy/mpe.c b/abstract-machine/am/src/platform/dummy/mpe.c new file mode 100644 index 0000000..6715aa2 --- /dev/null +++ b/abstract-machine/am/src/platform/dummy/mpe.c @@ -0,0 +1,17 @@ +#include + +bool mpe_init(void (*entry)()) { + return false; +} + +int cpu_count() { + return 1; +} + +int cpu_current() { + return 0; +} + +int atomic_xchg(int *addr, int newval) { + return 0; +} diff --git a/abstract-machine/am/src/platform/dummy/trm.c b/abstract-machine/am/src/platform/dummy/trm.c new file mode 100644 index 0000000..3fd84e2 --- /dev/null +++ b/abstract-machine/am/src/platform/dummy/trm.c @@ -0,0 +1,10 @@ +#include + +Area heap = RANGE(NULL, NULL); + +void putch(char ch) { +} + +void halt(int code) { + while (1); +} diff --git a/abstract-machine/am/src/platform/dummy/vme.c b/abstract-machine/am/src/platform/dummy/vme.c new file mode 100644 index 0000000..5134154 --- /dev/null +++ b/abstract-machine/am/src/platform/dummy/vme.c @@ -0,0 +1,18 @@ +#include + +bool vme_init(void* (*pgalloc_f)(int), void (*pgfree_f)(void*)) { + return false; +} + +void protect(AddrSpace *as) { +} + +void unprotect(AddrSpace *as) { +} + +void map(AddrSpace *as, void *va, void *pa, int prot) { +} + +Context *ucontext(AddrSpace *as, Area kstack, void *entry) { + return NULL; +} diff --git a/abstract-machine/am/src/platform/nemu/include/nemu.h b/abstract-machine/am/src/platform/nemu/include/nemu.h new file mode 100644 index 0000000..0030772 --- /dev/null +++ b/abstract-machine/am/src/platform/nemu/include/nemu.h @@ -0,0 +1,50 @@ +#ifndef NEMU_H__ +#define NEMU_H__ + +#include + +#include ISA_H // the macro `ISA_H` is defined in CFLAGS + // it will be expanded as "x86/x86.h", "mips/mips32.h", ... 
+ +#if defined(__ISA_X86__) +# define nemu_trap(code) asm volatile ("int3" : :"a"(code)) +#elif defined(__ISA_MIPS32__) +# define nemu_trap(code) asm volatile ("move $v0, %0; sdbbp" : :"r"(code)) +#elif defined(__riscv) +# define nemu_trap(code) asm volatile("mv a0, %0; ebreak" : :"r"(code)) +#elif defined(__ISA_LOONGARCH32R__) +# define nemu_trap(code) asm volatile("move $a0, %0; break 0" : :"r"(code)) +#elif +# error unsupported ISA __ISA__ +#endif + +#if defined(__ARCH_X86_NEMU) +# define DEVICE_BASE 0x0 +#else +# define DEVICE_BASE 0xa0000000 +#endif + +#define MMIO_BASE 0xa0000000 + +#define SERIAL_PORT (DEVICE_BASE + 0x00003f8) +#define KBD_ADDR (DEVICE_BASE + 0x0000060) +#define RTC_ADDR (DEVICE_BASE + 0x0000048) +#define VGACTL_ADDR (DEVICE_BASE + 0x0000100) +#define AUDIO_ADDR (DEVICE_BASE + 0x0000200) +#define DISK_ADDR (DEVICE_BASE + 0x0000300) +#define FB_ADDR (MMIO_BASE + 0x1000000) +#define AUDIO_SBUF_ADDR (MMIO_BASE + 0x1200000) + +extern char _pmem_start; +#define PMEM_SIZE (128 * 1024 * 1024) +#define PMEM_END ((uintptr_t)&_pmem_start + PMEM_SIZE) +#define NEMU_PADDR_SPACE \ + RANGE(&_pmem_start, PMEM_END), \ + RANGE(FB_ADDR, FB_ADDR + 0x200000), \ + RANGE(MMIO_BASE, MMIO_BASE + 0x1000) /* serial, rtc, screen, keyboard */ + +typedef uintptr_t PTE; + +#define PGSIZE 4096 + +#endif diff --git a/abstract-machine/am/src/platform/nemu/ioe/audio.c b/abstract-machine/am/src/platform/nemu/ioe/audio.c new file mode 100644 index 0000000..81839c4 --- /dev/null +++ b/abstract-machine/am/src/platform/nemu/ioe/audio.c @@ -0,0 +1,26 @@ +#include +#include + +#define AUDIO_FREQ_ADDR (AUDIO_ADDR + 0x00) +#define AUDIO_CHANNELS_ADDR (AUDIO_ADDR + 0x04) +#define AUDIO_SAMPLES_ADDR (AUDIO_ADDR + 0x08) +#define AUDIO_SBUF_SIZE_ADDR (AUDIO_ADDR + 0x0c) +#define AUDIO_INIT_ADDR (AUDIO_ADDR + 0x10) +#define AUDIO_COUNT_ADDR (AUDIO_ADDR + 0x14) + +void __am_audio_init() { +} + +void __am_audio_config(AM_AUDIO_CONFIG_T *cfg) { + cfg->present = false; +} + +void 
__am_audio_ctrl(AM_AUDIO_CTRL_T *ctrl) { +} + +void __am_audio_status(AM_AUDIO_STATUS_T *stat) { + stat->count = 0; +} + +void __am_audio_play(AM_AUDIO_PLAY_T *ctl) { +} diff --git a/abstract-machine/am/src/platform/nemu/ioe/disk.c b/abstract-machine/am/src/platform/nemu/ioe/disk.c new file mode 100644 index 0000000..8f5dc81 --- /dev/null +++ b/abstract-machine/am/src/platform/nemu/ioe/disk.c @@ -0,0 +1,12 @@ +#include +#include + +void __am_disk_config(AM_DISK_CONFIG_T *cfg) { + cfg->present = false; +} + +void __am_disk_status(AM_DISK_STATUS_T *stat) { +} + +void __am_disk_blkio(AM_DISK_BLKIO_T *io) { +} diff --git a/abstract-machine/am/src/platform/nemu/ioe/gpu.c b/abstract-machine/am/src/platform/nemu/ioe/gpu.c new file mode 100644 index 0000000..fb33096 --- /dev/null +++ b/abstract-machine/am/src/platform/nemu/ioe/gpu.c @@ -0,0 +1,25 @@ +#include +#include + +#define SYNC_ADDR (VGACTL_ADDR + 4) + +void __am_gpu_init() { +} + +void __am_gpu_config(AM_GPU_CONFIG_T *cfg) { + *cfg = (AM_GPU_CONFIG_T) { + .present = true, .has_accel = false, + .width = 0, .height = 0, + .vmemsz = 0 + }; +} + +void __am_gpu_fbdraw(AM_GPU_FBDRAW_T *ctl) { + if (ctl->sync) { + outl(SYNC_ADDR, 1); + } +} + +void __am_gpu_status(AM_GPU_STATUS_T *status) { + status->ready = true; +} diff --git a/abstract-machine/am/src/platform/nemu/ioe/input.c b/abstract-machine/am/src/platform/nemu/ioe/input.c new file mode 100644 index 0000000..9cecca2 --- /dev/null +++ b/abstract-machine/am/src/platform/nemu/ioe/input.c @@ -0,0 +1,9 @@ +#include +#include + +#define KEYDOWN_MASK 0x8000 + +void __am_input_keybrd(AM_INPUT_KEYBRD_T *kbd) { + kbd->keydown = 0; + kbd->keycode = AM_KEY_NONE; +} diff --git a/abstract-machine/am/src/platform/nemu/ioe/ioe.c b/abstract-machine/am/src/platform/nemu/ioe/ioe.c new file mode 100644 index 0000000..cdf35d1 --- /dev/null +++ b/abstract-machine/am/src/platform/nemu/ioe/ioe.c @@ -0,0 +1,59 @@ +#include +#include + +void __am_timer_init(); +void __am_gpu_init(); +void 
__am_audio_init(); +void __am_input_keybrd(AM_INPUT_KEYBRD_T *); +void __am_timer_rtc(AM_TIMER_RTC_T *); +void __am_timer_uptime(AM_TIMER_UPTIME_T *); +void __am_gpu_config(AM_GPU_CONFIG_T *); +void __am_gpu_status(AM_GPU_STATUS_T *); +void __am_gpu_fbdraw(AM_GPU_FBDRAW_T *); +void __am_audio_config(AM_AUDIO_CONFIG_T *); +void __am_audio_ctrl(AM_AUDIO_CTRL_T *); +void __am_audio_status(AM_AUDIO_STATUS_T *); +void __am_audio_play(AM_AUDIO_PLAY_T *); +void __am_disk_config(AM_DISK_CONFIG_T *cfg); +void __am_disk_status(AM_DISK_STATUS_T *stat); +void __am_disk_blkio(AM_DISK_BLKIO_T *io); + +static void __am_timer_config(AM_TIMER_CONFIG_T *cfg) { cfg->present = true; cfg->has_rtc = true; } +static void __am_input_config(AM_INPUT_CONFIG_T *cfg) { cfg->present = true; } +static void __am_uart_config(AM_UART_CONFIG_T *cfg) { cfg->present = false; } +static void __am_net_config (AM_NET_CONFIG_T *cfg) { cfg->present = false; } + +typedef void (*handler_t)(void *buf); +static void *lut[128] = { + [AM_TIMER_CONFIG] = __am_timer_config, + [AM_TIMER_RTC ] = __am_timer_rtc, + [AM_TIMER_UPTIME] = __am_timer_uptime, + [AM_INPUT_CONFIG] = __am_input_config, + [AM_INPUT_KEYBRD] = __am_input_keybrd, + [AM_GPU_CONFIG ] = __am_gpu_config, + [AM_GPU_FBDRAW ] = __am_gpu_fbdraw, + [AM_GPU_STATUS ] = __am_gpu_status, + [AM_UART_CONFIG ] = __am_uart_config, + [AM_AUDIO_CONFIG] = __am_audio_config, + [AM_AUDIO_CTRL ] = __am_audio_ctrl, + [AM_AUDIO_STATUS] = __am_audio_status, + [AM_AUDIO_PLAY ] = __am_audio_play, + [AM_DISK_CONFIG ] = __am_disk_config, + [AM_DISK_STATUS ] = __am_disk_status, + [AM_DISK_BLKIO ] = __am_disk_blkio, + [AM_NET_CONFIG ] = __am_net_config, +}; + +static void fail(void *buf) { panic("access nonexist register"); } + +bool ioe_init() { + for (int i = 0; i < LENGTH(lut); i++) + if (!lut[i]) lut[i] = fail; + __am_gpu_init(); + __am_timer_init(); + __am_audio_init(); + return true; +} + +void ioe_read (int reg, void *buf) { ((handler_t)lut[reg])(buf); } +void 
ioe_write(int reg, void *buf) { ((handler_t)lut[reg])(buf); } diff --git a/abstract-machine/am/src/platform/nemu/ioe/timer.c b/abstract-machine/am/src/platform/nemu/ioe/timer.c new file mode 100644 index 0000000..f173ed4 --- /dev/null +++ b/abstract-machine/am/src/platform/nemu/ioe/timer.c @@ -0,0 +1,18 @@ +#include +#include + +void __am_timer_init() { +} + +void __am_timer_uptime(AM_TIMER_UPTIME_T *uptime) { + uptime->us = 0; +} + +void __am_timer_rtc(AM_TIMER_RTC_T *rtc) { + rtc->second = 0; + rtc->minute = 0; + rtc->hour = 0; + rtc->day = 0; + rtc->month = 0; + rtc->year = 1900; +} diff --git a/abstract-machine/am/src/platform/nemu/mpe.c b/abstract-machine/am/src/platform/nemu/mpe.c new file mode 100644 index 0000000..3ab5e4e --- /dev/null +++ b/abstract-machine/am/src/platform/nemu/mpe.c @@ -0,0 +1,20 @@ +#include +#include +#include + +bool mpe_init(void (*entry)()) { + entry(); + panic("MPE entry returns"); +} + +int cpu_count() { + return 1; +} + +int cpu_current() { + return 0; +} + +int atomic_xchg(int *addr, int newval) { + return atomic_exchange(addr, newval); +} diff --git a/abstract-machine/am/src/platform/nemu/trm.c b/abstract-machine/am/src/platform/nemu/trm.c new file mode 100644 index 0000000..f1802aa --- /dev/null +++ b/abstract-machine/am/src/platform/nemu/trm.c @@ -0,0 +1,27 @@ +#include +#include + +extern char _heap_start; +int main(const char *args); + +Area heap = RANGE(&_heap_start, PMEM_END); +#ifndef MAINARGS +#define MAINARGS "" +#endif +static const char mainargs[] = MAINARGS; + +void putch(char ch) { + outb(SERIAL_PORT, ch); +} + +void halt(int code) { + nemu_trap(code); + + // should not reach here + while (1); +} + +void _trm_init() { + int ret = main(mainargs); + halt(ret); +} diff --git a/abstract-machine/am/src/riscv/nemu/cte.c b/abstract-machine/am/src/riscv/nemu/cte.c new file mode 100644 index 0000000..77a357c --- /dev/null +++ b/abstract-machine/am/src/riscv/nemu/cte.c @@ -0,0 +1,50 @@ +#include +#include +#include + +static 
Context* (*user_handler)(Event, Context*) = NULL; + +Context* __am_irq_handle(Context *c) { + if (user_handler) { + Event ev = {0}; + switch (c->mcause) { + default: ev.event = EVENT_ERROR; break; + } + + c = user_handler(ev, c); + assert(c != NULL); + } + + return c; +} + +extern void __am_asm_trap(void); + +bool cte_init(Context*(*handler)(Event, Context*)) { + // initialize exception entry + asm volatile("csrw mtvec, %0" : : "r"(__am_asm_trap)); + + // register event handler + user_handler = handler; + + return true; +} + +Context *kcontext(Area kstack, void (*entry)(void *), void *arg) { + return NULL; +} + +void yield() { +#ifdef __riscv_e + asm volatile("li a5, -1; ecall"); +#else + asm volatile("li a7, -1; ecall"); +#endif +} + +bool ienabled() { + return false; +} + +void iset(bool enable) { +} diff --git a/abstract-machine/am/src/riscv/nemu/start.S b/abstract-machine/am/src/riscv/nemu/start.S new file mode 100644 index 0000000..3e56e5c --- /dev/null +++ b/abstract-machine/am/src/riscv/nemu/start.S @@ -0,0 +1,8 @@ +.section entry, "ax" +.globl _start +.type _start, @function + +_start: + mv s0, zero + la sp, _stack_pointer + jal _trm_init diff --git a/abstract-machine/am/src/riscv/nemu/trap.S b/abstract-machine/am/src/riscv/nemu/trap.S new file mode 100644 index 0000000..5ec275a --- /dev/null +++ b/abstract-machine/am/src/riscv/nemu/trap.S @@ -0,0 +1,71 @@ +#define concat_temp(x, y) x ## y +#define concat(x, y) concat_temp(x, y) +#define MAP(c, f) c(f) + +#if __riscv_xlen == 32 +#define LOAD lw +#define STORE sw +#define XLEN 4 +#else +#define LOAD ld +#define STORE sd +#define XLEN 8 +#endif + +#define REGS_LO16(f) \ + f( 1) f( 3) f( 4) f( 5) f( 6) f( 7) f( 8) f( 9) \ +f(10) f(11) f(12) f(13) f(14) f(15) +#ifndef __riscv_e +#define REGS_HI16(f) \ + f(16) f(17) f(18) f(19) \ +f(20) f(21) f(22) f(23) f(24) f(25) f(26) f(27) f(28) f(29) \ +f(30) f(31) +#define NR_REGS 32 +#else +#define REGS_HI16(f) +#define NR_REGS 16 +#endif + +#define REGS(f) REGS_LO16(f) 
REGS_HI16(f) + +#define PUSH(n) STORE concat(x, n), (n * XLEN)(sp); +#define POP(n) LOAD concat(x, n), (n * XLEN)(sp); + +#define CONTEXT_SIZE ((NR_REGS + 3 + 1) * XLEN) +#define OFFSET_SP ( 2 * XLEN) +#define OFFSET_CAUSE ((NR_REGS + 0) * XLEN) +#define OFFSET_STATUS ((NR_REGS + 1) * XLEN) +#define OFFSET_EPC ((NR_REGS + 2) * XLEN) + +.align 3 +.globl __am_asm_trap +__am_asm_trap: + addi sp, sp, -CONTEXT_SIZE + + MAP(REGS, PUSH) + + csrr t0, mcause + csrr t1, mstatus + csrr t2, mepc + + STORE t0, OFFSET_CAUSE(sp) + STORE t1, OFFSET_STATUS(sp) + STORE t2, OFFSET_EPC(sp) + + # set mstatus.MPRV to pass difftest + li a0, (1 << 17) + or t1, t1, a0 + csrw mstatus, t1 + + mv a0, sp + jal __am_irq_handle + + LOAD t1, OFFSET_STATUS(sp) + LOAD t2, OFFSET_EPC(sp) + csrw mstatus, t1 + csrw mepc, t2 + + MAP(REGS, POP) + + addi sp, sp, CONTEXT_SIZE + mret diff --git a/abstract-machine/am/src/riscv/nemu/vme.c b/abstract-machine/am/src/riscv/nemu/vme.c new file mode 100644 index 0000000..0de0143 --- /dev/null +++ b/abstract-machine/am/src/riscv/nemu/vme.c @@ -0,0 +1,74 @@ +#include +#include +#include + +static AddrSpace kas = {}; +static void* (*pgalloc_usr)(int) = NULL; +static void (*pgfree_usr)(void*) = NULL; +static int vme_enable = 0; + +static Area segments[] = { // Kernel memory mappings + NEMU_PADDR_SPACE +}; + +#define USER_SPACE RANGE(0x40000000, 0x80000000) + +static inline void set_satp(void *pdir) { + uintptr_t mode = 1ul << (__riscv_xlen - 1); + asm volatile("csrw satp, %0" : : "r"(mode | ((uintptr_t)pdir >> 12))); +} + +static inline uintptr_t get_satp() { + uintptr_t satp; + asm volatile("csrr %0, satp" : "=r"(satp)); + return satp << 12; +} + +bool vme_init(void* (*pgalloc_f)(int), void (*pgfree_f)(void*)) { + pgalloc_usr = pgalloc_f; + pgfree_usr = pgfree_f; + + kas.ptr = pgalloc_f(PGSIZE); + + int i; + for (i = 0; i < LENGTH(segments); i ++) { + void *va = segments[i].start; + for (; va < segments[i].end; va += PGSIZE) { + map(&kas, va, va, 0); + } + } + + 
set_satp(kas.ptr); + vme_enable = 1; + + return true; +} + +void protect(AddrSpace *as) { + PTE *updir = (PTE*)(pgalloc_usr(PGSIZE)); + as->ptr = updir; + as->area = USER_SPACE; + as->pgsize = PGSIZE; + // map kernel space + memcpy(updir, kas.ptr, PGSIZE); +} + +void unprotect(AddrSpace *as) { +} + +void __am_get_cur_as(Context *c) { + c->pdir = (vme_enable ? (void *)get_satp() : NULL); +} + +void __am_switch(Context *c) { + if (vme_enable && c->pdir != NULL) { + set_satp(c->pdir); + } +} + +void map(AddrSpace *as, void *va, void *pa, int prot) { +} + +Context *ucontext(AddrSpace *as, Area kstack, void *entry) { + return NULL; +} diff --git a/abstract-machine/am/src/riscv/npc/cte.c b/abstract-machine/am/src/riscv/npc/cte.c new file mode 100644 index 0000000..77a357c --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/cte.c @@ -0,0 +1,50 @@ +#include +#include +#include + +static Context* (*user_handler)(Event, Context*) = NULL; + +Context* __am_irq_handle(Context *c) { + if (user_handler) { + Event ev = {0}; + switch (c->mcause) { + default: ev.event = EVENT_ERROR; break; + } + + c = user_handler(ev, c); + assert(c != NULL); + } + + return c; +} + +extern void __am_asm_trap(void); + +bool cte_init(Context*(*handler)(Event, Context*)) { + // initialize exception entry + asm volatile("csrw mtvec, %0" : : "r"(__am_asm_trap)); + + // register event handler + user_handler = handler; + + return true; +} + +Context *kcontext(Area kstack, void (*entry)(void *), void *arg) { + return NULL; +} + +void yield() { +#ifdef __riscv_e + asm volatile("li a5, -1; ecall"); +#else + asm volatile("li a7, -1; ecall"); +#endif +} + +bool ienabled() { + return false; +} + +void iset(bool enable) { +} diff --git a/abstract-machine/am/src/riscv/npc/input.c b/abstract-machine/am/src/riscv/npc/input.c new file mode 100644 index 0000000..0460fa4 --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/input.c @@ -0,0 +1,6 @@ +#include + +void __am_input_keybrd(AM_INPUT_KEYBRD_T *kbd) { + 
kbd->keydown = 0; + kbd->keycode = AM_KEY_NONE; +} diff --git a/abstract-machine/am/src/riscv/npc/ioe.c b/abstract-machine/am/src/riscv/npc/ioe.c new file mode 100644 index 0000000..26bad0a --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/ioe.c @@ -0,0 +1,32 @@ +#include +#include + +void __am_timer_init(); + +void __am_timer_rtc(AM_TIMER_RTC_T *); +void __am_timer_uptime(AM_TIMER_UPTIME_T *); +void __am_input_keybrd(AM_INPUT_KEYBRD_T *); + +static void __am_timer_config(AM_TIMER_CONFIG_T *cfg) { cfg->present = true; cfg->has_rtc = true; } +static void __am_input_config(AM_INPUT_CONFIG_T *cfg) { cfg->present = true; } + +typedef void (*handler_t)(void *buf); +static void *lut[128] = { + [AM_TIMER_CONFIG] = __am_timer_config, + [AM_TIMER_RTC ] = __am_timer_rtc, + [AM_TIMER_UPTIME] = __am_timer_uptime, + [AM_INPUT_CONFIG] = __am_input_config, + [AM_INPUT_KEYBRD] = __am_input_keybrd, +}; + +static void fail(void *buf) { panic("access nonexist register"); } + +bool ioe_init() { + for (int i = 0; i < LENGTH(lut); i++) + if (!lut[i]) lut[i] = fail; + __am_timer_init(); + return true; +} + +void ioe_read (int reg, void *buf) { ((handler_t)lut[reg])(buf); } +void ioe_write(int reg, void *buf) { ((handler_t)lut[reg])(buf); } diff --git a/abstract-machine/am/src/riscv/npc/libgcc/ashldi3.c b/abstract-machine/am/src/riscv/npc/libgcc/ashldi3.c new file mode 100644 index 0000000..63bb3b6 --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/libgcc/ashldi3.c @@ -0,0 +1,27 @@ +#define LIBGCC2_UNITS_PER_WORD (__riscv_xlen / 8) +#include "libgcc2.h" + +DWtype __ashldi3 (DWtype u, shift_count_type b) +{ + if (b == 0) + return u; + + const DWunion uu = {.ll = u}; + const shift_count_type bm = W_TYPE_SIZE - b; + DWunion w; + + if (bm <= 0) + { + w.s.low = 0; + w.s.high = (UWtype) uu.s.low << -bm; + } + else + { + const UWtype carries = (UWtype) uu.s.low >> bm; + + w.s.low = (UWtype) uu.s.low << b; + w.s.high = ((UWtype) uu.s.high << b) | carries; + } + + return w.ll; +} diff --git 
a/abstract-machine/am/src/riscv/npc/libgcc/div.S b/abstract-machine/am/src/riscv/npc/libgcc/div.S new file mode 100644 index 0000000..d941a90 --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/libgcc/div.S @@ -0,0 +1,150 @@ +/* Integer division routines for RISC-V. + + Copyright (C) 2016-2022 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "riscv-asm.h" + + .text + .align 2 + +#if __riscv_xlen == 32 +/* Our RV64 64-bit routines are equivalent to our RV32 32-bit routines. */ +# define __udivdi3 __udivsi3 +# define __umoddi3 __umodsi3 +# define __divdi3 __divsi3 +# define __moddi3 __modsi3 +#else +FUNC_BEGIN (__udivsi3) + /* Compute __udivdi3(a0 << 32, a1 << 32); cast result to uint32_t. */ + sll a0, a0, 32 + sll a1, a1, 32 + move t0, ra + jal HIDDEN_JUMPTARGET(__udivdi3) + sext.w a0, a0 + jr t0 +FUNC_END (__udivsi3) + +FUNC_BEGIN (__umodsi3) + /* Compute __udivdi3((uint32_t)a0, (uint32_t)a1); cast a1 to uint32_t. 
*/ + sll a0, a0, 32 + sll a1, a1, 32 + srl a0, a0, 32 + srl a1, a1, 32 + move t0, ra + jal HIDDEN_JUMPTARGET(__udivdi3) + sext.w a0, a1 + jr t0 +FUNC_END (__umodsi3) + +FUNC_ALIAS (__modsi3, __moddi3) + +FUNC_BEGIN( __divsi3) + /* Check for special case of INT_MIN/-1. Otherwise, fall into __divdi3. */ + li t0, -1 + beq a1, t0, .L20 +#endif + +FUNC_BEGIN (__divdi3) + bltz a0, .L10 + bltz a1, .L11 + /* Since the quotient is positive, fall into __udivdi3. */ + +FUNC_BEGIN (__udivdi3) + mv a2, a1 + mv a1, a0 + li a0, -1 + beqz a2, .L5 + li a3, 1 + bgeu a2, a1, .L2 +.L1: + blez a2, .L2 + slli a2, a2, 1 + slli a3, a3, 1 + bgtu a1, a2, .L1 +.L2: + li a0, 0 +.L3: + bltu a1, a2, .L4 + sub a1, a1, a2 + or a0, a0, a3 +.L4: + srli a3, a3, 1 + srli a2, a2, 1 + bnez a3, .L3 +.L5: + ret +FUNC_END (__udivdi3) +HIDDEN_DEF (__udivdi3) + +FUNC_BEGIN (__umoddi3) + /* Call __udivdi3(a0, a1), then return the remainder, which is in a1. */ + move t0, ra + jal HIDDEN_JUMPTARGET(__udivdi3) + move a0, a1 + jr t0 +FUNC_END (__umoddi3) + + /* Handle negative arguments to __divdi3. */ +.L10: + neg a0, a0 + /* Zero is handled as a negative so that the result will not be inverted. */ + bgtz a1, .L12 /* Compute __udivdi3(-a0, a1), then negate the result. */ + + neg a1, a1 + j HIDDEN_JUMPTARGET(__udivdi3) /* Compute __udivdi3(-a0, -a1). */ +.L11: /* Compute __udivdi3(a0, -a1), then negate the result. */ + neg a1, a1 +.L12: + move t0, ra + jal HIDDEN_JUMPTARGET(__udivdi3) + neg a0, a0 + jr t0 +FUNC_END (__divdi3) + +FUNC_BEGIN (__moddi3) + move t0, ra + bltz a1, .L31 + bltz a0, .L32 +.L30: + jal HIDDEN_JUMPTARGET(__udivdi3) /* The dividend is not negative. */ + move a0, a1 + jr t0 +.L31: + neg a1, a1 + bgez a0, .L30 +.L32: + neg a0, a0 + jal HIDDEN_JUMPTARGET(__udivdi3) /* The dividend is hella negative. 
*/ + neg a0, a1 + jr t0 +FUNC_END (__moddi3) + +#if __riscv_xlen == 64 + /* continuation of __divsi3 */ +.L20: + sll t0, t0, 31 + bne a0, t0, __divdi3 + ret +FUNC_END (__divsi3) +#endif diff --git a/abstract-machine/am/src/riscv/npc/libgcc/libgcc2.h b/abstract-machine/am/src/riscv/npc/libgcc/libgcc2.h new file mode 100644 index 0000000..f0724ee --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/libgcc/libgcc2.h @@ -0,0 +1,543 @@ +/* Header file for libgcc2.c. */ +/* Copyright (C) 2000-2022 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#ifndef GCC_LIBGCC2_H +#define GCC_LIBGCC2_H + +#include + +#define MIN_UNITS_PER_WORD (__riscv_xlen / 8) + +#ifndef HIDE_EXPORTS +#pragma GCC visibility push(default) +#endif + +extern int __gcc_bcmp (const unsigned char *, const unsigned char *, size_t); +extern void __clear_cache (void *, void *); +extern void __eprintf (const char *, const char *, unsigned int, const char *) + __attribute__ ((__noreturn__)); + +#ifdef __LIBGCC_HAS_HF_MODE__ +#define LIBGCC2_HAS_HF_MODE 1 +#else +#define LIBGCC2_HAS_HF_MODE 0 +#endif + +#ifdef __LIBGCC_HAS_SF_MODE__ +#define LIBGCC2_HAS_SF_MODE 1 +#else +#define LIBGCC2_HAS_SF_MODE 0 +#endif + +#ifdef __LIBGCC_HAS_DF_MODE__ +#define LIBGCC2_HAS_DF_MODE 1 +#else +#define LIBGCC2_HAS_DF_MODE 0 +#endif + +#ifdef __LIBGCC_HAS_XF_MODE__ +#define LIBGCC2_HAS_XF_MODE 1 +#else +#define LIBGCC2_HAS_XF_MODE 0 +#endif + +#ifdef __LIBGCC_HAS_TF_MODE__ +#define LIBGCC2_HAS_TF_MODE 1 +#else +#define LIBGCC2_HAS_TF_MODE 0 +#endif + +#ifndef __LIBGCC_SF_MANT_DIG__ +#if LIBGCC2_HAS_SF_MODE +#error __LIBGCC_SF_MANT_DIG__ not defined +#else +#define __LIBGCC_SF_MANT_DIG__ 0 +#endif +#endif + +#ifndef __LIBGCC_DF_MANT_DIG__ +#if LIBGCC2_HAS_DF_MODE +#error __LIBGCC_DF_MANT_DIG__ not defined +#else +#define __LIBGCC_DF_MANT_DIG__ 0 +#endif +#endif + +#ifndef __LIBGCC_XF_MANT_DIG__ +#if LIBGCC2_HAS_XF_MODE +#error __LIBGCC_XF_MANT_DIG__ not defined +#else +#define __LIBGCC_XF_MANT_DIG__ 0 +#endif +#endif + +#ifndef __LIBGCC_TF_MANT_DIG__ +#if LIBGCC2_HAS_TF_MODE +#error __LIBGCC_TF_MANT_DIG__ not defined +#else +#define __LIBGCC_TF_MANT_DIG__ 0 +#endif +#endif + +/* FIXME: This #ifdef probably should be removed, ie. enable the test + for mips too. */ +/* Don't use IBM Extended Double TFmode for TI->SF calculations. + The conversion from long double to float suffers from double + rounding, because we convert via double. In other cases, going + through the software fp routines is much slower than the fallback. 
*/ +#ifdef __powerpc__ +#define AVOID_FP_TYPE_CONVERSION(SIZE) (SIZE == 106) +#elif defined(WIDEST_HARDWARE_FP_SIZE) +#define AVOID_FP_TYPE_CONVERSION(SIZE) (SIZE > WIDEST_HARDWARE_FP_SIZE) +#else +#define AVOID_FP_TYPE_CONVERSION(SIZE) 0 +#endif + +/* In the first part of this file, we are interfacing to calls generated + by the compiler itself. These calls pass values into these routines + which have very specific modes (rather than very specific types), and + these compiler-generated calls also expect any return values to have + very specific modes (rather than very specific types). Thus, we need + to avoid using regular C language type names in this part of the file + because the sizes for those types can be configured to be anything. + Instead we use the following special type names. */ + +typedef int QItype __attribute__ ((mode (QI))); +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef int HItype __attribute__ ((mode (HI))); +typedef unsigned int UHItype __attribute__ ((mode (HI))); +#if MIN_UNITS_PER_WORD > 1 +/* These typedefs are usually forbidden on dsp's with UNITS_PER_WORD 1. */ +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +#if __SIZEOF_LONG_LONG__ > 4 +/* These typedefs are usually forbidden on archs with UNITS_PER_WORD 2. */ +typedef int DItype __attribute__ ((mode (DI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); +#if MIN_UNITS_PER_WORD > 4 +/* These typedefs are usually forbidden on archs with UNITS_PER_WORD 4. 
*/ +typedef int TItype __attribute__ ((mode (TI))); +typedef unsigned int UTItype __attribute__ ((mode (TI))); +#endif +#endif +#endif + +#if LIBGCC2_HAS_HF_MODE +typedef float HFtype __attribute__ ((mode (HF))); +typedef _Complex float HCtype __attribute__ ((mode (HC))); +#endif +#if LIBGCC2_HAS_SF_MODE +typedef float SFtype __attribute__ ((mode (SF))); +typedef _Complex float SCtype __attribute__ ((mode (SC))); +#endif +#if LIBGCC2_HAS_DF_MODE +typedef float DFtype __attribute__ ((mode (DF))); +typedef _Complex float DCtype __attribute__ ((mode (DC))); +#endif +#if LIBGCC2_HAS_XF_MODE +typedef float XFtype __attribute__ ((mode (XF))); +typedef _Complex float XCtype __attribute__ ((mode (XC))); +#endif +#if LIBGCC2_HAS_TF_MODE +typedef float TFtype __attribute__ ((mode (TF))); +typedef _Complex float TCtype __attribute__ ((mode (TC))); +#endif + +typedef int cmp_return_type __attribute__((mode (__libgcc_cmp_return__))); +typedef int shift_count_type __attribute__((mode (__libgcc_shift_count__))); + +/* Make sure that we don't accidentally use any normal C language built-in + type names in the first part of this file. Instead we want to use *only* + the type names defined above. The following macro definitions insure + that if we *do* accidentally use some normal C language built-in type name, + we will get a syntax error. */ + +#define char bogus_type +#define short bogus_type +#define int bogus_type +#define long bogus_type +#define unsigned bogus_type +#define float bogus_type +#define double bogus_type + +/* Versions prior to 3.4.4 were not taking into account the word size for + the 5 trapping arithmetic functions absv, addv, subv, mulv and negv. As + a consequence, the si and di variants were always and the only ones emitted. + To maintain backward compatibility, COMPAT_SIMODE_TRAPPING_ARITHMETIC is + defined on platforms where it makes sense to still have the si variants + emitted. As a bonus, their implementation is now correct. 
Note that the + same mechanism should have been implemented for the di variants, but it + turns out that no platform would define COMPAT_DIMODE_TRAPPING_ARITHMETIC + if it existed. */ + +#if LIBGCC2_UNITS_PER_WORD == 8 +#define W_TYPE_SIZE (8 * __CHAR_BIT__) +#define Wtype DItype +#define UWtype UDItype +#define HWtype DItype +#define UHWtype UDItype +#define DWtype TItype +#define UDWtype UTItype +#ifdef LIBGCC2_GNU_PREFIX +#define __NW(a,b) __gnu_ ## a ## di ## b +#define __NDW(a,b) __gnu_ ## a ## ti ## b +#else +#define __NW(a,b) __ ## a ## di ## b +#define __NDW(a,b) __ ## a ## ti ## b +#endif +#define COMPAT_SIMODE_TRAPPING_ARITHMETIC +#elif LIBGCC2_UNITS_PER_WORD == 4 +#define W_TYPE_SIZE (4 * __CHAR_BIT__) +#define Wtype SItype +#define UWtype USItype +#define HWtype SItype +#define UHWtype USItype +#define DWtype DItype +#define UDWtype UDItype +#ifdef LIBGCC2_GNU_PREFIX +#define __NW(a,b) __gnu_ ## a ## si ## b +#define __NDW(a,b) __gnu_ ## a ## di ## b +#else +#define __NW(a,b) __ ## a ## si ## b +#define __NDW(a,b) __ ## a ## di ## b +#endif +#elif LIBGCC2_UNITS_PER_WORD == 2 +#define W_TYPE_SIZE (2 * __CHAR_BIT__) +#define Wtype HItype +#define UWtype UHItype +#define HWtype HItype +#define UHWtype UHItype +#define DWtype SItype +#define UDWtype USItype +#ifdef LIBGCC2_GNU_PREFIX +#define __NW(a,b) __gnu_ ## a ## hi ## b +#define __NDW(a,b) __gnu_ ## a ## si ## b +#else +#define __NW(a,b) __ ## a ## hi ## b +#define __NDW(a,b) __ ## a ## si ## b +#endif +#else +#define W_TYPE_SIZE __CHAR_BIT__ +#define Wtype QItype +#define UWtype UQItype +#define HWtype QItype +#define UHWtype UQItype +#define DWtype HItype +#define UDWtype UHItype +#ifdef LIBGCC2_GNU_PREFIX +#define __NW(a,b) __gnu_ ## a ## qi ## b +#define __NDW(a,b) __gnu_ ## a ## hi ## b +#else +#define __NW(a,b) __ ## a ## qi ## b +#define __NDW(a,b) __ ## a ## hi ## b +#endif +#endif + +#ifdef LIBGCC2_GNU_PREFIX +#define __N(a) __gnu_ ## a +#else +#define __N(a) __ ## a +#endif +#define Wtype_MAX 
((Wtype)(((UWtype)1 << (W_TYPE_SIZE - 1)) - 1)) +#define Wtype_MIN (- Wtype_MAX - 1) + +#if W_TYPE_SIZE == 8 +# define Wtype_MAXp1_F 0x1p8f +#elif W_TYPE_SIZE == 16 +# define Wtype_MAXp1_F 0x1p16f +#elif W_TYPE_SIZE == 32 +# define Wtype_MAXp1_F 0x1p32f +#elif W_TYPE_SIZE == 64 +# define Wtype_MAXp1_F 0x1p64f +#else +# error "expand the table" +#endif + +#define __muldi3 __NDW(mul,3) +#define __divdi3 __NDW(div,3) +#define __udivdi3 __NDW(udiv,3) +#define __moddi3 __NDW(mod,3) +#define __umoddi3 __NDW(umod,3) +#define __negdi2 __NDW(neg,2) +#define __lshrdi3 __NDW(lshr,3) +#define __ashldi3 __NDW(ashl,3) +#define __ashrdi3 __NDW(ashr,3) +#define __cmpdi2 __NDW(cmp,2) +#define __ucmpdi2 __NDW(ucmp,2) +#define __divmoddi4 __NDW(divmod,4) +#define __udivmoddi4 __NDW(udivmod,4) +#define __fixunstfDI __NDW(fixunstf,) +#define __fixtfdi __NDW(fixtf,) +#define __fixunsxfDI __NDW(fixunsxf,) +#define __fixxfdi __NDW(fixxf,) +#define __fixunsdfDI __NDW(fixunsdf,) +#define __fixdfdi __NDW(fixdf,) +#define __fixunssfDI __NDW(fixunssf,) +#define __fixsfdi __NDW(fixsf,) +#define __floatdixf __NDW(float,xf) +#define __floatditf __NDW(float,tf) +#define __floatdidf __NDW(float,df) +#define __floatdisf __NDW(float,sf) +#define __floatundixf __NDW(floatun,xf) +#define __floatunditf __NDW(floatun,tf) +#define __floatundidf __NDW(floatun,df) +#define __floatundisf __NDW(floatun,sf) +#define __fixunsxfSI __NW(fixunsxf,) +#define __fixunstfSI __NW(fixunstf,) +#define __fixunsdfSI __NW(fixunsdf,) +#define __fixunssfSI __NW(fixunssf,) + +#define __absvSI2 __NW(absv,2) +#define __addvSI3 __NW(addv,3) +#define __subvSI3 __NW(subv,3) +#define __mulvSI3 __NW(mulv,3) +#define __negvSI2 __NW(negv,2) +#define __absvDI2 __NDW(absv,2) +#define __addvDI3 __NDW(addv,3) +#define __subvDI3 __NDW(subv,3) +#define __mulvDI3 __NDW(mulv,3) +#define __negvDI2 __NDW(negv,2) + +#define __ffsSI2 __NW(ffs,2) +#define __clzSI2 __NW(clz,2) +#define __ctzSI2 __NW(ctz,2) +#define __clrsbSI2 __NW(clrsb,2) +#define 
__popcountSI2 __NW(popcount,2) +#define __paritySI2 __NW(parity,2) +#define __ffsDI2 __NDW(ffs,2) +#define __clzDI2 __NDW(clz,2) +#define __ctzDI2 __NDW(ctz,2) +#define __clrsbDI2 __NDW(clrsb,2) +#define __popcountDI2 __NDW(popcount,2) +#define __parityDI2 __NDW(parity,2) + +#define __clz_tab __N(clz_tab) +#define __bswapsi2 __N(bswapsi2) +#define __bswapdi2 __N(bswapdi2) +#define __udiv_w_sdiv __N(udiv_w_sdiv) +#define __clear_cache __N(clear_cache) +#define __enable_execute_stack __N(enable_execute_stack) + +#ifndef __powisf2 +#define __powisf2 __N(powisf2) +#endif +#ifndef __powidf2 +#define __powidf2 __N(powidf2) +#endif +#ifndef __powitf2 +#define __powitf2 __N(powitf2) +#endif +#ifndef __powixf2 +#define __powixf2 __N(powixf2) +#endif +#ifndef __mulsc3 +#define __mulsc3 __N(mulsc3) +#endif +#ifndef __muldc3 +#define __muldc3 __N(muldc3) +#endif +#ifndef __mulxc3 +#define __mulxc3 __N(mulxc3) +#endif +#ifndef __multc3 +#define __multc3 __N(multc3) +#endif +#ifndef __divsc3 +#define __divsc3 __N(divsc3) +#endif +#ifndef __divdc3 +#define __divdc3 __N(divdc3) +#endif +#ifndef __divxc3 +#define __divxc3 __N(divxc3) +#endif +#ifndef __divtc3 +#define __divtc3 __N(divtc3) +#endif + +extern DWtype __muldi3 (DWtype, DWtype); +extern DWtype __divdi3 (DWtype, DWtype); +extern UDWtype __udivdi3 (UDWtype, UDWtype); +extern UDWtype __umoddi3 (UDWtype, UDWtype); +extern DWtype __moddi3 (DWtype, DWtype); +extern DWtype __divmoddi4 (DWtype, DWtype, DWtype *); + +/* __udivmoddi4 is static inline when building other libgcc2 portions. */ +#if (!defined (L_udivdi3) && !defined (L_divdi3) && \ + !defined (L_umoddi3) && !defined (L_moddi3) && \ + !defined (L_divmoddi4)) +extern UDWtype __udivmoddi4 (UDWtype, UDWtype, UDWtype *); +#endif + +/* __negdi2 is static inline when building other libgcc2 portions. 
*/ +#if !defined(L_divdi3) && !defined(L_moddi3) +extern DWtype __negdi2 (DWtype); +#endif + +extern DWtype __lshrdi3 (DWtype, shift_count_type); +extern DWtype __ashldi3 (DWtype, shift_count_type); +extern DWtype __ashrdi3 (DWtype, shift_count_type); + +/* __udiv_w_sdiv is static inline when building other libgcc2 portions. */ +#if (!defined(L_udivdi3) && !defined(L_divdi3) && \ + !defined(L_umoddi3) && !defined(L_moddi3)) +extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype); +#endif + +extern cmp_return_type __cmpdi2 (DWtype, DWtype); +extern cmp_return_type __ucmpdi2 (UDWtype, UDWtype); + +#if MIN_UNITS_PER_WORD > 1 +extern SItype __bswapsi2 (SItype); +#endif +#if __SIZEOF_LONG_LONG__ > 4 +extern DItype __bswapdi2 (DItype); +#endif + +extern Wtype __absvSI2 (Wtype); +extern Wtype __addvSI3 (Wtype, Wtype); +extern Wtype __subvSI3 (Wtype, Wtype); +extern Wtype __mulvSI3 (Wtype, Wtype); +extern Wtype __negvSI2 (Wtype); +extern DWtype __absvDI2 (DWtype); +extern DWtype __addvDI3 (DWtype, DWtype); +extern DWtype __subvDI3 (DWtype, DWtype); +extern DWtype __mulvDI3 (DWtype, DWtype); +extern DWtype __negvDI2 (DWtype); + +#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC +#define __absvsi2 __N(absvsi2) +#define __negvsi2 __N(negvsi2) +#define __addvsi3 __N(addvsi3) +#define __subvsi3 __N(subvsi3) +#define __mulvsi3 __N(mulvsi3) + +extern SItype __absvsi2 (SItype); +extern SItype __addvsi3 (SItype, SItype); +extern SItype __subvsi3 (SItype, SItype); +extern SItype __mulvsi3 (SItype, SItype); +extern SItype __negvsi2 (SItype); +#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */ + +#undef int +#if LIBGCC2_HAS_HF_MODE +extern HCtype __divhc3 (HFtype, HFtype, HFtype, HFtype); +extern HCtype __mulhc3 (HFtype, HFtype, HFtype, HFtype); +#endif +#if LIBGCC2_HAS_SF_MODE +extern DWtype __fixsfdi (SFtype); +extern SFtype __floatdisf (DWtype); +extern SFtype __floatundisf (UDWtype); +extern UWtype __fixunssfSI (SFtype); +extern UDWtype __fixunssfDI (SFtype); +extern SFtype __powisf2 
(SFtype, int); +extern SCtype __divsc3 (SFtype, SFtype, SFtype, SFtype); +extern SCtype __mulsc3 (SFtype, SFtype, SFtype, SFtype); +#endif +#if LIBGCC2_HAS_DF_MODE +extern DWtype __fixdfdi (DFtype); +extern DFtype __floatdidf (DWtype); +extern DFtype __floatundidf (UDWtype); +extern UWtype __fixunsdfSI (DFtype); +extern UDWtype __fixunsdfDI (DFtype); +extern DFtype __powidf2 (DFtype, int); +extern DCtype __divdc3 (DFtype, DFtype, DFtype, DFtype); +extern DCtype __muldc3 (DFtype, DFtype, DFtype, DFtype); +#endif + +#if LIBGCC2_HAS_XF_MODE +extern DWtype __fixxfdi (XFtype); +extern UDWtype __fixunsxfDI (XFtype); +extern XFtype __floatdixf (DWtype); +extern XFtype __floatundixf (UDWtype); +extern UWtype __fixunsxfSI (XFtype); +extern XFtype __powixf2 (XFtype, int); +extern XCtype __divxc3 (XFtype, XFtype, XFtype, XFtype); +extern XCtype __mulxc3 (XFtype, XFtype, XFtype, XFtype); +#endif + +#if LIBGCC2_HAS_TF_MODE +extern UDWtype __fixunstfDI (TFtype); +extern DWtype __fixtfdi (TFtype); +extern TFtype __floatditf (DWtype); +extern TFtype __floatunditf (UDWtype); +extern TFtype __powitf2 (TFtype, int); +extern TCtype __divtc3 (TFtype, TFtype, TFtype, TFtype); +extern TCtype __multc3 (TFtype, TFtype, TFtype, TFtype); +#endif +#define int bogus_type + +/* DWstructs are pairs of Wtype values in the order determined by + __BYTE_ORDER__. */ + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + struct DWstruct {Wtype high, low;}; +#else + struct DWstruct {Wtype low, high;}; +#endif + +/* We need this union to unpack/pack DImode values, since we don't have + any arithmetic yet. Incoming DImode parameters are stored into the + `ll' field, and the unpacked result is read from the struct `s'. */ + +typedef union +{ + struct DWstruct s; + DWtype ll; +} DWunion; + +/* Defined for L_popcount_tab. Exported here because some targets may + want to use it for their own versions of the __popcount builtins. */ +extern const UQItype __popcount_tab[256]; + +/* Defined for L_clz. 
Exported here because some targets may want to use + it for their own versions of the __clz builtins. It contains the bit + position of the first set bit for the numbers 0 - 255. This avoids the + need for a separate table for the __ctz builtins. */ +extern const UQItype __clz_tab[256]; + +#include "longlong.h" + +#undef int +extern int __clzDI2 (UDWtype); +extern int __clzSI2 (UWtype); +extern int __ctzSI2 (UWtype); +extern int __ctzDI2 (UDWtype); +extern int __clrsbSI2 (Wtype); +extern int __clrsbDI2 (DWtype); +extern int __ffsSI2 (UWtype); +extern int __ffsDI2 (DWtype); +extern int __popcountSI2 (UWtype); +extern int __popcountDI2 (UDWtype); +extern int __paritySI2 (UWtype); +extern int __parityDI2 (UDWtype); +#define int bogus_type + +extern void __enable_execute_stack (void *); + +#ifndef HIDE_EXPORTS +#pragma GCC visibility pop +#endif + +#endif /* ! GCC_LIBGCC2_H */ diff --git a/abstract-machine/am/src/riscv/npc/libgcc/longlong.h b/abstract-machine/am/src/riscv/npc/libgcc/longlong.h new file mode 100644 index 0000000..64a7b10 --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/libgcc/longlong.h @@ -0,0 +1,1774 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + Copyright (C) 1991-2022 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file into + combinations with other programs, and to distribute those + combinations without any restriction coming from the use of this + file. 
(The Lesser General Public License restrictions do apply in + other respects; for example, they cover modification of the file, + and distribution when not linked into a combine executable.) + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* You have to define the following before including this file: + + UWtype -- An unsigned type, default type for operations (typically a "word") + UHWtype -- An unsigned type, at least half the size of UWtype. + UDWtype -- An unsigned type, at least twice as large a UWtype + W_TYPE_SIZE -- size in bits of UWtype + + UQItype -- Unsigned 8 bit type. + SItype, USItype -- Signed and unsigned 32 bit types. + DItype, UDItype -- Signed and unsigned 64 bit types. + + On a 32 bit machine UWtype should typically be USItype; + on a 64 bit machine, UWtype should typically be UDItype. */ + +#define __BITS4 (W_TYPE_SIZE / 4) +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +#ifndef W_TYPE_SIZE +#define W_TYPE_SIZE 32 +#define UWtype USItype +#define UHWtype USItype +#define UDWtype UDItype +#endif + +/* Used in glibc only. */ +#ifndef attribute_hidden +#define attribute_hidden +#endif + +extern const UQItype __clz_tab[256] attribute_hidden; + +/* Define auxiliary asm macros. + + 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two + UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype + word product in HIGH_PROD and LOW_PROD. + + 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a + UDWtype product. 
This is just a variant of umul_ppmm. + + 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator) divides a UDWtype, composed by the UWtype integers + HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient + in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less + than DENOMINATOR for correct operation. If, in addition, the most + significant bit of DENOMINATOR must be 1, then the pre-processor symbol + UDIV_NEEDS_NORMALIZATION is defined to 1. + + 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator). Like udiv_qrnnd but the numbers are signed. The quotient + is rounded towards 0. + + 5) count_leading_zeros(count, x) counts the number of zero-bits from the + msb to the first nonzero bit in the UWtype X. This is the number of + steps X needs to be shifted left to set the msb. Undefined for X == 0, + unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. + + 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts + from the least significant end. + + 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + high_addend_2, low_addend_2) adds two UWtype integers, composed by + HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 + respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow + (i.e. carry out) is not stored anywhere, and is lost. + + 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, + high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, + composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and + LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE + and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + and is lost. + + If any of these macros are left undefined for a particular CPU, + C macros are used. */ + +/* The CPUs come in alphabetical order below. 
+ + Please add support for more CPUs here, or improve the current support + for the CPUs below! + (E.g. WE32100, IBM360.) */ + +#if defined (__GNUC__) && !defined (NO_ASM) + +/* We sometimes need to clobber "cc" with gcc2, but that would not be + understood by gcc1. Use cpp to avoid major code duplication. */ +#if __GNUC__ < 2 +#define __CLOBBER_CC +#define __AND_CLOBBER_CC +#else /* __GNUC__ >= 2 */ +#define __CLOBBER_CC : "cc" +#define __AND_CLOBBER_CC , "cc" +#endif /* __GNUC__ < 2 */ + +#if defined (__aarch64__) + +#if W_TYPE_SIZE == 32 +#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) +#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) +#define COUNT_LEADING_ZEROS_0 32 +#endif /* W_TYPE_SIZE == 32 */ + +#if W_TYPE_SIZE == 64 +#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X)) +#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X)) +#define COUNT_LEADING_ZEROS_0 64 +#endif /* W_TYPE_SIZE == 64 */ + +#endif /* __aarch64__ */ + +#if defined (__alpha) && W_TYPE_SIZE == 64 +/* There is a bug in g++ before version 5 that + errors on __builtin_alpha_umulh. 
*/ +#if !defined(__cplusplus) || __GNUC__ >= 5 +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (ph) = __builtin_alpha_umulh (__m0, __m1); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 46 +#endif /* !c++ */ +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UDItype __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype); +#define UDIV_TIME 220 +#endif /* LONGLONG_STANDALONE */ +#ifdef __alpha_cix__ +#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) +#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) +#define COUNT_LEADING_ZEROS_0 64 +#else +#define count_leading_zeros(COUNT,X) \ + do { \ + UDItype __xr = (X), __t, __a; \ + __t = __builtin_alpha_cmpbge (0, __xr); \ + __a = __clz_tab[__t ^ 0xff] - 1; \ + __t = __builtin_alpha_extbl (__xr, __a); \ + (COUNT) = 64 - (__clz_tab[__t] + __a*8); \ + } while (0) +#define count_trailing_zeros(COUNT,X) \ + do { \ + UDItype __xr = (X), __t, __a; \ + __t = __builtin_alpha_cmpbge (0, __xr); \ + __t = ~__t & -~__t; \ + __a = ((__t & 0xCC) != 0) * 2; \ + __a += ((__t & 0xF0) != 0) * 4; \ + __a += ((__t & 0xAA) != 0); \ + __t = __builtin_alpha_extbl (__xr, __a); \ + __a <<= 3; \ + __t &= -__t; \ + __a += ((__t & 0xCC) != 0) * 2; \ + __a += ((__t & 0xF0) != 0) * 4; \ + __a += ((__t & 0xAA) != 0); \ + (COUNT) = __a; \ + } while (0) +#endif /* __alpha_cix__ */ +#endif /* __alpha */ + +#if defined (__arc__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "%r" ((USItype) (ah)), \ + "rICal" ((USItype) (bh)), \ + "%r" ((USItype) (al)), \ + "rICal" ((USItype) (bl)) \ + : "cc") +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \ + : "=r" ((USItype) (sh)), \ + "=&r" 
((USItype) (sl)) \ + : "r" ((USItype) (ah)), \ + "rICal" ((USItype) (bh)), \ + "r" ((USItype) (al)), \ + "rICal" ((USItype) (bl)) \ + : "cc") + +#define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v) +#ifdef __ARC_NORM__ +#define count_leading_zeros(count, x) \ + do \ + { \ + SItype c_; \ + \ + __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\ + (count) = c_ + 1; \ + } \ + while (0) +#define COUNT_LEADING_ZEROS_0 32 +#endif /* __ARC_NORM__ */ +#endif /* __arc__ */ + +#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \ + && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "%r" ((USItype) (ah)), \ + "rI" ((USItype) (bh)), \ + "%r" ((USItype) (al)), \ + "rI" ((USItype) (bl)) __CLOBBER_CC) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "r" ((USItype) (ah)), \ + "rI" ((USItype) (bh)), \ + "r" ((USItype) (al)), \ + "rI" ((USItype) (bl)) __CLOBBER_CC) +# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \ + || defined(__ARM_ARCH_3__) +# define umul_ppmm(xh, xl, a, b) \ + do { \ + register USItype __t0, __t1, __t2; \ + __asm__ ("%@ Inlined umul_ppmm\n" \ + " mov %2, %5, lsr #16\n" \ + " mov %0, %6, lsr #16\n" \ + " bic %3, %5, %2, lsl #16\n" \ + " bic %4, %6, %0, lsl #16\n" \ + " mul %1, %3, %4\n" \ + " mul %4, %2, %4\n" \ + " mul %3, %0, %3\n" \ + " mul %0, %2, %0\n" \ + " adds %3, %4, %3\n" \ + " addcs %0, %0, #65536\n" \ + " adds %1, %1, %3, lsl #16\n" \ + " adc %0, %0, %3, lsr #16" \ + : "=&r" ((USItype) (xh)), \ + "=r" ((USItype) (xl)), \ + "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ + : "r" ((USItype) (a)), \ + "r" ((USItype) (b)) __CLOBBER_CC ); \ + } while (0) +# define UMUL_TIME 20 +# else +# define umul_ppmm(xh, xl, a, b) \ + do { \ + /* Generate umull, under compiler control. 
*/ \ + register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \ + (xl) = (USItype)__t0; \ + (xh) = (USItype)(__t0 >> 32); \ + } while (0) +# define UMUL_TIME 3 +# endif +# define UDIV_TIME 100 +#endif /* __arm__ */ + +#if defined(__arm__) +/* Let gcc decide how best to implement count_leading_zeros. */ +#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) +#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) +#define COUNT_LEADING_ZEROS_0 32 +#endif + +#if defined (__AVR__) + +#if W_TYPE_SIZE == 16 +#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) +#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) +#define COUNT_LEADING_ZEROS_0 16 +#endif /* W_TYPE_SIZE == 16 */ + +#if W_TYPE_SIZE == 32 +#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) +#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) +#define COUNT_LEADING_ZEROS_0 32 +#endif /* W_TYPE_SIZE == 32 */ + +#if W_TYPE_SIZE == 64 +#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X)) +#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X)) +#define COUNT_LEADING_ZEROS_0 64 +#endif /* W_TYPE_SIZE == 64 */ + +#endif /* defined (__AVR__) */ + +#if defined (__CRIS__) + +#if __CRIS_arch_version >= 3 +#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) +#define COUNT_LEADING_ZEROS_0 32 +#endif /* __CRIS_arch_version >= 3 */ + +#if __CRIS_arch_version >= 8 +#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) +#endif /* __CRIS_arch_version >= 8 */ + +#if __CRIS_arch_version >= 10 +#define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v)) +#else +#define __umulsidi3 __umulsidi3 +extern UDItype __umulsidi3 (USItype, USItype); +#endif /* __CRIS_arch_version >= 10 */ + +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UDItype __x = __umulsidi3 (u, v); \ + (w0) = (USItype) (__x); \ + (w1) = (USItype) (__x >> 32); \ + } while (0) + +/* FIXME: defining 
add_ssaaaa and sub_ddmmss should be advantageous for + DFmode ("double" intrinsics, avoiding two of the three insns handling + carry), but defining them as open-code C composing and doing the + operation in DImode (UDImode) shows that the DImode needs work: + register pressure from requiring neighboring registers and the + traffic to and from them come to dominate, in the 4.7 series. */ + +#endif /* defined (__CRIS__) */ + +#if defined (__hppa) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "%rM" ((USItype) (ah)), \ + "rM" ((USItype) (bh)), \ + "%rM" ((USItype) (al)), \ + "rM" ((USItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "rM" ((USItype) (ah)), \ + "rM" ((USItype) (bh)), \ + "rM" ((USItype) (al)), \ + "rM" ((USItype) (bl))) +#if defined (_PA_RISC1_1) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + union \ + { \ + UDItype __f; \ + struct {USItype __w1, __w0;} __w1w0; \ + } __t; \ + __asm__ ("xmpyu %1,%2,%0" \ + : "=x" (__t.__f) \ + : "x" ((USItype) (u)), \ + "x" ((USItype) (v))); \ + (w1) = __t.__w1w0.__w1; \ + (w0) = __t.__w1w0.__w0; \ + } while (0) +#define UMUL_TIME 8 +#else +#define UMUL_TIME 30 +#endif +#define UDIV_TIME 40 +#define count_leading_zeros(count, x) \ + do { \ + USItype __tmp; \ + __asm__ ( \ + "ldi 1,%0\n" \ +" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ +" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\ +" ldo 16(%0),%0 ; Yes. Perform add.\n" \ +" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ +" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\ +" ldo 8(%0),%0 ; Yes. Perform add.\n" \ +" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ +" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\ +" ldo 4(%0),%0 ; Yes. 
Perform add.\n" \ +" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ +" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\ +" ldo 2(%0),%0 ; Yes. Perform add.\n" \ +" extru %1,30,1,%1 ; Extract bit 1.\n" \ +" sub %0,%1,%0 ; Subtract it.\n" \ + : "=r" (count), "=r" (__tmp) : "1" (x)); \ + } while (0) +#endif + +#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32 +#if !defined (__zarch__) +#define smul_ppmm(xh, xl, m0, m1) \ + do { \ + union {DItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("lr %N0,%1\n\tmr %0,%2" \ + : "=&r" (__x.__ll) \ + : "r" (m0), "r" (m1)); \ + (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ + } while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {DItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __x.__i.__h = n1; __x.__i.__l = n0; \ + __asm__ ("dr %0,%2" \ + : "=r" (__x.__ll) \ + : "0" (__x.__ll), "r" (d)); \ + (q) = __x.__i.__l; (r) = __x.__i.__h; \ + } while (0) +#else +#define smul_ppmm(xh, xl, m0, m1) \ + do { \ + register SItype __r0 __asm__ ("0"); \ + register SItype __r1 __asm__ ("1") = (m0); \ + \ + __asm__ ("mr\t%%r0,%3" \ + : "=r" (__r0), "=r" (__r1) \ + : "r" (__r1), "r" (m1)); \ + (xh) = __r0; (xl) = __r1; \ + } while (0) + +#define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + register SItype __r0 __asm__ ("0") = (n1); \ + register SItype __r1 __asm__ ("1") = (n0); \ + \ + __asm__ ("dr\t%%r0,%4" \ + : "=r" (__r0), "=r" (__r1) \ + : "r" (__r0), "r" (__r1), "r" (d)); \ + (q) = __r1; (r) = __r0; \ + } while (0) +#endif /* __zarch__ */ +#endif + +#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "%0" ((USItype) (ah)), \ + "g" ((USItype) (bh)), \ + "%1" ((USItype) (al)), \ + "g" ((USItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub{l} 
{%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "0" ((USItype) (ah)), \ + "g" ((USItype) (bh)), \ + "1" ((USItype) (al)), \ + "g" ((USItype) (bl))) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mul{l} %3" \ + : "=a" ((USItype) (w0)), \ + "=d" ((USItype) (w1)) \ + : "%0" ((USItype) (u)), \ + "rm" ((USItype) (v))) +#define udiv_qrnnd(q, r, n1, n0, dv) \ + __asm__ ("div{l} %4" \ + : "=a" ((USItype) (q)), \ + "=d" ((USItype) (r)) \ + : "0" ((USItype) (n0)), \ + "1" ((USItype) (n1)), \ + "rm" ((USItype) (dv))) +#define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) +#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) +#define UMUL_TIME 40 +#define UDIV_TIME 40 +#endif /* 80x86 */ + +#if defined (__x86_64__) && W_TYPE_SIZE == 64 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \ + : "=r" ((UDItype) (sh)), \ + "=&r" ((UDItype) (sl)) \ + : "%0" ((UDItype) (ah)), \ + "rme" ((UDItype) (bh)), \ + "%1" ((UDItype) (al)), \ + "rme" ((UDItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \ + : "=r" ((UDItype) (sh)), \ + "=&r" ((UDItype) (sl)) \ + : "0" ((UDItype) (ah)), \ + "rme" ((UDItype) (bh)), \ + "1" ((UDItype) (al)), \ + "rme" ((UDItype) (bl))) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mul{q} %3" \ + : "=a" ((UDItype) (w0)), \ + "=d" ((UDItype) (w1)) \ + : "%0" ((UDItype) (u)), \ + "rm" ((UDItype) (v))) +#define udiv_qrnnd(q, r, n1, n0, dv) \ + __asm__ ("div{q} %4" \ + : "=a" ((UDItype) (q)), \ + "=d" ((UDItype) (r)) \ + : "0" ((UDItype) (n0)), \ + "1" ((UDItype) (n1)), \ + "rm" ((UDItype) (dv))) +#define count_leading_zeros(count, x) ((count) = __builtin_clzll (x)) +#define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x)) +#define UMUL_TIME 40 +#define UDIV_TIME 40 +#endif /* x86_64 */ + +#if defined (__i960__) && W_TYPE_SIZE == 32 +#define 
umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__xx.__ll) \ + : "%dI" ((USItype) (u)), \ + "dI" ((USItype) (v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__w) \ + : "%dI" ((USItype) (u)), \ + "dI" ((USItype) (v))); \ + __w; }) +#endif /* __i960__ */ + +#if defined (__ia64) && W_TYPE_SIZE == 64 +/* This form encourages gcc (pre-release 3.4 at least) to emit predicated + "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency. The generic + code using "al<bl" arithmetically comes out making an actual 0 or 1 in a + register, which takes an extra cycle. */ +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + __x = (al) - (bl); \ + if ((al) < (bl)) \ + (sh) = (ah) - (bh) - 1; \ + else \ + (sh) = (ah) - (bh); \ + (sl) = __x; \ + } while (0) + +/* Do both product parts in assembly, since that gives better code with + all gcc versions. Some callers will just use the upper part, and in + that situation we waste an instruction, but not any cycles. */ +#define umul_ppmm(ph, pl, m0, m1) \ + __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \ + : "=&f" (ph), "=f" (pl) \ + : "f" (m0), "f" (m1)) +#define count_leading_zeros(count, x) \ + do { \ + UWtype _x = (x), _y, _a, _c; \ + __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \ + __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \ + _c = (_a - 1) << 3; \ + _x >>= _c; \ + if (_x >= 1 << 4) \ + _x >>= 4, _c += 4; \ + if (_x >= 1 << 2) \ + _x >>= 2, _c += 2; \ + _c += _x >> 1; \ + (count) = W_TYPE_SIZE - 1 - _c; \ + } while (0) +/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1 + based, and we don't need a special case for x==0 here */ +#define count_trailing_zeros(count, x) \ + do { \ + UWtype __ctz_x = (x); \ + __asm__ ("popcnt %0 = %1" \ + : "=r" (count) \ + : "r" ((__ctz_x-1) & ~__ctz_x)); \ + } while (0) +#define UMUL_TIME 14 +#endif + +#if defined (__M32R__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + /* The cmp clears the condition bit. */ \ + __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "0" ((USItype) (ah)), \ + "r" ((USItype) (bh)), \ + "1" ((USItype) (al)), \ + "r" ((USItype) (bl)) \ + : "cbit") +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + /* The cmp clears the condition bit. 
*/ \ + __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "0" ((USItype) (ah)), \ + "r" ((USItype) (bh)), \ + "1" ((USItype) (al)), \ + "r" ((USItype) (bl)) \ + : "cbit") +#endif /* __M32R__ */ + +#if defined (__mc68000__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \ + : "=d" ((USItype) (sh)), \ + "=&d" ((USItype) (sl)) \ + : "%0" ((USItype) (ah)), \ + "d" ((USItype) (bh)), \ + "%1" ((USItype) (al)), \ + "g" ((USItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \ + : "=d" ((USItype) (sh)), \ + "=&d" ((USItype) (sl)) \ + : "0" ((USItype) (ah)), \ + "d" ((USItype) (bh)), \ + "1" ((USItype) (al)), \ + "g" ((USItype) (bl))) + +/* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r. */ +#if (defined (__mc68020__) && !defined (__mc68060__)) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulu%.l %3,%1:%0" \ + : "=d" ((USItype) (w0)), \ + "=d" ((USItype) (w1)) \ + : "%0" ((USItype) (u)), \ + "dmi" ((USItype) (v))) +#define UMUL_TIME 45 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divu%.l %4,%1:%0" \ + : "=d" ((USItype) (q)), \ + "=d" ((USItype) (r)) \ + : "0" ((USItype) (n0)), \ + "1" ((USItype) (n1)), \ + "dmi" ((USItype) (d))) +#define UDIV_TIME 90 +#define sdiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divs%.l %4,%1:%0" \ + : "=d" ((USItype) (q)), \ + "=d" ((USItype) (r)) \ + : "0" ((USItype) (n0)), \ + "1" ((USItype) (n1)), \ + "dmi" ((USItype) (d))) + +#elif defined (__mcoldfire__) /* not mc68020 */ + +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("| Inlined umul_ppmm\n" \ + " move%.l %2,%/d0\n" \ + " move%.l %3,%/d1\n" \ + " move%.l %/d0,%/d2\n" \ + " swap %/d0\n" \ + " move%.l %/d1,%/d3\n" \ + " swap %/d1\n" \ + " move%.w %/d2,%/d4\n" \ + " mulu %/d3,%/d4\n" \ + " mulu %/d1,%/d2\n" \ + " mulu %/d0,%/d3\n" \ + " mulu %/d0,%/d1\n" \ + " move%.l %/d4,%/d0\n" \ + " clr%.w 
%/d0\n" \ + " swap %/d0\n" \ + " add%.l %/d0,%/d2\n" \ + " add%.l %/d3,%/d2\n" \ + " jcc 1f\n" \ + " add%.l %#65536,%/d1\n" \ + "1: swap %/d2\n" \ + " moveq %#0,%/d0\n" \ + " move%.w %/d2,%/d0\n" \ + " move%.w %/d4,%/d2\n" \ + " move%.l %/d2,%1\n" \ + " add%.l %/d1,%/d0\n" \ + " move%.l %/d0,%0" \ + : "=g" ((USItype) (xh)), \ + "=g" ((USItype) (xl)) \ + : "g" ((USItype) (a)), \ + "g" ((USItype) (b)) \ + : "d0", "d1", "d2", "d3", "d4") +#define UMUL_TIME 100 +#define UDIV_TIME 400 +#else /* not ColdFire */ +/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX. */ +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("| Inlined umul_ppmm\n" \ + " move%.l %2,%/d0\n" \ + " move%.l %3,%/d1\n" \ + " move%.l %/d0,%/d2\n" \ + " swap %/d0\n" \ + " move%.l %/d1,%/d3\n" \ + " swap %/d1\n" \ + " move%.w %/d2,%/d4\n" \ + " mulu %/d3,%/d4\n" \ + " mulu %/d1,%/d2\n" \ + " mulu %/d0,%/d3\n" \ + " mulu %/d0,%/d1\n" \ + " move%.l %/d4,%/d0\n" \ + " eor%.w %/d0,%/d0\n" \ + " swap %/d0\n" \ + " add%.l %/d0,%/d2\n" \ + " add%.l %/d3,%/d2\n" \ + " jcc 1f\n" \ + " add%.l %#65536,%/d1\n" \ + "1: swap %/d2\n" \ + " moveq %#0,%/d0\n" \ + " move%.w %/d2,%/d0\n" \ + " move%.w %/d4,%/d2\n" \ + " move%.l %/d2,%1\n" \ + " add%.l %/d1,%/d0\n" \ + " move%.l %/d0,%0" \ + : "=g" ((USItype) (xh)), \ + "=g" ((USItype) (xl)) \ + : "g" ((USItype) (a)), \ + "g" ((USItype) (b)) \ + : "d0", "d1", "d2", "d3", "d4") +#define UMUL_TIME 100 +#define UDIV_TIME 400 + +#endif /* not mc68020 */ + +/* The '020, '030, '040 and '060 have bitfield insns. + cpu32 disguises as a 68020, but lacks them. */ +#if defined (__mc68020__) && !defined (__mcpu32__) +#define count_leading_zeros(count, x) \ + __asm__ ("bfffo %1{%b2:%b2},%0" \ + : "=d" ((USItype) (count)) \ + : "od" ((USItype) (x)), "n" (0)) +/* Some ColdFire architectures have a ff1 instruction supported via + __builtin_clz. 
*/ +#elif defined (__mcfisaaplus__) || defined (__mcfisac__) +#define count_leading_zeros(count,x) ((count) = __builtin_clz (x)) +#define COUNT_LEADING_ZEROS_0 32 +#endif +#endif /* mc68000 */ + +#if defined (__m88000__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "%rJ" ((USItype) (ah)), \ + "rJ" ((USItype) (bh)), \ + "%rJ" ((USItype) (al)), \ + "rJ" ((USItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "rJ" ((USItype) (ah)), \ + "rJ" ((USItype) (bh)), \ + "rJ" ((USItype) (al)), \ + "rJ" ((USItype) (bl))) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("ff1 %0,%1" \ + : "=r" (__cbtmp) \ + : "r" ((USItype) (x))); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#define COUNT_LEADING_ZEROS_0 63 /* sic */ +#if defined (__mc88110__) +#define umul_ppmm(wh, wl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + __asm__ ("mulu.d %0,%1,%2" \ + : "=r" (__xx.__ll) \ + : "r" ((USItype) (u)), \ + "r" ((USItype) (v))); \ + (wh) = __xx.__i.__h; \ + (wl) = __xx.__i.__l; \ + } while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + USItype __q; \ + __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ + __asm__ ("divu.d %0,%1,%2" \ + : "=r" (__q) \ + : "r" (__xx.__ll), \ + "r" ((USItype) (d))); \ + (r) = (n0) - __q * (d); (q) = __q; }) +#define UMUL_TIME 5 +#define UDIV_TIME 25 +#else +#define UMUL_TIME 17 +#define UDIV_TIME 150 +#endif /* __mc88110__ */ +#endif /* __m88000__ */ + +#if defined (__mn10300__) +# if defined (__AM33__) +# define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) +# define umul_ppmm(w1, w0, u, v) \ + asm("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), 
"r"(v)) +# define smul_ppmm(w1, w0, u, v) \ + asm("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v)) +# else +# define umul_ppmm(w1, w0, u, v) \ + asm("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v)) +# define smul_ppmm(w1, w0, u, v) \ + asm("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v)) +# endif +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + DWunion __s, __a, __b; \ + __a.s.low = (al); __a.s.high = (ah); \ + __b.s.low = (bl); __b.s.high = (bh); \ + __s.ll = __a.ll + __b.ll; \ + (sl) = __s.s.low; (sh) = __s.s.high; \ + } while (0) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + DWunion __s, __a, __b; \ + __a.s.low = (al); __a.s.high = (ah); \ + __b.s.low = (bl); __b.s.high = (bh); \ + __s.ll = __a.ll - __b.ll; \ + (sl) = __s.s.low; (sh) = __s.s.high; \ + } while (0) +# define udiv_qrnnd(q, r, nh, nl, d) \ + asm("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh)) +# define sdiv_qrnnd(q, r, nh, nl, d) \ + asm("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh)) +# define UMUL_TIME 3 +# define UDIV_TIME 38 +#endif + +#if defined (__mips__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \ + (w1) = (USItype) (__x >> 32); \ + (w0) = (USItype) (__x); \ + } while (0) +#define UMUL_TIME 10 +#define UDIV_TIME 100 + +#if (__mips == 32 || __mips == 64) && ! defined (__mips16) +#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) +#define COUNT_LEADING_ZEROS_0 32 +#endif +#endif /* __mips__ */ + +/* FIXME: We should test _IBMR2 here when we add assembly support for the + system vendor compilers. + FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good + enough, since that hits ARM and m68k too. */ +#if (defined (_ARCH_PPC) /* AIX */ \ + || defined (__powerpc__) /* gcc */ \ + || defined (__POWERPC__) /* BEOS */ \ + || defined (__ppc__) /* Darwin */ \ + || (defined (PPC) && ! 
defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ + || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ + && CPU_FAMILY == PPC) \ + ) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ + else \ + __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ + } while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else \ + __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ + } while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x)) +#define COUNT_LEADING_ZEROS_0 32 +#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \ + || defined (__ppc__) \ + || (defined (PPC) && ! 
defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ + || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ + && CPU_FAMILY == PPC) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 15 +#define smul_ppmm(ph, pl, m0, m1) \ + do { \ + SItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define SMUL_TIME 14 +#define UDIV_TIME 120 +#endif +#endif /* 32-bit POWER architecture variants. */ + +/* We should test _IBMR2 here when we add assembly support for the system + vendor compilers. */ +#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ + else \ + __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ + } while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddze 
%0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else \ + __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ + } while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x)) +#define COUNT_LEADING_ZEROS_0 64 +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 15 +#define smul_ppmm(ph, pl, m0, m1) \ + do { \ + DItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define SMUL_TIME 14 /* ??? */ +#define UDIV_TIME 120 /* ??? */ +#endif /* 64-bit PowerPC. */ + +#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("a %1,%5\n\tae %0,%3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "%0" ((USItype) (ah)), \ + "r" ((USItype) (bh)), \ + "%1" ((USItype) (al)), \ + "r" ((USItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("s %1,%5\n\tse %0,%3" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "0" ((USItype) (ah)), \ + "r" ((USItype) (bh)), \ + "1" ((USItype) (al)), \ + "r" ((USItype) (bl))) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ( \ + "s r2,r2\n" \ +" mts r10,%2\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" cas %0,r2,r0\n" \ +" mfs r10,%1" \ + : "=r" ((USItype) (ph)), \ + "=r" ((USItype) (pl)) \ + : "%r" (__m0), \ + "r" (__m1) \ + : "r2"); \ + (ph) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ 
+ } while (0) +#define UMUL_TIME 20 +#define UDIV_TIME 200 +#define count_leading_zeros(count, x) \ + do { \ + if ((x) >= 0x10000) \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype) (count)) \ + : "r" ((USItype) (x) >> 16)); \ + else \ + { \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype) (count)) \ + : "r" ((USItype) (x))); \ + (count) += 16; \ + } \ + } while (0) +#endif + +#if defined(__riscv) +#ifdef __riscv_mul +#define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v)) +#define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b)) +#else +#if __riscv_xlen == 32 + #define MULUW3 "call __mulsi3" +#elif __riscv_xlen == 64 + #define MULUW3 "call __muldi3" +#else +#error unsupport xlen +#endif /* __riscv_xlen */ +/* We rely on the fact that MULUW3 doesn't clobber the t-registers. + It can get better register allocation result. */ +#define __muluw3(a, b) \ + ({ \ + register UWtype __op0 asm ("a0") = a; \ + register UWtype __op1 asm ("a1") = b; \ + asm volatile (MULUW3 \ + : "+r" (__op0), "+r" (__op1) \ + : \ + : "ra", "a2", "a3"); \ + __op0; \ + }) +#endif /* __riscv_mul */ +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + \ + __ul = __ll_lowpart (u); \ + __uh = __ll_highpart (u); \ + __vl = __ll_lowpart (v); \ + __vh = __ll_highpart (v); \ + \ + __x0 = __muluw3 (__ul, __vl); \ + __x1 = __muluw3 (__ul, __vh); \ + __x2 = __muluw3 (__uh, __vl); \ + __x3 = __muluw3 (__uh, __vh); \ + \ + __x1 += __ll_highpart (__x0);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. 
*/ \ + \ + (w1) = __x3 + __ll_highpart (__x1); \ + (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ + } while (0) +#endif /* __riscv */ + +#if defined(__sh__) && W_TYPE_SIZE == 32 +#ifndef __sh1__ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ( \ + "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \ + : "=r<" ((USItype)(w1)), \ + "=r<" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "macl", "mach") +#define UMUL_TIME 5 +#endif + +/* This is the same algorithm as __udiv_qrnnd_c. */ +#define UDIV_NEEDS_NORMALIZATION 1 + +#ifdef __FDPIC__ +/* FDPIC needs a special version of the asm fragment to extract the + code address from the function descriptor. __udiv_qrnnd_16 is + assumed to be local and not to use the GOT, so loading r12 is + not needed. */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ + __attribute__ ((visibility ("hidden"))); \ + /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ + __asm__ ( \ + "mov%M4 %4,r5\n" \ +" swap.w %3,r4\n" \ +" swap.w r5,r6\n" \ +" mov.l @%5,r2\n" \ +" jsr @r2\n" \ +" shll16 r6\n" \ +" swap.w r4,r4\n" \ +" mov.l @%5,r2\n" \ +" jsr @r2\n" \ +" swap.w r1,%0\n" \ +" or r1,%0" \ + : "=r" (q), "=&z" (r) \ + : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ + : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ + } while (0) +#else +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ + __attribute__ ((visibility ("hidden"))); \ + /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ + __asm__ ( \ + "mov%M4 %4,r5\n" \ +" swap.w %3,r4\n" \ +" swap.w r5,r6\n" \ +" jsr @%5\n" \ +" shll16 r6\n" \ +" swap.w r4,r4\n" \ +" jsr @%5\n" \ +" swap.w r1,%0\n" \ +" or r1,%0" \ + : "=r" (q), "=&z" (r) \ + : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ + : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ + } while (0) +#endif /* __FDPIC__ */ + +#define UDIV_TIME 80 + +#define 
sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("clrt;subc %5,%1; subc %4,%0" \ + : "=r" (sh), "=r" (sl) \ + : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t") + +#endif /* __sh__ */ + +#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \ + && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "%rJ" ((USItype) (ah)), \ + "rI" ((USItype) (bh)), \ + "%rJ" ((USItype) (al)), \ + "rI" ((USItype) (bl)) \ + __CLOBBER_CC) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \ + : "=r" ((USItype) (sh)), \ + "=&r" ((USItype) (sl)) \ + : "rJ" ((USItype) (ah)), \ + "rI" ((USItype) (bh)), \ + "rJ" ((USItype) (al)), \ + "rI" ((USItype) (bl)) \ + __CLOBBER_CC) +#if defined (__sparc_v9__) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + register USItype __g1 asm ("g1"); \ + __asm__ ("umul\t%2,%3,%1\n\t" \ + "srlx\t%1, 32, %0" \ + : "=r" ((USItype) (w1)), \ + "=r" (__g1) \ + : "r" ((USItype) (u)), \ + "r" ((USItype) (v))); \ + (w0) = __g1; \ + } while (0) +#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ + __asm__ ("mov\t%2,%%y\n\t" \ + "udiv\t%3,%4,%0\n\t" \ + "umul\t%0,%4,%1\n\t" \ + "sub\t%3,%1,%1" \ + : "=&r" ((USItype) (__q)), \ + "=&r" ((USItype) (__r)) \ + : "r" ((USItype) (__n1)), \ + "r" ((USItype) (__n0)), \ + "r" ((USItype) (__d))) +#else +#if defined (__sparc_v8__) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype) (w1)), \ + "=r" ((USItype) (w0)) \ + : "r" ((USItype) (u)), \ + "r" ((USItype) (v))) +#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ + __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\ + : "=&r" ((USItype) (__q)), \ + "=&r" ((USItype) (__r)) \ + : "r" ((USItype) (__n1)), \ + "r" ((USItype) (__n0)), \ + "r" ((USItype) (__d))) +#else +#if defined (__sparclite__) +/* This has hardware multiply but not divide. 
It also has two additional + instructions scan (ffs from high bit) and divscc. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype) (w1)), \ + "=r" ((USItype) (w0)) \ + : "r" ((USItype) (u)), \ + "r" ((USItype) (v))) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd\n" \ +" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ +" tst %%g0\n" \ +" divscc %3,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%0\n" \ +" rd %%y,%1\n" \ +" bl,a 1f\n" \ +" add %1,%4,%1\n" \ +"1: ! End of inline udiv_qrnnd" \ + : "=r" ((USItype) (q)), \ + "=r" ((USItype) (r)) \ + : "r" ((USItype) (n1)), \ + "r" ((USItype) (n0)), \ + "rI" ((USItype) (d)) \ + : "g1" __AND_CLOBBER_CC) +#define UDIV_TIME 37 +#define count_leading_zeros(count, x) \ + do { \ + __asm__ ("scan %1,1,%0" \ + : "=r" ((USItype) (count)) \ + : "r" ((USItype) (x))); \ + } while (0) +/* Early sparclites return 63 for an argument of 0, but they warn that future + implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 + undefined. */ +#else +/* SPARC without integer multiplication and divide instructions. + (i.e. 
at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("! Inlined umul_ppmm\n" \ +" wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\ +" sra %3,31,%%o5 ! Don't move this insn\n" \ +" and %2,%%o5,%%o5 ! Don't move this insn\n" \ +" andcc %%g0,0,%%g1 ! Don't move this insn\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,0,%%g1\n" \ +" add %%g1,%%o5,%0\n" \ +" rd %%y,%1" \ + : "=r" ((USItype) (w1)), \ + "=r" ((USItype) (w0)) \ + : "%rI" ((USItype) (u)), \ + "r" ((USItype) (v)) \ + : "g1", "o5" __AND_CLOBBER_CC) +#define UMUL_TIME 39 /* 39 instructions */ +/* It's quite necessary to add this much assembler for the sparc. + The default udiv_qrnnd (in C) is more than 10 times slower! */ +#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ + __asm__ ("! Inlined udiv_qrnnd\n" \ +" mov 32,%%g1\n" \ +" subcc %1,%2,%%g0\n" \ +"1: bcs 5f\n" \ +" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \ +" sub %1,%2,%1 ! this kills msb of n\n" \ +" addx %1,%1,%1 ! 
so this can't give carry\n" \ +" subcc %%g1,1,%%g1\n" \ +"2: bne 1b\n" \ +" subcc %1,%2,%%g0\n" \ +" bcs 3f\n" \ +" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \ +" b 3f\n" \ +" sub %1,%2,%1 ! this kills msb of n\n" \ +"4: sub %1,%2,%1\n" \ +"5: addxcc %1,%1,%1\n" \ +" bcc 2b\n" \ +" subcc %%g1,1,%%g1\n" \ +"! Got carry from n. Subtract next step to cancel this carry.\n" \ +" bne 4b\n" \ +" addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \ +" sub %1,%2,%1\n" \ +"3: xnor %0,0,%0\n" \ +" ! End of inline udiv_qrnnd" \ + : "=&r" ((USItype) (__q)), \ + "=&r" ((USItype) (__r)) \ + : "r" ((USItype) (__d)), \ + "1" ((USItype) (__n1)), \ + "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC) +#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ +#endif /* __sparclite__ */ +#endif /* __sparc_v8__ */ +#endif /* __sparc_v9__ */ +#endif /* sparc32 */ + +#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \ + && W_TYPE_SIZE == 64 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + UDItype __carry = 0; \ + __asm__ ("addcc\t%r5,%6,%1\n\t" \ + "add\t%r3,%4,%0\n\t" \ + "movcs\t%%xcc, 1, %2\n\t" \ + "add\t%0, %2, %0" \ + : "=r" ((UDItype)(sh)), \ + "=&r" ((UDItype)(sl)), \ + "+r" (__carry) \ + : "%rJ" ((UDItype)(ah)), \ + "rI" ((UDItype)(bh)), \ + "%rJ" ((UDItype)(al)), \ + "rI" ((UDItype)(bl)) \ + __CLOBBER_CC); \ + } while (0) + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + UDItype __carry = 0; \ + __asm__ ("subcc\t%r5,%6,%1\n\t" \ + "sub\t%r3,%4,%0\n\t" \ + "movcs\t%%xcc, 1, %2\n\t" \ + "sub\t%0, %2, %0" \ + : "=r" ((UDItype)(sh)), \ + "=&r" ((UDItype)(sl)), \ + "+r" (__carry) \ + : "%rJ" ((UDItype)(ah)), \ + "rI" ((UDItype)(bh)), \ + "%rJ" ((UDItype)(al)), \ + "rI" ((UDItype)(bl)) \ + __CLOBBER_CC); \ + } while (0) + +#define umul_ppmm(wh, wl, u, v) \ + do { \ + UDItype tmp1, tmp2, tmp3, tmp4; \ + __asm__ __volatile__ ( \ + "srl %7,0,%3\n\t" \ + "mulx %3,%6,%1\n\t" \ + "srlx %6,32,%2\n\t" \ + "mulx %2,%3,%4\n\t" \ + 
"sllx %4,32,%5\n\t" \ + "srl %6,0,%3\n\t" \ + "sub %1,%5,%5\n\t" \ + "srlx %5,32,%5\n\t" \ + "addcc %4,%5,%4\n\t" \ + "srlx %7,32,%5\n\t" \ + "mulx %3,%5,%3\n\t" \ + "mulx %2,%5,%5\n\t" \ + "sethi %%hi(0x80000000),%2\n\t" \ + "addcc %4,%3,%4\n\t" \ + "srlx %4,32,%4\n\t" \ + "add %2,%2,%2\n\t" \ + "movcc %%xcc,%%g0,%2\n\t" \ + "addcc %5,%4,%5\n\t" \ + "sllx %3,32,%3\n\t" \ + "add %1,%3,%1\n\t" \ + "add %5,%2,%0" \ + : "=r" ((UDItype)(wh)), \ + "=&r" ((UDItype)(wl)), \ + "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \ + : "r" ((UDItype)(u)), \ + "r" ((UDItype)(v)) \ + __CLOBBER_CC); \ + } while (0) +#define UMUL_TIME 96 +#define UDIV_TIME 230 +#endif /* sparc64 */ + +#if defined (__vax__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ + : "=g" ((USItype) (sh)), \ + "=&g" ((USItype) (sl)) \ + : "%0" ((USItype) (ah)), \ + "g" ((USItype) (bh)), \ + "%1" ((USItype) (al)), \ + "g" ((USItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \ + : "=g" ((USItype) (sh)), \ + "=&g" ((USItype) (sl)) \ + : "0" ((USItype) (ah)), \ + "g" ((USItype) (bh)), \ + "1" ((USItype) (al)), \ + "g" ((USItype) (bl))) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union { \ + UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("emul %1,%2,$0,%0" \ + : "=r" (__xx.__ll) \ + : "g" (__m0), \ + "g" (__m1)); \ + (xh) = __xx.__i.__h; \ + (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ + } while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {DItype __ll; \ + struct {SItype __l, __h;} __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("ediv %3,%2,%0,%1" \ + : "=g" (q), "=g" (r) \ + : "g" (__xx.__ll), "g" (d)); \ + } while (0) +#endif /* __vax__ */ + +#ifdef _TMS320C6X +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do \ + { \ + UDItype 
__ll; \ + __asm__ ("addu .l1 %1, %2, %0" \ + : "=a" (__ll) : "a" (al), "a" (bl)); \ + (sl) = (USItype)__ll; \ + (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \ + } \ + while (0) + +#ifdef _TMS320C6400_PLUS +#define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \ + (w1) = (USItype) (__x >> 32); \ + (w0) = (USItype) (__x); \ + } while (0) +#endif /* _TMS320C6400_PLUS */ + +#define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) +#ifdef _TMS320C6400 +#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) +#endif +#define UMUL_TIME 4 +#define UDIV_TIME 40 +#endif /* _TMS320C6X */ + +#if defined (__xtensa__) && W_TYPE_SIZE == 32 +/* This code is not Xtensa-configuration-specific, so rely on the compiler + to expand builtin functions depending on what configuration features + are available. This avoids library calls when the operation can be + performed in-line. */ +#define umul_ppmm(w1, w0, u, v) \ + do { \ + DWunion __w; \ + __w.ll = __builtin_umulsidi3 (u, v); \ + w1 = __w.s.high; \ + w0 = __w.s.low; \ + } while (0) +#define __umulsidi3(u, v) __builtin_umulsidi3 (u, v) +#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) +#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) +#endif /* __xtensa__ */ + +#if defined xstormy16 +extern UHItype __stormy16_count_leading_zeros (UHItype); +#define count_leading_zeros(count, x) \ + do \ + { \ + UHItype size; \ + \ + /* We assume that W_TYPE_SIZE is a multiple of 16... 
*/ \ + for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \ + { \ + UHItype c; \ + \ + c = __clzhi2 ((x) >> (size - 16)); \ + (count) += c; \ + if (c != 16) \ + break; \ + } \ + } \ + while (0) +#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE +#endif + +#if defined (__z8000__) && W_TYPE_SIZE == 16 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "%0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "%1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {long int __ll; \ + struct {unsigned int __h, __l;} __i; \ + } __xx; \ + unsigned int __m0 = (m0), __m1 = (m1); \ + __asm__ ("mult %S0,%H3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "rQR" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((signed int) __m0 >> 15) & __m1) \ + + (((signed int) __m1 >> 15) & __m0)); \ + } while (0) +#endif /* __z8000__ */ + +#endif /* __GNUC__ */ + +/* If this machine has no inline assembler, use C macros. */ + +#if !defined (add_ssaaaa) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + __x = (al) + (bl); \ + (sh) = (ah) + (bh) + (__x < (al)); \ + (sl) = __x; \ + } while (0) +#endif + +#if !defined (sub_ddmmss) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ + } while (0) +#endif + +/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of + smul_ppmm. 
*/ +#if !defined (umul_ppmm) && defined (smul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __w1; \ + UWtype __xm0 = (u), __xm1 = (v); \ + smul_ppmm (__w1, w0, __xm0, __xm1); \ + (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \ + + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \ + } while (0) +#endif + +/* If we still don't have umul_ppmm, define it using plain C. */ +#if !defined (umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + \ + __ul = __ll_lowpart (u); \ + __uh = __ll_highpart (u); \ + __vl = __ll_lowpart (v); \ + __vh = __ll_highpart (v); \ + \ + __x0 = (UWtype) __ul * __vl; \ + __x1 = (UWtype) __ul * __vh; \ + __x2 = (UWtype) __uh * __vl; \ + __x3 = (UWtype) __uh * __vh; \ + \ + __x1 += __ll_highpart (__x0);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart (__x1); \ + (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ + } while (0) +#endif + +#if !defined (__umulsidi3) +#define __umulsidi3(u, v) \ + ({DWunion __w; \ + umul_ppmm (__w.s.high, __w.s.low, u, v); \ + __w.ll; }) +#endif + +/* Define this unconditionally, so it can be used for debugging. */ +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + UWtype __d1, __d0, __q1, __q0; \ + UWtype __r1, __r0, __m; \ + __d1 = __ll_highpart (d); \ + __d0 = __ll_lowpart (d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (UWtype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart (n0); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. 
we didn't get carry when adding to __r1 */\ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (UWtype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (UWtype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through + __udiv_w_sdiv (defined in libgcc or elsewhere). */ +#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) +#define udiv_qrnnd(q, r, nh, nl, d) \ + do { \ + extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype); \ + UWtype __r; \ + (q) = __udiv_w_sdiv (&__r, nh, nl, d); \ + (r) = __r; \ + } while (0) +#endif + +/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ +#if !defined (udiv_qrnnd) +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c +#endif + +#if !defined (count_leading_zeros) +#define count_leading_zeros(count, x) \ + do { \ + UWtype __xr = (x); \ + UWtype __a; \ + \ + if (W_TYPE_SIZE <= 32) \ + { \ + __a = __xr < ((UWtype)1<<2*__BITS4) \ + ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4) \ + : (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \ + } \ + else \ + { \ + for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ + if (((__xr >> __a) & 0xff) != 0) \ + break; \ + } \ + \ + (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ + } while (0) +#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE +#endif + +#if !defined (count_trailing_zeros) +/* Define count_trailing_zeros using count_leading_zeros. The latter might be + defined in asm, but if it is not, the C version above is good enough. 
*/ +#define count_trailing_zeros(count, x) \ + do { \ + UWtype __ctz_x = (x); \ + UWtype __ctz_c; \ + count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ + (count) = W_TYPE_SIZE - 1 - __ctz_c; \ + } while (0) +#endif + +#ifndef UDIV_NEEDS_NORMALIZATION +#define UDIV_NEEDS_NORMALIZATION 0 +#endif diff --git a/abstract-machine/am/src/riscv/npc/libgcc/muldi3.S b/abstract-machine/am/src/riscv/npc/libgcc/muldi3.S new file mode 100644 index 0000000..16166d0 --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/libgcc/muldi3.S @@ -0,0 +1,48 @@ +/* Integer multiplication routines for RISC-V. + + Copyright (C) 2016-2022 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "riscv-asm.h" + + .text + .align 2 + +#if __riscv_xlen == 32 +/* Our RV64 64-bit routine is equivalent to our RV32 32-bit routine. 
*/ +# define __muldi3 __mulsi3 +#endif + +FUNC_BEGIN (__muldi3) + mv a2, a0 + li a0, 0 +.L1: + andi a3, a1, 1 + beqz a3, .L2 + add a0, a0, a2 +.L2: + srli a1, a1, 1 + slli a2, a2, 1 + bnez a1, .L1 + ret +FUNC_END (__muldi3) diff --git a/abstract-machine/am/src/riscv/npc/libgcc/multi3.c b/abstract-machine/am/src/riscv/npc/libgcc/multi3.c new file mode 100644 index 0000000..ce357ed --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/libgcc/multi3.c @@ -0,0 +1,86 @@ +/* Multiplication two double word integers for RISC-V. + + Copyright (C) 2016-2022 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +//#include "tconfig.h" +//#include "tsystem.h" +//#include "coretypes.h" +//#include "tm.h" +//#include "libgcc_tm.h" +#define LIBGCC2_UNITS_PER_WORD (__riscv_xlen / 8) + +#include "libgcc2.h" + +#if __riscv_xlen == 32 +/* Our RV64 64-bit routines are equivalent to our RV32 32-bit routines. 
*/ +# define __multi3 __muldi3 +#endif + +DWtype +__multi3 (DWtype u, DWtype v) +{ + const DWunion uu = {.ll = u}; + const DWunion vv = {.ll = v}; + DWunion w; + UWtype u_low = uu.s.low; + UWtype v_low = vv.s.low; + UWtype u_low_msb; + UWtype w_low = 0; + UWtype new_w_low; + UWtype w_high = 0; + UWtype w_high_tmp = 0; + UWtype w_high_tmp2x; + UWtype carry; + + /* Calculate low half part of u and v, and get a UDWtype result just like + what __umulsidi3 do. */ + do + { + new_w_low = w_low + u_low; + w_high_tmp2x = w_high_tmp << 1; + w_high_tmp += w_high; + if (v_low & 1) + { + carry = new_w_low < w_low; + w_low = new_w_low; + w_high = carry + w_high_tmp; + } + u_low_msb = (u_low >> ((sizeof (UWtype) * 8) - 1)); + v_low >>= 1; + u_low <<= 1; + w_high_tmp = u_low_msb | w_high_tmp2x; + } + while (v_low); + + w.s.low = w_low; + w.s.high = w_high; + + if (uu.s.high) + w.s.high = w.s.high + __muluw3(vv.s.low, uu.s.high); + + if (vv.s.high) + w.s.high += __muluw3(uu.s.low, vv.s.high); + + return w.ll; +} diff --git a/abstract-machine/am/src/riscv/npc/libgcc/riscv-asm.h b/abstract-machine/am/src/riscv/npc/libgcc/riscv-asm.h new file mode 100644 index 0000000..b70930e --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/libgcc/riscv-asm.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2017-2022 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define FUNC_TYPE(X) .type X,@function +#define FUNC_SIZE(X) .size X,.-X + +#define FUNC_BEGIN(X) \ + .globl X; \ + FUNC_TYPE (X); \ +X: + +#define FUNC_END(X) \ + FUNC_SIZE(X) + +#define FUNC_ALIAS(X,Y) \ + .globl X; \ + X = Y + +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b +#define HIDDEN_JUMPTARGET(X) CONCAT1(__hidden_, X) +#define HIDDEN_DEF(X) FUNC_ALIAS(HIDDEN_JUMPTARGET(X), X); \ + .hidden HIDDEN_JUMPTARGET(X) diff --git a/abstract-machine/am/src/riscv/npc/libgcc/unused.c b/abstract-machine/am/src/riscv/npc/libgcc/unused.c new file mode 100644 index 0000000..bf95515 --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/libgcc/unused.c @@ -0,0 +1,5 @@ +#include +#include + +double __muldf3 (double a, double b) { panic("Not implement"); } +long __fixdfdi (double a) { panic("Not implement"); } diff --git a/abstract-machine/am/src/riscv/npc/mpe.c b/abstract-machine/am/src/riscv/npc/mpe.c new file mode 100644 index 0000000..6715aa2 --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/mpe.c @@ -0,0 +1,17 @@ +#include + +bool mpe_init(void (*entry)()) { + return false; +} + +int cpu_count() { + return 1; +} + +int cpu_current() { + return 0; +} + +int atomic_xchg(int *addr, int newval) { + return 0; +} diff --git a/abstract-machine/am/src/riscv/npc/start.S b/abstract-machine/am/src/riscv/npc/start.S new file mode 100644 index 0000000..3e56e5c --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/start.S @@ -0,0 +1,8 @@ +.section entry, "ax" +.globl _start +.type _start, @function + +_start: + mv s0, zero + la sp, _stack_pointer + jal _trm_init diff --git a/abstract-machine/am/src/riscv/npc/timer.c b/abstract-machine/am/src/riscv/npc/timer.c new file mode 100644 index 0000000..6ea0ffa --- /dev/null +++ 
b/abstract-machine/am/src/riscv/npc/timer.c @@ -0,0 +1,17 @@ +#include + +void __am_timer_init() { +} + +void __am_timer_uptime(AM_TIMER_UPTIME_T *uptime) { + uptime->us = 0; +} + +void __am_timer_rtc(AM_TIMER_RTC_T *rtc) { + rtc->second = 0; + rtc->minute = 0; + rtc->hour = 0; + rtc->day = 0; + rtc->month = 0; + rtc->year = 1900; +} diff --git a/abstract-machine/am/src/riscv/npc/trap.S b/abstract-machine/am/src/riscv/npc/trap.S new file mode 100644 index 0000000..209b5b5 --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/trap.S @@ -0,0 +1,66 @@ +#define concat_temp(x, y) x ## y +#define concat(x, y) concat_temp(x, y) +#define MAP(c, f) c(f) + +#if __riscv_xlen == 32 +#define LOAD lw +#define STORE sw +#define XLEN 4 +#else +#define LOAD ld +#define STORE sd +#define XLEN 8 +#endif + +#define REGS_LO16(f) \ + f( 1) f( 3) f( 4) f( 5) f( 6) f( 7) f( 8) f( 9) \ +f(10) f(11) f(12) f(13) f(14) f(15) +#ifndef __riscv_e +#define REGS_HI16(f) \ + f(16) f(17) f(18) f(19) \ +f(20) f(21) f(22) f(23) f(24) f(25) f(26) f(27) f(28) f(29) \ +f(30) f(31) +#define NR_REGS 32 +#else +#define REGS_HI16(f) +#define NR_REGS 16 +#endif + +#define REGS(f) REGS_LO16(f) REGS_HI16(f) + +#define PUSH(n) STORE concat(x, n), (n * XLEN)(sp); +#define POP(n) LOAD concat(x, n), (n * XLEN)(sp); + +#define CONTEXT_SIZE ((NR_REGS + 3 + 1) * XLEN) +#define OFFSET_SP ( 2 * XLEN) +#define OFFSET_CAUSE ((NR_REGS + 0) * XLEN) +#define OFFSET_STATUS ((NR_REGS + 1) * XLEN) +#define OFFSET_EPC ((NR_REGS + 2) * XLEN) + +.align 3 +.globl __am_asm_trap +__am_asm_trap: + addi sp, sp, -CONTEXT_SIZE + + MAP(REGS, PUSH) + + csrr t0, mcause + csrr t1, mstatus + csrr t2, mepc + + STORE t0, OFFSET_CAUSE(sp) + STORE t1, OFFSET_STATUS(sp) + STORE t2, OFFSET_EPC(sp) + + mv a0, sp + jal __am_irq_handle + + LOAD t1, OFFSET_STATUS(sp) + LOAD t2, OFFSET_EPC(sp) + csrw mstatus, t1 + csrw mepc, t2 + + MAP(REGS, POP) + + addi sp, sp, CONTEXT_SIZE + mret diff --git a/abstract-machine/am/src/riscv/npc/trm.c 
b/abstract-machine/am/src/riscv/npc/trm.c new file mode 100644 index 0000000..0efe6e7 --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/trm.c @@ -0,0 +1,27 @@ +#include +#include + +extern char _heap_start; +int main(const char *args); + +extern char _pmem_start; +#define PMEM_SIZE (128 * 1024 * 1024) +#define PMEM_END ((uintptr_t)&_pmem_start + PMEM_SIZE) + +Area heap = RANGE(&_heap_start, PMEM_END); +#ifndef MAINARGS +#define MAINARGS "" +#endif +static const char mainargs[] = MAINARGS; + +void putch(char ch) { +} + +void halt(int code) { + while (1); +} + +void _trm_init() { + int ret = main(mainargs); + halt(ret); +} diff --git a/abstract-machine/am/src/riscv/npc/vme.c b/abstract-machine/am/src/riscv/npc/vme.c new file mode 100644 index 0000000..5134154 --- /dev/null +++ b/abstract-machine/am/src/riscv/npc/vme.c @@ -0,0 +1,18 @@ +#include + +bool vme_init(void* (*pgalloc_f)(int), void (*pgfree_f)(void*)) { + return false; +} + +void protect(AddrSpace *as) { +} + +void unprotect(AddrSpace *as) { +} + +void map(AddrSpace *as, void *va, void *pa, int prot) { +} + +Context *ucontext(AddrSpace *as, Area kstack, void *entry) { + return NULL; +} diff --git a/abstract-machine/am/src/riscv/riscv.h b/abstract-machine/am/src/riscv/riscv.h new file mode 100644 index 0000000..4cb9182 --- /dev/null +++ b/abstract-machine/am/src/riscv/riscv.h @@ -0,0 +1,34 @@ +#ifndef RISCV_H__ +#define RISCV_H__ + +#include + +static inline uint8_t inb(uintptr_t addr) { return *(volatile uint8_t *)addr; } +static inline uint16_t inw(uintptr_t addr) { return *(volatile uint16_t *)addr; } +static inline uint32_t inl(uintptr_t addr) { return *(volatile uint32_t *)addr; } + +static inline void outb(uintptr_t addr, uint8_t data) { *(volatile uint8_t *)addr = data; } +static inline void outw(uintptr_t addr, uint16_t data) { *(volatile uint16_t *)addr = data; } +static inline void outl(uintptr_t addr, uint32_t data) { *(volatile uint32_t *)addr = data; } + +#define PTE_V 0x01 +#define PTE_R 
0x02 +#define PTE_W 0x04 +#define PTE_X 0x08 +#define PTE_U 0x10 +#define PTE_A 0x40 +#define PTE_D 0x80 + +enum { MODE_U, MODE_S, MODE_M = 3 }; +#define MSTATUS_MXR (1 << 19) +#define MSTATUS_SUM (1 << 18) + +#if __riscv_xlen == 64 +#define MSTATUS_SXL (2ull << 34) +#define MSTATUS_UXL (2ull << 32) +#else +#define MSTATUS_SXL 0 +#define MSTATUS_UXL 0 +#endif + +#endif diff --git a/abstract-machine/am/src/riscv/spike/atomic.h b/abstract-machine/am/src/riscv/spike/atomic.h new file mode 100644 index 0000000..eca1320 --- /dev/null +++ b/abstract-machine/am/src/riscv/spike/atomic.h @@ -0,0 +1,78 @@ +// See LICENSE for license details. + +#ifndef _RISCV_ATOMIC_H +#define _RISCV_ATOMIC_H + +//#include "config.h" +//#include "encoding.h" + +// Currently, interrupts are always disabled in M-mode. +#define disable_irqsave() (0) +#define enable_irqrestore(flags) ((void) (flags)) + +typedef struct { int lock; } spinlock_t; +#define SPINLOCK_INIT {0} + +#define mb() asm volatile ("fence" ::: "memory") +#define atomic_set(ptr, val) (*(volatile typeof(*(ptr)) *)(ptr) = val) +#define atomic_read(ptr) (*(volatile typeof(*(ptr)) *)(ptr)) + +#ifdef __riscv_atomic +# define atomic_add(ptr, inc) __sync_fetch_and_add(ptr, inc) +# define atomic_or(ptr, inc) __sync_fetch_and_or(ptr, inc) +# define atomic_swap(ptr, swp) __sync_lock_test_and_set(ptr, swp) +# define atomic_cas(ptr, cmp, swp) __sync_val_compare_and_swap(ptr, cmp, swp) +#else +# define atomic_binop(ptr, inc, op) ({ \ + long flags = disable_irqsave(); \ + typeof(*(ptr)) res = atomic_read(ptr); \ + atomic_set(ptr, op); \ + enable_irqrestore(flags); \ + res; }) +# define atomic_add(ptr, inc) atomic_binop(ptr, inc, res + (inc)) +# define atomic_or(ptr, inc) atomic_binop(ptr, inc, res | (inc)) +# define atomic_swap(ptr, inc) atomic_binop(ptr, inc, (inc)) +# define atomic_cas(ptr, cmp, swp) ({ \ + long flags = disable_irqsave(); \ + typeof(*(ptr)) res = *(volatile typeof(*(ptr)) *)(ptr); \ + if (res == (cmp)) *(volatile 
typeof(ptr))(ptr) = (swp); \ + enable_irqrestore(flags); \ + res; }) +#endif + +static inline int spinlock_trylock(spinlock_t* lock) +{ + int res = atomic_swap(&lock->lock, -1); + mb(); + return res; +} + +static inline void spinlock_lock(spinlock_t* lock) +{ + do + { + while (atomic_read(&lock->lock)) + ; + } while (spinlock_trylock(lock)); +} + +static inline void spinlock_unlock(spinlock_t* lock) +{ + mb(); + atomic_set(&lock->lock,0); +} + +static inline long spinlock_lock_irqsave(spinlock_t* lock) +{ + long flags = disable_irqsave(); + spinlock_lock(lock); + return flags; +} + +static inline void spinlock_unlock_irqrestore(spinlock_t* lock, long flags) +{ + spinlock_unlock(lock); + enable_irqrestore(flags); +} + +#endif diff --git a/abstract-machine/am/src/riscv/spike/htif.c b/abstract-machine/am/src/riscv/spike/htif.c new file mode 100644 index 0000000..f56ba8f --- /dev/null +++ b/abstract-machine/am/src/riscv/spike/htif.c @@ -0,0 +1,111 @@ +// See LICENSE for license details. + +#include "htif.h" +#include "atomic.h" +#include + +extern uint64_t __htif_base; +volatile uint64_t tohost __attribute__((section(".htif"))); +volatile uint64_t fromhost __attribute__((section(".htif"))); +volatile int htif_console_buf; +static spinlock_t htif_lock = SPINLOCK_INIT; + +#define TOHOST(base_int) (uint64_t *)(base_int + TOHOST_OFFSET) +#define FROMHOST(base_int) (uint64_t *)(base_int + FROMHOST_OFFSET) + +#define TOHOST_OFFSET ((uintptr_t)tohost - (uintptr_t)__htif_base) +#define FROMHOST_OFFSET ((uintptr_t)fromhost - (uintptr_t)__htif_base) + +static void __check_fromhost() +{ + uint64_t fh = fromhost; + if (!fh) + return; + fromhost = 0; + + // this should be from the console + assert(FROMHOST_DEV(fh) == 1); + switch (FROMHOST_CMD(fh)) { + case 0: + htif_console_buf = 1 + (uint8_t)FROMHOST_DATA(fh); + break; + case 1: + break; + default: + assert(0); + } +} + +static void __set_tohost(uintptr_t dev, uintptr_t cmd, uintptr_t data) +{ + while (tohost) + 
__check_fromhost(); + tohost = TOHOST_CMD(dev, cmd, data); +} + +int htif_console_getchar() +{ +#if __riscv_xlen == 32 + // HTIF devices are not supported on RV32 + return -1; +#endif + + spinlock_lock(&htif_lock); + __check_fromhost(); + int ch = htif_console_buf; + if (ch >= 0) { + htif_console_buf = -1; + __set_tohost(1, 0, 0); + } + spinlock_unlock(&htif_lock); + + return ch - 1; +} + +static void do_tohost_fromhost(uintptr_t dev, uintptr_t cmd, uintptr_t data) +{ + spinlock_lock(&htif_lock); + __set_tohost(dev, cmd, data); + + while (1) { + uint64_t fh = fromhost; + if (fh) { + if (FROMHOST_DEV(fh) == dev && FROMHOST_CMD(fh) == cmd) { + fromhost = 0; + break; + } + __check_fromhost(); + } + } + spinlock_unlock(&htif_lock); +} + +void htif_syscall(uintptr_t arg) +{ + do_tohost_fromhost(0, 0, arg); +} + +void htif_console_putchar(uint8_t ch) +{ +#if __riscv_xlen == 32 + // HTIF devices are not supported on RV32, so proxy a write system call + volatile uint64_t magic_mem[8]; + magic_mem[0] = SYS_write; + magic_mem[1] = 1; + magic_mem[2] = (uintptr_t)&ch; + magic_mem[3] = 1; + do_tohost_fromhost(0, 0, (uintptr_t)magic_mem); +#else + spinlock_lock(&htif_lock); + __set_tohost(1, 1, ch); + spinlock_unlock(&htif_lock); +#endif +} + +void htif_poweroff() +{ + while (1) { + fromhost = 0; + tohost = 1; + } +} diff --git a/abstract-machine/am/src/riscv/spike/htif.h b/abstract-machine/am/src/riscv/spike/htif.h new file mode 100644 index 0000000..73967d8 --- /dev/null +++ b/abstract-machine/am/src/riscv/spike/htif.h @@ -0,0 +1,24 @@ +// See LICENSE for license details. 
+ +#ifndef _RISCV_HTIF_H +#define _RISCV_HTIF_H + +#include + +#if __riscv_xlen == 64 +# define TOHOST_CMD(dev, cmd, payload) \ + (((uint64_t)(dev) << 56) | ((uint64_t)(cmd) << 48) | (uint64_t)(payload)) +#else +# define TOHOST_CMD(dev, cmd, payload) ({ \ + if ((dev) || (cmd)) __builtin_trap(); \ + (payload); }) +#endif +#define FROMHOST_DEV(fromhost_value) ((uint64_t)(fromhost_value) >> 56) +#define FROMHOST_CMD(fromhost_value) ((uint64_t)(fromhost_value) << 8 >> 56) +#define FROMHOST_DATA(fromhost_value) ((uint64_t)(fromhost_value) << 16 >> 16) + +void htif_console_putchar(uint8_t); +int htif_console_getchar(); +void htif_poweroff() __attribute__((noreturn)); + +#endif diff --git a/abstract-machine/am/src/riscv/spike/ioe.c b/abstract-machine/am/src/riscv/spike/ioe.c new file mode 100644 index 0000000..8ab29fc --- /dev/null +++ b/abstract-machine/am/src/riscv/spike/ioe.c @@ -0,0 +1,27 @@ +#include +#include + +void __am_timer_init(); +void __am_timer_rtc(AM_TIMER_RTC_T *); +void __am_timer_uptime(AM_TIMER_UPTIME_T *); + +static void __am_timer_config(AM_TIMER_CONFIG_T *cfg) { cfg->present = true; cfg->has_rtc = true; } + +typedef void (*handler_t)(void *buf); +static void *lut[128] = { + [AM_TIMER_CONFIG] = __am_timer_config, + [AM_TIMER_RTC ] = __am_timer_rtc, + [AM_TIMER_UPTIME] = __am_timer_uptime, +}; + +static void fail(void *buf) { panic("access nonexist register"); } + +bool ioe_init() { + for (int i = 0; i < LENGTH(lut); i++) + if (!lut[i]) lut[i] = fail; + __am_timer_init(); + return true; +} + +void ioe_read (int reg, void *buf) { ((handler_t)lut[reg])(buf); } +void ioe_write(int reg, void *buf) { ((handler_t)lut[reg])(buf); } diff --git a/abstract-machine/am/src/riscv/spike/linker.ld b/abstract-machine/am/src/riscv/spike/linker.ld new file mode 100644 index 0000000..9db82aa --- /dev/null +++ b/abstract-machine/am/src/riscv/spike/linker.ld @@ -0,0 +1,35 @@ +ENTRY(_start) + +SECTIONS { + . 
= 0x80000000;
+  .text : {
+    *(entry)
+    *(.text*)
+  }
+  etext = .;
+  _etext = .;
+  .rodata : {
+    *(.rodata*)
+  }
+  .htif : {
+    PROVIDE(__htif_base = . );
+    *(.htif)
+  }
+  .data : {
+    *(.data)
+  }
+  edata = .;
+  _data = .;
+  .bss : {
+    _bss_start = .;
+    *(.bss*)
+    *(.sbss*)
+    *(.scommon)
+  }
+  _stack_top = ALIGN(0x1000);
+  . = _stack_top + 0x8000;
+  _stack_pointer = .;
+  end = .;
+  _end = .;
+  _heap_start = ALIGN(0x1000);
+}
diff --git a/abstract-machine/am/src/riscv/spike/start.S b/abstract-machine/am/src/riscv/spike/start.S
new file mode 100644
index 0000000..3e56e5c
--- /dev/null
+++ b/abstract-machine/am/src/riscv/spike/start.S
@@ -0,0 +1,8 @@
+.section entry, "ax"
+.globl _start
+.type _start, @function
+
+_start:
+  mv s0, zero
+  la sp, _stack_pointer
+  jal _trm_init
diff --git a/abstract-machine/am/src/riscv/spike/timer.c b/abstract-machine/am/src/riscv/spike/timer.c
new file mode 100644
index 0000000..abe6783
--- /dev/null
+++ b/abstract-machine/am/src/riscv/spike/timer.c
@@ -0,0 +1,43 @@
+#include
+
+static uint64_t boot_time = 0;
+
+// Address of the 64-bit time counter: CLINT_MMIO + TIME_BASE.
+#define CLINT_MMIO 0x2000000ul
+#define TIME_BASE 0xbff8
+
+// Read the 64-bit counter as two 32-bit halves and return it divided by 10
+// (presumably 10 MHz ticks -> microseconds; TODO confirm the tick rate).
+// The high half is re-read until it is stable so that a carry from the low
+// half between the two loads cannot produce a torn value.
+static uint64_t read_time() {
+  volatile uint32_t *counter = (volatile uint32_t *)(CLINT_MMIO + TIME_BASE);
+  uint32_t lo, hi, hi_again;
+  do {
+    hi = counter[1];
+    lo = counter[0];
+    hi_again = counter[1];
+  } while (hi != hi_again);
+  uint64_t time = ((uint64_t)hi << 32) | lo;
+  return time / 10;
+}
+
+// Report the time elapsed since __am_timer_init(), in read_time() units.
+void __am_timer_uptime(AM_TIMER_UPTIME_T *uptime) {
+  uptime->us = read_time() - boot_time;
+}
+
+// Record the current counter value as the uptime origin.
+void __am_timer_init() {
+  boot_time = read_time();
+}
+
+// No clock is read here: every field is a fixed placeholder (year 1900, rest 0).
+void __am_timer_rtc(AM_TIMER_RTC_T *rtc) {
+  rtc->second = 0;
+  rtc->minute = 0;
+  rtc->hour = 0;
+  rtc->day = 0;
+  rtc->month = 0;
+  rtc->year = 1900;
+}
diff --git a/abstract-machine/am/src/riscv/spike/trm.c b/abstract-machine/am/src/riscv/spike/trm.c
new file mode 100644
index 0000000..b193bc9
--- /dev/null
+++ b/abstract-machine/am/src/riscv/spike/trm.c
@@ -0,0 +1,34 @@
+#include
+#include
+#include
+#include "htif.h"
+
+extern char _heap_start;
+int main(const char *args);
+
+extern char _pmem_start; +#define PMEM_SIZE (128 * 1024 * 1024) +#define PMEM_END ((uintptr_t)0x80000000 + PMEM_SIZE) + +Area heap = RANGE(&_heap_start, PMEM_END); +#ifndef MAINARGS +#define MAINARGS "" +#endif +static const char mainargs[] = MAINARGS; + +void putch(char ch) { + htif_console_putchar(ch); +} + +void halt(int code) { + printf("Exit with code = %d\n", code); + htif_poweroff(); + + // should not reach here + while (1); +} + +void _trm_init() { + int ret = main(mainargs); + halt(ret); +} diff --git a/abstract-machine/am/src/x86/nemu/cte.c b/abstract-machine/am/src/x86/nemu/cte.c new file mode 100644 index 0000000..e5c5bc7 --- /dev/null +++ b/abstract-machine/am/src/x86/nemu/cte.c @@ -0,0 +1,67 @@ +#include +#include +#include + +#define NR_IRQ 256 // IDT size +#define SEG_KCODE 1 +#define SEG_KDATA 2 + +static Context* (*user_handler)(Event, Context*) = NULL; + +void __am_irq0(); +void __am_vecsys(); +void __am_vectrap(); +void __am_vecnull(); + + +Context* __am_irq_handle(Context *c) { + if (user_handler) { + Event ev = {0}; + switch (c->irq) { + default: ev.event = EVENT_ERROR; break; + } + + c = user_handler(ev, c); + assert(c != NULL); + } + + return c; +} + +bool cte_init(Context*(*handler)(Event, Context*)) { + static GateDesc32 idt[NR_IRQ]; + + // initialize IDT + for (unsigned int i = 0; i < NR_IRQ; i ++) { + idt[i] = GATE32(STS_TG, KSEL(SEG_KCODE), __am_vecnull, DPL_KERN); + } + + // ----------------------- interrupts ---------------------------- + idt[32] = GATE32(STS_IG, KSEL(SEG_KCODE), __am_irq0, DPL_KERN); + // ---------------------- system call ---------------------------- + idt[0x80] = GATE32(STS_TG, KSEL(SEG_KCODE), __am_vecsys, DPL_USER); + idt[0x81] = GATE32(STS_TG, KSEL(SEG_KCODE), __am_vectrap, DPL_KERN); + + set_idt(idt, sizeof(idt)); + + // register event handler + user_handler = handler; + + return true; +} + + +Context* kcontext(Area kstack, void (*entry)(void *), void *arg) { + return NULL; +} + +void yield() { + asm 
volatile("int $0x81"); +} + +bool ienabled() { + return false; +} + +void iset(bool enable) { +} diff --git a/abstract-machine/am/src/x86/nemu/start.S b/abstract-machine/am/src/x86/nemu/start.S new file mode 100644 index 0000000..66959c1 --- /dev/null +++ b/abstract-machine/am/src/x86/nemu/start.S @@ -0,0 +1,8 @@ +.section entry, "ax" +.globl _start +.type _start, @function + +_start: + mov $0, %ebp + mov $_stack_pointer, %esp + call _trm_init # never return diff --git a/abstract-machine/am/src/x86/nemu/trap.S b/abstract-machine/am/src/x86/nemu/trap.S new file mode 100644 index 0000000..878d068 --- /dev/null +++ b/abstract-machine/am/src/x86/nemu/trap.S @@ -0,0 +1,22 @@ +#----|------------entry------------|---irq id---|-----handler-----| +.globl __am_vecsys; __am_vecsys: pushl $0x80; jmp __am_asm_trap +.globl __am_vectrap; __am_vectrap: pushl $0x81; jmp __am_asm_trap +.globl __am_irq0; __am_irq0: pushl $32; jmp __am_asm_trap +.globl __am_vecnull; __am_vecnull: pushl $-1; jmp __am_asm_trap + + +__am_asm_trap: + pushal + + pushl $0 + + pushl %esp + call __am_irq_handle + + addl $4, %esp + + addl $4, %esp + popal + addl $4, %esp + + iret diff --git a/abstract-machine/am/src/x86/nemu/vme.c b/abstract-machine/am/src/x86/nemu/vme.c new file mode 100644 index 0000000..3aaf483 --- /dev/null +++ b/abstract-machine/am/src/x86/nemu/vme.c @@ -0,0 +1,64 @@ +#include +#include +#include + +static AddrSpace kas = {}; +static void* (*pgalloc_usr)(int) = NULL; +static void (*pgfree_usr)(void*) = NULL; +static int vme_enable = 0; + +static Area segments[] = { // Kernel memory mappings + NEMU_PADDR_SPACE +}; + +#define USER_SPACE RANGE(0x40000000, 0xc0000000) + +bool vme_init(void* (*pgalloc_f)(int), void (*pgfree_f)(void*)) { + pgalloc_usr = pgalloc_f; + pgfree_usr = pgfree_f; + + kas.ptr = pgalloc_f(PGSIZE); + + int i; + for (i = 0; i < LENGTH(segments); i ++) { + void *va = segments[i].start; + for (; va < segments[i].end; va += PGSIZE) { + map(&kas, va, va, 0); + } + } + + 
set_cr3(kas.ptr);
+  set_cr0(get_cr0() | CR0_PG);
+  vme_enable = 1;
+
+  return true;
+}
+
+void protect(AddrSpace *as) {
+  PTE *updir = (PTE*)(pgalloc_usr(PGSIZE));
+  as->ptr = updir;
+  as->area = USER_SPACE;
+  as->pgsize = PGSIZE;
+  // map kernel space
+  memcpy(updir, kas.ptr, PGSIZE);
+}
+
+void unprotect(AddrSpace *as) {
+}
+
+void __am_get_cur_as(Context *c) {
+  c->cr3 = (vme_enable ? (void *)get_cr3() : NULL);
+}
+
+void __am_switch(Context *c) {
+  if (vme_enable && c->cr3 != NULL) {
+    set_cr3(c->cr3);
+  }
+}
+
+void map(AddrSpace *as, void *va, void *pa, int prot) {
+}
+
+Context* ucontext(AddrSpace *as, Area kstack, void *entry) {
+  return NULL;
+}
diff --git a/abstract-machine/am/src/x86/qemu/boot/Makefile b/abstract-machine/am/src/x86/qemu/boot/Makefile
new file mode 100644
index 0000000..fa583f3
--- /dev/null
+++ b/abstract-machine/am/src/x86/qemu/boot/Makefile
@@ -0,0 +1,13 @@
+SRCS := start.S main.c
+
+# Build the 512-byte boot sector: link start.S/main.c at 0x7c00, then let
+# genboot.py replace the linked file with its padded, signed raw .text image.
+bootblock.o: $(SRCS) Makefile
+	@echo + CC $(SRCS)
+	@$(CROSS_COMPILE)gcc -static -m32 -fno-pic -Os -nostdlib -Ttext 0x7c00 -I$(AM_HOME)/am/src -o $@ $(SRCS)
+	@python3 genboot.py $@
+
+# clean is a command, not a file; keep it working even if a file named "clean" exists.
+.PHONY: clean
+clean:
+	rm -rf *.o
diff --git a/abstract-machine/am/src/x86/qemu/boot/genboot.py b/abstract-machine/am/src/x86/qemu/boot/genboot.py
new file mode 100644
index 0000000..a35d548
--- /dev/null
+++ b/abstract-machine/am/src/x86/qemu/boot/genboot.py
@@ -0,0 +1,19 @@
+import os, sys, pathlib, subprocess
+
+# Rewrite the linked boot block (argv[1]) in place as a 512-byte boot sector:
+# raw .text padded with zeros to 510 bytes, followed by the 0x55 0xaa signature.
+f = pathlib.Path(sys.argv[1])
+try:
+    objcopy = os.getenv('CROSS_COMPILE', '') + 'objcopy'
+    # check=True: a failing objcopy must abort the build instead of silently
+    # producing an all-zero boot sector from its empty stdout.
+    data = subprocess.run(
+        [objcopy, '-S', '-O', 'binary', '-j', '.text', f, '/dev/stdout'],
+        capture_output=True, check=True).stdout
+    assert len(data) <= 510
+    data += b'\0' * (510 - len(data)) + b'\x55\xaa'
+    f.write_bytes(data)
+except:
+    # Never leave a half-built bootblock behind; the error is re-raised.
+    f.unlink()
+    raise
diff --git a/abstract-machine/am/src/x86/qemu/boot/main.c b/abstract-machine/am/src/x86/qemu/boot/main.c
new file mode 100644
index 0000000..13910d1
--- /dev/null
+++ b/abstract-machine/am/src/x86/qemu/boot/main.c
@@ -0,0
+1,103 @@
+#include
+#include
+#include
+
+#define SECTSIZE 512
+#define ARGSIZE 1024
+
+// Spin until status port 0x1f7 reads bit 7 clear and bit 6 set.
+static inline void wait_disk(void) {
+  while ((inb(0x1f7) & 0xc0) != 0x40);
+}
+
+// Read sector `sect` (SECTSIZE bytes) into buf with port I/O, 32 bits at a time.
+static inline void read_disk(void *buf, int sect) {
+  wait_disk();
+  outb(0x1f2, 1);
+  outb(0x1f3, sect);
+  outb(0x1f4, sect >> 8);
+  outb(0x1f5, sect >> 16);
+  outb(0x1f6, (sect >> 24) | 0xE0);
+  outb(0x1f7, 0x20);
+  wait_disk();
+  for (int i = 0; i < SECTSIZE / 4; i ++) {
+    ((uint32_t *)buf)[i] = inl(0x1f0);
+  }
+}
+
+// Read whole sectors covering [buf, buf + nbytes); buf is rounded down to a
+// sector boundary. disk_offset 0 maps to the sector that follows sector 0
+// (presumably the boot sector) and the ARGSIZE-byte argument area.
+static inline void copy_from_disk(void *buf, int nbytes, int disk_offset) {
+  uint32_t cur = (uint32_t)buf & ~(SECTSIZE - 1);
+  uint32_t ed = (uint32_t)buf + nbytes;
+  uint32_t sect = (disk_offset / SECTSIZE) + (ARGSIZE / SECTSIZE) + 1;
+  for(; cur < ed; cur += SECTSIZE, sect ++)
+    read_disk((void *)cur, sect);
+}
+
+// Load one program segment: copy filesz bytes to paddr, then zero-fill up to
+// memsz bytes.
+static void load_program(uint32_t filesz, uint32_t memsz, uint32_t paddr, uint32_t offset) {
+  copy_from_disk((void *)paddr, filesz, offset);
+  char *bss = (void *)(paddr + filesz);
+  // `<` instead of `!=`: with memsz < filesz the counter would otherwise wrap
+  // and zero almost 4 GiB of memory.
+  for (uint32_t i = filesz; i < memsz; i++) {
+    *bss++ = 0;
+  }
+}
+
+// Load every program header of a 64-bit ELF (fields truncated to 32 bits).
+static void load_elf64(Elf64_Ehdr *elf) {
+  Elf64_Phdr *ph = (Elf64_Phdr *)((char *)elf + elf->e_phoff);
+  for (int i = 0; i < elf->e_phnum; i++, ph++) {
+    load_program(
+      (uint32_t)ph->p_filesz,
+      (uint32_t)ph->p_memsz,
+      (uint32_t)ph->p_paddr,
+      (uint32_t)ph->p_offset
+    );
+  }
+}
+
+// Load every program header of a 32-bit ELF.
+static void load_elf32(Elf32_Ehdr *elf) {
+  Elf32_Phdr *ph = (Elf32_Phdr *)((char *)elf + elf->e_phoff);
+  for (int i = 0; i < elf->e_phnum; i++, ph++) {
+    load_program(
+      (uint32_t)ph->p_filesz,
+      (uint32_t)ph->p_memsz,
+      (uint32_t)ph->p_paddr,
+      (uint32_t)ph->p_offset
+    );
+  }
+}
+
+// Called from start.S. When is_ap is 0, load the argument string and the ELF
+// image from disk; in every case, jump to the ELF entry point at the end.
+void load_kernel(void) {
+  Elf32_Ehdr *elf32 = (void *)0x8000;
+  Elf64_Ehdr *elf64 = (void *)0x8000;
+  int is_ap = boot_record()->is_ap;
+
+  if (!is_ap) {
+    // load argument (string) to memory
+    copy_from_disk((void *)MAINARG_ADDR, 1024, -1024);
+    // load elf header to memory
+    copy_from_disk(elf32, 4096, 0);
+    if (elf32->e_machine == EM_X86_64) {
+
load_elf64(elf64); + } else { + load_elf32(elf32); + } + } else { + // everything should be loaded + } + + if (elf32->e_machine == EM_X86_64) { + ((void(*)())(uint32_t)elf64->e_entry)(); + } else { + ((void(*)())(uint32_t)elf32->e_entry)(); + } +} diff --git a/abstract-machine/am/src/x86/qemu/boot/start.S b/abstract-machine/am/src/x86/qemu/boot/start.S new file mode 100644 index 0000000..1e5cdd9 --- /dev/null +++ b/abstract-machine/am/src/x86/qemu/boot/start.S @@ -0,0 +1,60 @@ +#define CR0_PE 0x00000001 + +#define GDT_ENTRY(n) \ + ((n) << 3) + +#define SEG_NULLASM \ + .word 0, 0; \ + .byte 0, 0, 0, 0 + +#define SEG_ASM(type, base, lim) \ + .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \ + .byte (((base) >> 16) & 0xff), (0x90 | (type)), \ + (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) + +.code16 +.globl _start +_start: + cli + + xorw %ax, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + +# Set a 640 x 480 x 32 video mode + mov $0x4f01, %ax + mov $0x0112, %cx + mov $0x4000, %di + int $0x10 + + mov $0x4f02, %ax + mov $0x4112, %bx + int $0x10 + + lgdt gdtdesc + movl %cr0, %eax + orl $CR0_PE, %eax + movl %eax, %cr0 + ljmp $GDT_ENTRY(1), $start32 + +.code32 +start32: + movw $GDT_ENTRY(2), %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + + movl $0xa000, %esp + call load_kernel + +# GDT +.p2align 2 +gdt: + SEG_NULLASM + SEG_ASM(0xA, 0x0, 0xffffffff) + SEG_ASM(0x2, 0x0, 0xffffffff) + +gdtdesc: + .word (gdtdesc - gdt - 1) + .long gdt diff --git a/abstract-machine/am/src/x86/qemu/cte.c b/abstract-machine/am/src/x86/qemu/cte.c new file mode 100644 index 0000000..42d5669 --- /dev/null +++ b/abstract-machine/am/src/x86/qemu/cte.c @@ -0,0 +1,165 @@ +#include "x86-qemu.h" + +static Context* (*user_handler)(Event, Context*) = NULL; +#if __x86_64__ +static GateDesc64 idt[NR_IRQ]; +#define GATE GATE64 +#else +static GateDesc32 idt[NR_IRQ]; +#define GATE GATE32 +#endif + +#define IRQHANDLE_DECL(id, dpl, err) \ + void __am_irq##id(); + +IRQS(IRQHANDLE_DECL) 
+void __am_irqall(); +void __am_kcontext_start(); + +void __am_irq_handle(struct trap_frame *tf) { + Context *saved_ctx = &tf->saved_context; + Event ev = { + .event = EVENT_NULL, + .cause = 0, .ref = 0, + .msg = "(no message)", + }; + +#if __x86_64 + saved_ctx->rip = tf->rip; + saved_ctx->cs = tf->cs; + saved_ctx->rflags = tf->rflags; + saved_ctx->rsp = tf->rsp; + saved_ctx->rsp0 = CPU->tss.rsp0; + saved_ctx->ss = tf->ss; +#else + saved_ctx->eip = tf->eip; + saved_ctx->cs = tf->cs; + saved_ctx->eflags = tf->eflags; + saved_ctx->esp0 = CPU->tss.esp0; + saved_ctx->ss3 = USEL(SEG_UDATA); + // no ss/esp saved for DPL_KERNEL + saved_ctx->esp = (tf->cs & DPL_USER ? tf->esp : (uint32_t)(tf + 1) - 8); +#endif + saved_ctx->cr3 = (void *)get_cr3(); + + #define IRQ T_IRQ0 + + #define MSG(m) ev.msg = m; + + if (IRQ 0 <= tf->irq && tf->irq < IRQ 32) { + __am_lapic_eoi(); + } + + switch (tf->irq) { + case IRQ 0: MSG("timer interrupt (lapic)") + ev.event = EVENT_IRQ_TIMER; break; + case IRQ 1: MSG("I/O device IRQ1 (keyboard)") + ev.event = EVENT_IRQ_IODEV; break; + case IRQ 4: MSG("I/O device IRQ4 (COM1)") + ev.event = EVENT_IRQ_IODEV; break; + case EX_SYSCALL: MSG("int $0x80 system call") + ev.event = EVENT_SYSCALL; break; + case EX_YIELD: MSG("int $0x81 yield") + ev.event = EVENT_YIELD; break; + case EX_DE: MSG("DE #0 divide by zero") + ev.event = EVENT_ERROR; break; + case EX_UD: MSG("UD #6 invalid opcode") + ev.event = EVENT_ERROR; break; + case EX_NM: MSG("NM #7 coprocessor error") + ev.event = EVENT_ERROR; break; + case EX_DF: MSG("DF #8 double fault") + ev.event = EVENT_ERROR; break; + case EX_TS: MSG("TS #10 invalid TSS") + ev.event = EVENT_ERROR; break; + case EX_NP: MSG("NP #11 segment/gate not present") + ev.event = EVENT_ERROR; break; + case EX_SS: MSG("SS #12 stack fault") + ev.event = EVENT_ERROR; break; + case EX_GP: MSG("GP #13, general protection fault") + ev.event = EVENT_ERROR; break; + case EX_PF: MSG("PF #14, page fault, @cause: PROT_XXX") + ev.event = 
EVENT_PAGEFAULT; + if (tf->errcode & 0x1) ev.cause |= MMAP_NONE; + if (tf->errcode & 0x2) ev.cause |= MMAP_WRITE; + else ev.cause |= MMAP_READ; + ev.ref = get_cr2(); + break; + default: MSG("unrecognized interrupt/exception") + ev.event = EVENT_ERROR; + ev.cause = tf->errcode; + break; + } + + Context *ret_ctx = user_handler(ev, saved_ctx); + panic_on(!ret_ctx, "returning to NULL context"); + + if (ret_ctx->cr3) { + set_cr3(ret_ctx->cr3); +#if __x86_64__ + CPU->tss.rsp0 = ret_ctx->rsp0; +#else + CPU->tss.ss0 = KSEL(SEG_KDATA); + CPU->tss.esp0 = ret_ctx->esp0; +#endif + } + + __am_iret(ret_ctx); +} + +bool cte_init(Context *(*handler)(Event, Context *)) { + panic_on(cpu_current() != 0, "init CTE in non-bootstrap CPU"); + panic_on(!handler, "no interrupt handler"); + + for (int i = 0; i < NR_IRQ; i ++) { + idt[i] = GATE(STS_TG, KSEL(SEG_KCODE), __am_irqall, DPL_KERN); + } +#define IDT_ENTRY(id, dpl, err) \ + idt[id] = GATE(STS_TG, KSEL(SEG_KCODE), __am_irq##id, DPL_##dpl); + IRQS(IDT_ENTRY) + + user_handler = handler; + return true; +} + +void yield() { + interrupt(0x81); +} + +bool ienabled() { + return (get_efl() & FL_IF) != 0; +} + +void iset(bool enable) { + if (enable) sti(); + else cli(); +} + +void __am_panic_on_return() { panic("kernel context returns"); } + +Context* kcontext(Area kstack, void (*entry)(void *), void *arg) { + Context *ctx = kstack.end - sizeof(Context); + *ctx = (Context) { 0 }; + +#if __x86_64__ + ctx->cs = KSEL(SEG_KCODE); + ctx->rip = (uintptr_t)__am_kcontext_start; + ctx->rflags = FL_IF; + ctx->rsp = (uintptr_t)kstack.end; +#else + ctx->ds = KSEL(SEG_KDATA); + ctx->cs = KSEL(SEG_KCODE); + ctx->eip = (uintptr_t)__am_kcontext_start; + ctx->eflags = FL_IF; + ctx->esp = (uintptr_t)kstack.end; +#endif + + ctx->GPR1 = (uintptr_t)arg; + ctx->GPR2 = (uintptr_t)entry; + + return ctx; +} + +void __am_percpu_initirq() { + __am_ioapic_enable(IRQ_KBD, 0); + __am_ioapic_enable(IRQ_COM1, 0); + set_idt(idt, sizeof(idt)); +} diff --git 
a/abstract-machine/am/src/x86/qemu/ioe.c b/abstract-machine/am/src/x86/qemu/ioe.c new file mode 100644 index 0000000..71a1e16 --- /dev/null +++ b/abstract-machine/am/src/x86/qemu/ioe.c @@ -0,0 +1,471 @@ +#include "x86-qemu.h" +#include // TODO: delete + +// UART +// ==================================================== + +#define COM1 0x3f8 + +static int uart_init() { + outb(COM1 + 2, 0); + outb(COM1 + 3, 0x80); + outb(COM1 + 0, 115200 / 9600); + outb(COM1 + 1, 0); + outb(COM1 + 3, 0x03); + outb(COM1 + 4, 0); + outb(COM1 + 1, 0x01); + inb (COM1 + 2); + inb (COM1 + 0); + return 0; +} + +static void uart_config(AM_UART_CONFIG_T *cfg) { + cfg->present = true; +} + +static void uart_tx(AM_UART_TX_T *send) { + outb(COM1, send->data); +} + +static void uart_rx(AM_UART_RX_T *recv) { + recv->data = (inb(COM1 + 5) & 0x1) ? inb(COM1) : -1; +} + +// Timer +// ==================================================== + +static AM_TIMER_RTC_T boot_date; +static uint32_t freq_mhz = 2000; +static uint64_t uptsc; +static void timer_rtc(AM_TIMER_RTC_T *rtc); + +static inline int read_rtc(int reg) { + outb(0x70, reg); + int ret = inb(0x71); + return (ret & 0xf) + (ret >> 4) * 10; +} + +static void read_rtc_async(AM_TIMER_RTC_T *rtc) { + *rtc = (AM_TIMER_RTC_T) { + .second = read_rtc(0), + .minute = read_rtc(2), + .hour = read_rtc(4), + .day = read_rtc(7), + .month = read_rtc(8), + .year = read_rtc(9) + 2000, + }; +} + +static void wait_sec(AM_TIMER_RTC_T *t1) { + AM_TIMER_RTC_T t0; + while (1) { + read_rtc_async(&t0); + for (int volatile i = 0; i < 100000; i++) ; + read_rtc_async(t1); + if (t0.second != t1->second) { + return; + } + } +} + +static uint32_t estimate_freq() { + AM_TIMER_RTC_T rtc1, rtc2; + uint64_t tsc1, tsc2, t1, t2; + wait_sec(&rtc1); tsc1 = rdtsc(); t1 = rtc1.minute * 60 + rtc1.second; + wait_sec(&rtc2); tsc2 = rdtsc(); t2 = rtc2.minute * 60 + rtc2.second; + if (t1 >= t2) return estimate_freq(); // passed an hour; try again + return ((tsc2 - tsc1) >> 20) / (t2 - t1); +} 
+ +static void timer_init() { + freq_mhz = estimate_freq(); + timer_rtc(&boot_date); + uptsc = rdtsc(); +} + +static void timer_config(AM_TIMER_CONFIG_T *cfg) { + cfg->present = cfg->has_rtc = true; +} + +static void timer_rtc(AM_TIMER_RTC_T *rtc) { + int tmp; + do { + read_rtc_async(rtc); + tmp = read_rtc(0); + } while (tmp != rtc->second); +} + +static void timer_uptime(AM_TIMER_UPTIME_T *upt) { + upt->us = (rdtsc() - uptsc) / freq_mhz; +} + +// Input +// ==================================================== + +static int keylut[128] = { + [0x01] = AM_KEY_ESCAPE, [0x02] = AM_KEY_1, [0x03] = AM_KEY_2, + [0x04] = AM_KEY_3, [0x05] = AM_KEY_4, [0x06] = AM_KEY_5, [0x07] = AM_KEY_6, + [0x08] = AM_KEY_7, [0x09] = AM_KEY_8, [0x0a] = AM_KEY_9, [0x0b] = AM_KEY_0, + [0x0c] = AM_KEY_MINUS, [0x0d] = AM_KEY_EQUALS, + [0x0e] = AM_KEY_BACKSPACE, [0x0f] = AM_KEY_TAB, + [0x10] = AM_KEY_Q, [0x11] = AM_KEY_W, [0x12] = AM_KEY_E, [0x13] = AM_KEY_R, + [0x14] = AM_KEY_T, [0x15] = AM_KEY_Y, [0x16] = AM_KEY_U, [0x17] = AM_KEY_I, + [0x18] = AM_KEY_O, [0x19] = AM_KEY_P, [0x1a] = AM_KEY_LEFTBRACKET, + [0x1b] = AM_KEY_RIGHTBRACKET, [0x1c] = AM_KEY_RETURN, + [0x1d] = AM_KEY_LCTRL, [0x1e] = AM_KEY_A, [0x1f] = AM_KEY_S, + [0x20] = AM_KEY_D, [0x21] = AM_KEY_F, [0x22] = AM_KEY_G, [0x23] = AM_KEY_H, + [0x24] = AM_KEY_J, [0x25] = AM_KEY_K, [0x26] = AM_KEY_L, + [0x27] = AM_KEY_SEMICOLON, [0x28] = AM_KEY_APOSTROPHE, + [0x29] = AM_KEY_GRAVE, [0x2a] = AM_KEY_LSHIFT, + [0x2b] = AM_KEY_BACKSLASH, [0x2c] = AM_KEY_Z, [0x2d] = AM_KEY_X, + [0x2e] = AM_KEY_C, [0x2f] = AM_KEY_V, [0x30] = AM_KEY_B, [0x31] = AM_KEY_N, + [0x32] = AM_KEY_M, [0x33] = AM_KEY_COMMA, [0x34] = AM_KEY_PERIOD, + [0x35] = AM_KEY_SLASH, [0x36] = AM_KEY_RSHIFT, [0x38] = AM_KEY_LALT, + [0x38] = AM_KEY_RALT, [0x39] = AM_KEY_SPACE, [0x3a] = AM_KEY_CAPSLOCK, + [0x3b] = AM_KEY_F1, [0x3c] = AM_KEY_F2, [0x3d] = AM_KEY_F3, + [0x3e] = AM_KEY_F4, [0x3f] = AM_KEY_F5, [0x40] = AM_KEY_F6, + [0x41] = AM_KEY_F7, [0x42] = AM_KEY_F8, [0x43] = AM_KEY_F9, + 
[0x44] = AM_KEY_F10, [0x48] = AM_KEY_INSERT, + [0x4b] = AM_KEY_HOME, [0x4d] = AM_KEY_END, [0x50] = AM_KEY_DELETE, + [0x57] = AM_KEY_F11, [0x58] = AM_KEY_F12, [0x5b] = AM_KEY_APPLICATION, +}; + +static void input_config(AM_INPUT_CONFIG_T *cfg) { + cfg->present = true; +} + +static void input_keybrd(AM_INPUT_KEYBRD_T *ev) { + if (inb(0x64) & 0x1) { + int code = inb(0x60) & 0xff; + ev->keydown = code < 128; + ev->keycode = keylut[code & 0x7f]; + } else { + ev->keydown = false; + ev->keycode = AM_KEY_NONE; + } +} + +// GPU (Frame Buffer and 2D Accelerated Graphics) +// ==================================================== + +#define VMEM_SIZE (512 << 10) + +struct vbe_info { + uint8_t ignore[18]; + uint16_t width; + uint16_t height; + uint8_t ignore1[18]; + uint32_t framebuffer; +} __attribute__ ((packed)); + +static inline uint8_t R(uint32_t p) { return p >> 16; } +static inline uint8_t G(uint32_t p) { return p >> 8; } +static inline uint8_t B(uint32_t p) { return p; } + +struct pixel { + uint8_t b, g, r; +} __attribute__ ((packed)); + +static struct pixel *fb; +static uint8_t vmem[VMEM_SIZE], vbuf[VMEM_SIZE], *vbuf_head; + +static struct gpu_canvas display; +static inline void *to_host(gpuptr_t ptr) { return ptr == AM_GPU_NULL ? NULL : vmem + ptr; } + +static void gpu_init() { + struct vbe_info *info = (struct vbe_info *)0x00004000; + display.w = info->width; + display.h = info->height; + fb = (void *)((intptr_t)(info->framebuffer)); +} + +static void gpu_config(AM_GPU_CONFIG_T *cfg) { + *cfg = (AM_GPU_CONFIG_T) { + .present = true, + .width = display.w, .height = display.h, + .vmemsz = sizeof(vmem), + }; +} + +static void gpu_fbdraw(AM_GPU_FBDRAW_T *draw) { + int x = draw->x, y = draw->y, w = draw->w, h = draw->h; + int W = display.w, H = display.h; + uint32_t *pixels = draw->pixels; + int len = (x + w >= W) ? 
W - x : w; + for (int j = 0; j < h; j ++, pixels += w) { + if (y + j < H) { + struct pixel *px = &fb[x + (j + y) * W]; + for (int i = 0; i < len; i ++, px ++) { + uint32_t p = pixels[i]; + *px = (struct pixel) { .r = R(p), .g = G(p), .b = B(p) }; + } + } + } +} + +static void gpu_status(AM_GPU_STATUS_T *stat) { + stat->ready = true; +} + +static void gpu_memcpy(AM_GPU_MEMCPY_T *params) { + char *src = params->src, *dst = to_host(params->dest); + for (int i = 0; i < params->size; i++) + dst[i] = src[i]; +} + +static void *vbuf_alloc(int size) { + void *ret = vbuf_head; + vbuf_head += size; + panic_on(vbuf_head > vbuf + sizeof(vbuf), "no memory"); + for (int i = 0; i < size; i++) + ((char *)ret)[i] = 0; + return ret; +} + +static struct pixel *render(struct gpu_canvas *cv, struct gpu_canvas *parent, struct pixel *px) { + struct pixel *px_local; + int W = parent->w, w, h; + + switch (cv->type) { + case AM_GPU_TEXTURE: { + w = cv->texture.w; h = cv->texture.h; + px_local = to_host(cv->texture.pixels); + break; + } + case AM_GPU_SUBTREE: { + w = cv->w; h = cv->h; + px_local = vbuf_alloc(w * h * sizeof(struct pixel)); + for (struct gpu_canvas *ch = to_host(cv->child); ch; ch = to_host(ch->sibling)) { + render(ch, cv, px_local); + } + break; + } + default: + panic("invalid node"); + } + + // draw local canvas (w * h) -> px (x1, y1) - (x1 + w1, y1 + h1) + for (int i = 0; i < cv->w1; i++) + for (int j = 0; j < cv->h1; j++) { + int x = cv->x1 + i, y = cv->y1 + j; + px[W * y + x] = px_local[w * (j * h / cv->h1) + (i * w / cv->w1)]; + } + return 0; +} + +static void gpu_render(AM_GPU_RENDER_T *ren) { + vbuf_head = vbuf; + render(to_host(ren->root), &display, fb); +} + +// Disk (ATA0) +// ==================================================== + +#define BLKSZ 512 +#define DISKSZ (64 << 20) + +static void disk_config(AM_DISK_CONFIG_T *cfg) { + cfg->present = true; + cfg->blksz = BLKSZ; + cfg->blkcnt = DISKSZ / BLKSZ; +} + +static void disk_status(AM_DISK_STATUS_T *status) { + 
status->ready = true; +} + +static inline void wait_disk(void) { + while ((inb(0x1f7) & 0xc0) != 0x40); +} + +static void disk_blkio(AM_DISK_BLKIO_T *bio) { + uint32_t blkno = bio->blkno, remain = bio->blkcnt; + uint32_t *ptr = bio->buf; + for (remain = bio->blkcnt; remain; remain--, blkno++) { + wait_disk(); + outb(0x1f2, 1); + outb(0x1f3, blkno); + outb(0x1f4, blkno >> 8); + outb(0x1f5, blkno >> 16); + outb(0x1f6, (blkno >> 24) | 0xe0); + outb(0x1f7, bio->write? 0x30 : 0x20); + wait_disk(); + if (bio->write) { + for (int i = 0; i < BLKSZ / 4; i ++) + outl(0x1f0, *ptr++); + } else { + for (int i = 0; i < BLKSZ / 4; i ++) + *ptr++ = inl(0x1f0); + } + } +} + +// ==================================================== + +static void audio_config(AM_AUDIO_CONFIG_T *cfg) { cfg->present = false; } +static void net_config(AM_NET_CONFIG_T *cfg) { cfg->present = false; } +static void fail(void *buf) { panic("access nonexist register"); } + +typedef void (*handler_t)(void *buf); +static void *lut[128] = { + [AM_UART_CONFIG ] = uart_config, + [AM_UART_TX ] = uart_tx, + [AM_UART_RX ] = uart_rx, + [AM_TIMER_CONFIG] = timer_config, + [AM_TIMER_RTC ] = timer_rtc, + [AM_TIMER_UPTIME] = timer_uptime, + [AM_INPUT_CONFIG] = input_config, + [AM_INPUT_KEYBRD] = input_keybrd, + [AM_GPU_CONFIG ] = gpu_config, + [AM_GPU_FBDRAW ] = gpu_fbdraw, + [AM_GPU_STATUS ] = gpu_status, + [AM_GPU_MEMCPY ] = gpu_memcpy, + [AM_GPU_RENDER ] = gpu_render, + [AM_AUDIO_CONFIG] = audio_config, + [AM_DISK_CONFIG ] = disk_config, + [AM_DISK_STATUS ] = disk_status, + [AM_DISK_BLKIO ] = disk_blkio, + [AM_NET_CONFIG ] = net_config, +}; + + +bool ioe_init() { + panic_on(cpu_current() != 0, "init IOE in non-bootstrap CPU"); + + for (int i = 0; i < LENGTH(lut); i++) + if (!lut[i]) lut[i] = fail; + + uart_init(); + timer_init(); + gpu_init(); + + return true; +} + +void ioe_read (int reg, void *buf) { ((handler_t)lut[reg])(buf); } +void ioe_write(int reg, void *buf) { ((handler_t)lut[reg])(buf); } + +// LAPIC/IOAPIC 
(from xv6) + +#define ID (0x0020/4) // ID +#define VER (0x0030/4) // Version +#define TPR (0x0080/4) // Task Priority +#define EOI (0x00B0/4) // EOI +#define SVR (0x00F0/4) // Spurious Interrupt Vector + #define ENABLE 0x00000100 // Unit Enable +#define ESR (0x0280/4) // Error Status +#define ICRLO (0x0300/4) // Interrupt Command + #define INIT 0x00000500 // INIT/RESET + #define STARTUP 0x00000600 // Startup IPI + #define DELIVS 0x00001000 // Delivery status + #define ASSERT 0x00004000 // Assert interrupt (vs deassert) + #define DEASSERT 0x00000000 + #define LEVEL 0x00008000 // Level triggered + #define BCAST 0x00080000 // Send to all APICs, including self. + #define BUSY 0x00001000 + #define FIXED 0x00000000 +#define ICRHI (0x0310/4) // Interrupt Command [63:32] +#define TIMER (0x0320/4) // Local Vector Table 0 (TIMER) + #define X1 0x0000000B // divide counts by 1 + #define PERIODIC 0x00020000 // Periodic +#define PCINT (0x0340/4) // Performance Counter LVT +#define LINT0 (0x0350/4) // Local Vector Table 1 (LINT0) +#define LINT1 (0x0360/4) // Local Vector Table 2 (LINT1) +#define ERROR (0x0370/4) // Local Vector Table 3 (ERROR) + #define MASKED 0x00010000 // Interrupt masked +#define TICR (0x0380/4) // Timer Initial Count +#define TCCR (0x0390/4) // Timer Current Count +#define TDCR (0x03E0/4) // Timer Divide Configuration + +#define IOAPIC_ADDR 0xFEC00000 // Default physical address of IO APIC +#define REG_ID 0x00 // Register index: ID +#define REG_VER 0x01 // Register index: version +#define REG_TABLE 0x10 // Redirection table base + +#define INT_DISABLED 0x00010000 // Interrupt disabled +#define INT_LEVEL 0x00008000 // Level-triggered (vs edge-) +#define INT_ACTIVELOW 0x00002000 // Active low (vs high) +#define INT_LOGICAL 0x00000800 // Destination is CPU id (vs APIC ID) + +volatile unsigned int *__am_lapic = NULL; // Initialized in mp.c +struct IOAPIC { + uint32_t reg, pad[3], data; +} __attribute__((packed)); +typedef struct IOAPIC IOAPIC; + +static volatile 
IOAPIC *ioapic; + +static void lapicw(int index, int value) { + __am_lapic[index] = value; + __am_lapic[ID]; +} + +void __am_percpu_initlapic(void) { + lapicw(SVR, ENABLE | (T_IRQ0 + IRQ_SPURIOUS)); + lapicw(TDCR, X1); + lapicw(TIMER, PERIODIC | (T_IRQ0 + IRQ_TIMER)); + lapicw(TICR, 10000000); + lapicw(LINT0, MASKED); + lapicw(LINT1, MASKED); + if (((__am_lapic[VER]>>16) & 0xFF) >= 4) + lapicw(PCINT, MASKED); + lapicw(ERROR, T_IRQ0 + IRQ_ERROR); + lapicw(ESR, 0); + lapicw(ESR, 0); + lapicw(EOI, 0); + lapicw(ICRHI, 0); + lapicw(ICRLO, BCAST | INIT | LEVEL); + while(__am_lapic[ICRLO] & DELIVS) ; + lapicw(TPR, 0); +} + +void __am_lapic_eoi(void) { + if (__am_lapic) + lapicw(EOI, 0); +} + +void __am_lapic_bootap(uint32_t apicid, void *addr) { + int i; + uint16_t *wrv; + outb(0x70, 0xF); + outb(0x71, 0x0A); + wrv = (unsigned short*)((0x40<<4 | 0x67)); + wrv[0] = 0; + wrv[1] = (uintptr_t)addr >> 4; + + lapicw(ICRHI, apicid<<24); + lapicw(ICRLO, INIT | LEVEL | ASSERT); + lapicw(ICRLO, INIT | LEVEL); + + for (i = 0; i < 2; i++){ + lapicw(ICRHI, apicid<<24); + lapicw(ICRLO, STARTUP | ((uintptr_t)addr>>12)); + } +} + +static unsigned int ioapicread(int reg) { + ioapic->reg = reg; + return ioapic->data; +} + +static void ioapicwrite(int reg, unsigned int data) { + ioapic->reg = reg; + ioapic->data = data; +} + +void __am_ioapic_init(void) { + int i, maxintr; + + ioapic = (volatile IOAPIC*)IOAPIC_ADDR; + maxintr = (ioapicread(REG_VER) >> 16) & 0xFF; + + for (i = 0; i <= maxintr; i++){ + ioapicwrite(REG_TABLE+2*i, INT_DISABLED | (T_IRQ0 + i)); + ioapicwrite(REG_TABLE+2*i+1, 0); + } +} + +void __am_ioapic_enable(int irq, int cpunum) { + ioapicwrite(REG_TABLE+2*irq, T_IRQ0 + irq); + ioapicwrite(REG_TABLE+2*irq+1, cpunum << 24); +} diff --git a/abstract-machine/am/src/x86/qemu/mpe.c b/abstract-machine/am/src/x86/qemu/mpe.c new file mode 100644 index 0000000..5e6e202 --- /dev/null +++ b/abstract-machine/am/src/x86/qemu/mpe.c @@ -0,0 +1,55 @@ +#include "x86-qemu.h" + +struct 
cpu_local __am_cpuinfo[MAX_CPU] = {}; +static void (* volatile user_entry)(); +static int ap_ready = 0; + +static void call_user_entry() { + user_entry(); + panic("MPE entry should not return"); +} + +bool mpe_init(void (*entry)()) { + user_entry = entry; + boot_record()->jmp_code = 0x000bfde9; // (16-bit) jmp (0x7c00) + for (int cpu = 1; cpu < __am_ncpu; cpu++) { + boot_record()->is_ap = 1; + __am_lapic_bootap(cpu, (void *)boot_record()); + while (xchg(&ap_ready, 0) != 1) { + pause(); + } + } + call_user_entry(); + return true; +} + +static void othercpu_entry() { + __am_percpu_init(); + xchg(&ap_ready, 1); + call_user_entry(); +} + +void __am_othercpu_entry() { + stack_switch_call(stack_top(&CPU->stack), othercpu_entry, 0); +} + +int cpu_count() { + return __am_ncpu; +} + +int cpu_current(void) { + return __am_lapic[8] >> 24; +} + +int atomic_xchg(int *addr, int newval) { + return xchg(addr, newval); +} + +void __am_stop_the_world() { + boot_record()->jmp_code = 0x0000feeb; // (16-bit) jmp . 
+ for (int cpu_ = 0; cpu_ < __am_ncpu; cpu_++) { + if (cpu_ != cpu_current()) { + __am_lapic_bootap(cpu_, (void *)boot_record()); + } + } +} diff --git a/abstract-machine/am/src/x86/qemu/start32.S b/abstract-machine/am/src/x86/qemu/start32.S new file mode 100644 index 0000000..2fa5113 --- /dev/null +++ b/abstract-machine/am/src/x86/qemu/start32.S @@ -0,0 +1,7 @@ +#include "x86-qemu.h" + +.globl _start +_start: + pushl $MAINARG_ADDR + pushl $0 + jmp _start_c diff --git a/abstract-machine/am/src/x86/qemu/start64.S b/abstract-machine/am/src/x86/qemu/start64.S new file mode 100644 index 0000000..01a10d6 --- /dev/null +++ b/abstract-machine/am/src/x86/qemu/start64.S @@ -0,0 +1,69 @@ +#include +#include "x86-qemu.h" + +.code32 +.globl _start +_start: + movl $(PDPT_ADDR | PTE_P | PTE_W), %eax + cmpl (PML4_ADDR), %eax + je .long_mode_init + + movl $(PDPT_ADDR | PTE_P | PTE_W), %eax + movl %eax, (PML4_ADDR) + + movl $0, %ecx + movl $512, %esi // 512 pages + // | +.loop: // x + movl %ecx, %eax // | + shll $30, %eax // | + orl $(PTE_P | PTE_W | PTE_PS), %eax // 1 GiB page + movl %eax, PDPT_ADDR(, %ecx, 8) + + movl %ecx, %eax + shrl $2, %eax + movl %eax, PDPT_ADDR + 4(, %ecx, 8) + + inc %ecx + cmp %esi, %ecx + jne .loop + +.long_mode_init: + movl $PML4_ADDR, %eax + movl %eax, %cr3 // %cr3 = PML4 base + movl $CR4_PAE, %eax + movl %eax, %cr4 // %cr4.PAE = 1 + movl $0xc0000080, %ecx + rdmsr + orl $0x100, %eax + wrmsr // %EFER.LME = 1 + movl %cr0, %eax + orl $CR0_PG, %eax + movl %eax, %cr0 // %cr0.PG = 1 + lgdt gdt_ptr // bootstrap GDT + ljmp $8, $_start64 // should not return + +.code64 +_start64: + movw $0, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw %ax, %fs + movw %ax, %gs + + movq $MAINARG_ADDR, %rdi + pushq $0 + jmp _start_c + +.align 16 +gdt_ptr: + .word gdt64_end - gdt64_begin - 1 + .quad gdt64_begin + +gdt64_begin: + .long 0x00000000 // 0: null desc + .long 0x00000000 + .long 0x00000000 // 1: code + .long 0x00209800 +gdt64_end: diff --git 
a/abstract-machine/am/src/x86/qemu/trap32.S b/abstract-machine/am/src/x86/qemu/trap32.S new file mode 100644 index 0000000..b0b41a8 --- /dev/null +++ b/abstract-machine/am/src/x86/qemu/trap32.S @@ -0,0 +1,99 @@ +#include "x86-qemu.h" + +.globl __am_kcontext_start +__am_kcontext_start: + // eax = arg, ebx = entry + pushl %eax + pushl $__am_panic_on_return + jmpl *%ebx + +trap: + cli + + subl $20, %esp + pushl %ebp + pushl %edi + pushl %esi + pushl $0 + pushl %edx + pushl %ecx + pushl %ebx + pushl %eax + movw %ds, %ax + pushl %eax + pushl $0 + + movw $KSEL(SEG_KDATA), %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + + pushl %esp + call __am_irq_handle + +.globl __am_iret +__am_iret: + addl $4, %esp + popl %eax + movl %eax, %esp + + addl $4, %esp + popl %eax + movw %ax, %ds + movw %ax, %es + + cmpw $KSEL(SEG_KCODE), 36(%esp) + je .kernel_iret + +.user_iret: + popl %eax + popl %ebx + popl %ecx + popl %edx + addl $4, %esp + popl %esi + popl %edi + popl %ebp + iret + +.kernel_iret: + popl %eax + popl %ebx + popl %ecx + popl %edx + addl $4, %esp + + /* stack frame: + 28 ss + 24 esp (not popped by iret when returning to ring0) + 20 eflags ---> move to new-esp + 16 cs + 12 eip + 8 ebp + 4 edi + 0 esi <--- %esp + */ + + movl %esp, %ebp + movl 24(%ebp), %edi // %edi is new-esp + + movl 20(%ebp), %esi; movl %esi, -4(%edi) + movl 16(%ebp), %esi; movl %esi, -8(%edi) + movl 12(%ebp), %esi; movl %esi, -12(%edi) + movl 8(%ebp), %esi; movl %esi, -16(%edi) + movl 4(%ebp), %esi; movl %esi, -20(%edi) + movl 0(%ebp), %esi; movl %esi, -24(%edi) + + leal -24(%edi), %esp + + popl %esi + popl %edi + popl %ebp + iret + +#define NOERR push $0 +#define ERR +#define IRQ_DEF(id, dpl, err) \ + .globl __am_irq##id; __am_irq##id: cli; err; push $id; jmp trap; +IRQS(IRQ_DEF) + .globl __am_irqall; __am_irqall: cli; push $0; push $-1; jmp trap; diff --git a/abstract-machine/am/src/x86/qemu/trap64.S b/abstract-machine/am/src/x86/qemu/trap64.S new file mode 100644 index 0000000..d26b8d2 --- 
/dev/null +++ b/abstract-machine/am/src/x86/qemu/trap64.S @@ -0,0 +1,61 @@ +#include "x86-qemu.h" + +.globl __am_kcontext_start +__am_kcontext_start: + // rdi = arg, rsi = entry + pushq $__am_panic_on_return + jmpq *%rsi + +trap: + cli + subq $48, %rsp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %r11 + pushq %r10 + pushq %r9 + pushq %r8 + pushq %rdi + pushq %rsi + pushq %rbp + pushq %rdx + pushq %rcx + pushq %rbx + pushq %rax + pushq $0 // cr3, saved in __am_irq_handle + + movq %rsp, %rdi + call __am_irq_handle + +.globl __am_iret +__am_iret: + movq %rdi, %rsp + movq 160(%rsp), %rax + movw %ax, %ds + movw %ax, %es + addq $8, %rsp + popq %rax + popq %rbx + popq %rcx + popq %rdx + popq %rbp + popq %rsi + popq %rdi + popq %r8 + popq %r9 + popq %r10 + popq %r11 + popq %r12 + popq %r13 + popq %r14 + popq %r15 + iretq + +#define NOERR push $0 +#define ERR +#define IRQ_DEF(id, dpl, err) \ + .globl __am_irq##id; __am_irq##id: cli; err; push $id; jmp trap; +IRQS(IRQ_DEF) + .globl __am_irqall; __am_irqall: cli; push $0; push $-1; jmp trap; diff --git a/abstract-machine/am/src/x86/qemu/trm.c b/abstract-machine/am/src/x86/qemu/trm.c new file mode 100644 index 0000000..862da5d --- /dev/null +++ b/abstract-machine/am/src/x86/qemu/trm.c @@ -0,0 +1,112 @@ +#include "x86-qemu.h" + +Area heap = {}; +int __am_ncpu = 0; + +int main(const char *args); + +static void call_main(const char *args) { + halt(main(args)); +} + +void _start_c(char *args) { + if (boot_record()->is_ap) { + __am_othercpu_entry(); + } else { + __am_bootcpu_init(); + stack_switch_call(stack_top(&CPU->stack), call_main, (uintptr_t)args); + } +} + +void __am_bootcpu_init() { + heap = __am_heap_init(); + __am_lapic_init(); + __am_ioapic_init(); + __am_percpu_init(); +} + +void __am_percpu_init() { + __am_percpu_initgdt(); + __am_percpu_initlapic(); + __am_percpu_initirq(); +} + +void putch(char ch) { + #define COM1 0x3f8 + outb(COM1, ch); +} + +void halt(int code) { + const char *hex = 
"0123456789abcdef"; + const char *fmt = "CPU #$ Halt (40).\n"; + cli(); + __am_stop_the_world(); + for (const char *p = fmt; *p; p++) { + char ch = *p; + switch (ch) { + case '$': + putch(hex[cpu_current()]); + break; + case '0': case '4': + putch(hex[(code >> (ch - '0')) & 0xf]); + break; + default: + putch(ch); + } + } + outw(0x604, 0x2000); // offer of qemu :) + while (1) hlt(); +} + +Area __am_heap_init() { + extern char end; + outb(0x70, 0x34); + uint32_t lo = inb(0x71); + outb(0x70, 0x35); + uint32_t hi = inb(0x71) + 1; + return RANGE(ROUNDUP(&end, 1 << 20), (uintptr_t)((lo | hi << 8) << 16)); +} + +void __am_lapic_init() { + for (char *st = (char *)0xf0000; st < (char *)0x100000; st ++) { // stop at 1 MiB: the "_MP_" floating pointer is searched only in the BIOS ROM area 0xF0000-0xFFFFF + if (*(volatile uint32_t *)st == 0x5f504d5f) { + uint32_t mpconf_ptr = ((volatile MPDesc *)st)->conf; + MPConf *conf = (void *)((uintptr_t)(mpconf_ptr)); + __am_lapic = (void *)((uintptr_t)(conf->lapicaddr)); + for (volatile char *ptr = (char *)(conf + 1); + ptr < (char *)conf + conf->length; ptr += 8) { + if (*ptr == '\0') { + ptr += 12; + panic_on(++__am_ncpu > MAX_CPU, "cannot support > MAX_CPU processors"); + } + } + return; + } + } + bug(); +} + +void __am_percpu_initgdt() { +#if __x86_64__ + SegDesc *gdt = CPU->gdt; + TSS64 *tss = &CPU->tss; + gdt[SEG_KCODE] = SEG64(STA_X | STA_R, DPL_KERN); + gdt[SEG_KDATA] = SEG64(STA_W, DPL_KERN); + gdt[SEG_UCODE] = SEG64(STA_X | STA_R, DPL_USER); + gdt[SEG_UDATA] = SEG64(STA_W, DPL_USER); + gdt[SEG_TSS] = SEG16(STS_T32A, tss, sizeof(*tss)-1, DPL_KERN); + bug_on((uintptr_t)tss >> 32); + set_gdt(gdt, sizeof(gdt[0]) * (NR_SEG + 1)); + set_tr(KSEL(SEG_TSS)); +#else + SegDesc *gdt = CPU->gdt; + TSS32 *tss = &CPU->tss; + gdt[SEG_KCODE] = SEG32(STA_X | STA_R, 0, 0xffffffff, DPL_KERN); + gdt[SEG_KDATA] = SEG32(STA_W, 0, 0xffffffff, DPL_KERN); + gdt[SEG_UCODE] = SEG32(STA_X | STA_R, 0, 0xffffffff, DPL_USER); + gdt[SEG_UDATA] = SEG32(STA_W, 0, 0xffffffff, DPL_USER); + gdt[SEG_TSS] = SEG16(STS_T32A, tss, sizeof(*tss)-1, DPL_KERN); + 
set_gdt(gdt, sizeof(gdt[0]) * NR_SEG); + set_tr(KSEL(SEG_TSS)); +#endif +} diff --git a/abstract-machine/am/src/x86/qemu/vme.c b/abstract-machine/am/src/x86/qemu/vme.c new file mode 100644 index 0000000..a493844 --- /dev/null +++ b/abstract-machine/am/src/x86/qemu/vme.c @@ -0,0 +1,181 @@ +#include "x86-qemu.h" + +const struct mmu_config mmu = { + .pgsize = 4096, +#if __x86_64__ + .ptlevels = 4, + .pgtables = { + { "CR3", 0x000000000000, 0, 0 }, + { "PML4", 0xff8000000000, 39, 9 }, + { "PDPT", 0x007fc0000000, 30, 9 }, + { "PD", 0x00003fe00000, 21, 9 }, + { "PT", 0x0000001ff000, 12, 9 }, + }, +#else + .ptlevels = 2, + .pgtables = { + { "CR3", 0x00000000, 0, 0 }, + { "PD", 0xffc00000, 22, 10 }, + { "PT", 0x003ff000, 12, 10 }, + }, +#endif +}; + +static const struct vm_area vm_areas[] = { +#ifdef __x86_64__ + { RANGE(0x100000000000, 0x108000000000), 0 }, // 512 GiB user space + { RANGE(0x000000000000, 0x008000000000), 1 }, // 512 GiB kernel +#else + { RANGE( 0x40000000, 0x80000000), 0 }, // 1 GiB user space + { RANGE( 0x00000000, 0x40000000), 1 }, // 1 GiB kernel + { RANGE( 0xfd000000, 0x00000000), 1 }, // memory-mapped I/O +#endif +}; +#define uvm_area (vm_areas[0].area) + +static uintptr_t *kpt; +static void *(*pgalloc)(int size); +static void (*pgfree)(void *); + +static void *pgallocz() { + uintptr_t *base = pgalloc(mmu.pgsize); + panic_on(!base, "cannot allocate page"); + for (int i = 0; i < mmu.pgsize / sizeof(uintptr_t); i++) { + base[i] = 0; + } + return base; +} + +static int indexof(uintptr_t addr, const struct ptinfo *info) { + return ((uintptr_t)addr & info->mask) >> info->shift; +} + +static uintptr_t baseof(uintptr_t addr) { + return addr & ~(mmu.pgsize - 1); +} + +static uintptr_t *ptwalk(AddrSpace *as, uintptr_t addr, int flags) { + uintptr_t cur = (uintptr_t)&as->ptr; + + for (int i = 0; i <= mmu.ptlevels; i++) { + const struct ptinfo *ptinfo = &mmu.pgtables[i]; + uintptr_t *pt = (uintptr_t *)cur, next_page; + int index = indexof(addr, ptinfo); + if (i 
== mmu.ptlevels) return &pt[index]; + + if (!(pt[index] & PTE_P)) { + next_page = (uintptr_t)pgallocz(); + pt[index] = next_page | PTE_P | flags; + } else { + next_page = baseof(pt[index]); + } + cur = next_page; + } + bug(); +} + +static void teardown(int level, uintptr_t *pt) { + if (level > mmu.ptlevels) return; + for (int index = 0; index < (1 << mmu.pgtables[level].bits); index++) { + if ((pt[index] & PTE_P) && (pt[index] & PTE_U)) { + teardown(level + 1, (void *)baseof(pt[index])); + } + } + if (level >= 1) { + pgfree(pt); + } +} + +bool vme_init(void *(*_pgalloc)(int size), void (*_pgfree)(void *)) { + panic_on(cpu_current() != 0, "init VME in non-bootstrap CPU"); + pgalloc = _pgalloc; + pgfree = _pgfree; + +#if __x86_64__ + kpt = (void *)PML4_ADDR; +#else + AddrSpace as; + as.ptr = NULL; + for (int i = 0; i < LENGTH(vm_areas); i++) { + const struct vm_area *vma = &vm_areas[i]; + if (vma->kernel) { + for (uintptr_t cur = (uintptr_t)vma->area.start; + cur != (uintptr_t)vma->area.end; + cur += mmu.pgsize) { + *ptwalk(&as, cur, PTE_W) = cur | PTE_P | PTE_W; + } + } + } + kpt = (void *)baseof((uintptr_t)as.ptr); +#endif + + set_cr3(kpt); + set_cr0(get_cr0() | CR0_PG); + return true; +} + +void protect(AddrSpace *as) { + uintptr_t *upt = pgallocz(); + + for (int i = 0; i < LENGTH(vm_areas); i++) { + const struct vm_area *vma = &vm_areas[i]; + if (vma->kernel) { + const struct ptinfo *info = &mmu.pgtables[1]; // level-1 page table + for (uintptr_t cur = (uintptr_t)vma->area.start; + cur != (uintptr_t)vma->area.end; + cur += (1L << info->shift)) { + int index = indexof(cur, info); + upt[index] = kpt[index]; + } + } + } + as->pgsize = mmu.pgsize; + as->area = uvm_area; + as->ptr = (void *)((uintptr_t)upt | PTE_P | PTE_U); +} + +void unprotect(AddrSpace *as) { + teardown(0, (void *)&as->ptr); +} + +void map(AddrSpace *as, void *va, void *pa, int prot) { + panic_on(!IN_RANGE(va, uvm_area), "mapping an invalid address"); + panic_on((uintptr_t)va != ROUNDDOWN(va, 
mmu.pgsize) || + (uintptr_t)pa != ROUNDDOWN(pa, mmu.pgsize), "non-page-boundary address"); + + uintptr_t *ptentry = ptwalk(as, (uintptr_t)va, PTE_W | PTE_U); + if (prot == MMAP_NONE) { + panic_on(!(*ptentry & PTE_P), "unmapping a non-mapped page"); + *ptentry = 0; + } else { + panic_on(*ptentry & PTE_P, "remapping a mapped page"); + uintptr_t pte = (uintptr_t)pa | PTE_P | PTE_U | ((prot & MMAP_WRITE) ? PTE_W : 0); + *ptentry = pte; + } + ptwalk(as, (uintptr_t)va, PTE_W | PTE_U); +} + +Context *ucontext(AddrSpace *as, Area kstack, void *entry) { + Context *ctx = kstack.end - sizeof(Context); + *ctx = (Context) { 0 }; + +#if __x86_64__ + ctx->cs = USEL(SEG_UCODE); + ctx->ss = USEL(SEG_UDATA); + ctx->rip = (uintptr_t)entry; + ctx->rflags = FL_IF; + ctx->rsp = (uintptr_t)uvm_area.end; + ctx->rsp0 = (uintptr_t)kstack.end; +#else + ctx->cs = USEL(SEG_UCODE); + ctx->ds = USEL(SEG_UDATA); + ctx->ss3 = USEL(SEG_UDATA); + ctx->eip = (uintptr_t)entry; + ctx->eflags = FL_IF; + ctx->esp = (uintptr_t)uvm_area.end; + ctx->esp0 = (uintptr_t)kstack.end; +#endif + ctx->cr3 = as->ptr; + + return ctx; +} diff --git a/abstract-machine/am/src/x86/qemu/x86-qemu.h b/abstract-machine/am/src/x86/qemu/x86-qemu.h new file mode 100644 index 0000000..21bb806 --- /dev/null +++ b/abstract-machine/am/src/x86/qemu/x86-qemu.h @@ -0,0 +1,100 @@ +#include + +#define PML4_ADDR 0x1000 +#define PDPT_ADDR 0x2000 + +#define NR_SEG 6 // GDT size +#define SEG_KCODE 1 // Kernel code +#define SEG_KDATA 2 // Kernel data/stack +#define SEG_UCODE 3 // User code +#define SEG_UDATA 4 // User data/stack +#define SEG_TSS 5 // Global unique task state segement + +#define NR_IRQ 256 // IDT size + +#ifndef __ASSEMBLER__ + +#include +#include + +struct kernel_stack { + uint8_t stack[8192]; +}; + +static inline void *stack_top(struct kernel_stack *stk) { + return stk->stack + sizeof(stk->stack); +} + +struct mmu_config { + int ptlevels, pgsize; + struct ptinfo { + const char *name; + uintptr_t mask; + int shift, bits; + } 
pgtables[]; +}; + +struct vm_area { + Area area; + int kernel; +}; + +void __am_iret(Context *ctx); + +struct cpu_local { + AddrSpace *uvm; +#if __x86_64__ + SegDesc gdt[NR_SEG + 1]; + TSS64 tss; +#else + SegDesc gdt[NR_SEG]; + TSS32 tss; +#endif + struct kernel_stack stack; +}; + +#if __x86_64__ +struct trap_frame { + Context saved_context; + uint64_t irq, errcode; + uint64_t rip, cs, rflags, rsp, ss; +}; +#else +struct trap_frame { + Context saved_context; + uint32_t irq, errcode; + uint32_t eip, cs, eflags, esp, ss; +}; +#endif + +extern volatile uint32_t *__am_lapic; +extern int __am_ncpu; +extern struct cpu_local __am_cpuinfo[MAX_CPU]; + +#define CPU (&__am_cpuinfo[cpu_current()]) + +#define bug_on(cond) \ + do { \ + if (cond) panic("internal error (likely a bug in AM)"); \ + } while (0) + +#define bug() bug_on(1) + +// apic utils +void __am_lapic_eoi(); +void __am_ioapic_init(); +void __am_lapic_bootap(uint32_t cpu, void *address); +void __am_ioapic_enable(int irq, int cpu); + +// x86-specific operations +void __am_bootcpu_init(); +void __am_percpu_init(); +Area __am_heap_init(); +void __am_lapic_init(); +void __am_othercpu_entry(); +void __am_percpu_initirq(); +void __am_percpu_initgdt(); +void __am_percpu_initlapic(); +void __am_stop_the_world(); + +#endif diff --git a/abstract-machine/am/src/x86/x86.h b/abstract-machine/am/src/x86/x86.h new file mode 100644 index 0000000..130835e --- /dev/null +++ b/abstract-machine/am/src/x86/x86.h @@ -0,0 +1,353 @@ +// CPU rings +#define DPL_KERN 0x0 // Kernel (ring 0) +#define DPL_USER 0x3 // User (ring 3) + +// Application Segment type bits +#define STA_X 0x8 // Executable segment +#define STA_W 0x2 // Writeable (non-executable segments) +#define STA_R 0x2 // Readable (executable segments) + +// System Segment type bits +#define STS_T32A 0x9 // Available 32-bit TSS +#define STS_IG 0xe // 32/64-bit Interrupt Gate +#define STS_TG 0xf // 32/64-bit Trap Gate + +// EFLAGS register +#define FL_IF 0x00000200 // Interrupt 
Enable + +// Control Register flags +#define CR0_PE 0x00000001 // Protection Enable +#define CR0_PG 0x80000000 // Paging +#define CR4_PAE 0x00000020 // Physical Address Extension + +// Page table/directory entry flags +#define PTE_P 0x001 // Present +#define PTE_W 0x002 // Writeable +#define PTE_U 0x004 // User +#define PTE_PS 0x080 // Large Page (1 GiB or 2 MiB) + +// GDT selectors +#define KSEL(seg) (((seg) << 3) | DPL_KERN) +#define USEL(seg) (((seg) << 3) | DPL_USER) + +// Interrupts and exceptions +#define T_IRQ0 32 +#define IRQ_TIMER 0 +#define IRQ_KBD 1 +#define IRQ_COM1 4 +#define IRQ_ERROR 19 +#define IRQ_SPURIOUS 31 +#define EX_DE 0 +#define EX_UD 6 +#define EX_NM 7 +#define EX_DF 8 +#define EX_TS 10 +#define EX_NP 11 +#define EX_SS 12 +#define EX_GP 13 +#define EX_PF 14 +#define EX_MF 15 +#define EX_SYSCALL 0x80 +#define EX_YIELD 0x81 + +// List of interrupts and exceptions (#irq, DPL, hardware errorcode) +#define IRQS(_) \ + _( 0, KERN, NOERR) \ + _( 1, KERN, NOERR) \ + _( 2, KERN, NOERR) \ + _( 3, KERN, NOERR) \ + _( 4, KERN, NOERR) \ + _( 5, KERN, NOERR) \ + _( 6, KERN, NOERR) \ + _( 7, KERN, NOERR) \ + _( 8, KERN, ERR) \ + _( 9, KERN, NOERR) \ + _( 10, KERN, ERR) \ + _( 11, KERN, ERR) \ + _( 12, KERN, ERR) \ + _( 13, KERN, ERR) \ + _( 14, KERN, ERR) \ + _( 15, KERN, NOERR) \ + _( 16, KERN, NOERR) \ + _( 19, KERN, NOERR) \ + _( 31, KERN, NOERR) \ + _( 32, KERN, NOERR) \ + _( 33, KERN, NOERR) \ + _( 34, KERN, NOERR) \ + _( 35, KERN, NOERR) \ + _( 36, KERN, NOERR) \ + _( 37, KERN, NOERR) \ + _( 38, KERN, NOERR) \ + _( 39, KERN, NOERR) \ + _( 40, KERN, NOERR) \ + _( 41, KERN, NOERR) \ + _( 42, KERN, NOERR) \ + _( 43, KERN, NOERR) \ + _( 44, KERN, NOERR) \ + _( 45, KERN, NOERR) \ + _( 46, KERN, NOERR) \ + _( 47, KERN, NOERR) \ + _(128, USER, NOERR) \ + _(129, USER, NOERR) + +// AM-specific configurations +#define MAX_CPU 8 +#define BOOTREC_ADDR 0x07000 +#define MAINARG_ADDR 0x10000 + +// Below are only visible to c/c++ files +#ifndef __ASSEMBLER__ + 
+#include <stdint.h> + +// Segment Descriptor +typedef struct { + uint32_t lim_15_0 : 16; // Low bits of segment limit + uint32_t base_15_0 : 16; // Low bits of segment base address + uint32_t base_23_16 : 8; // Middle bits of segment base address + uint32_t type : 4; // Segment type (see STS_ constants) + uint32_t s : 1; // 0 = system, 1 = application + uint32_t dpl : 2; // Descriptor Privilege Level + uint32_t p : 1; // Present + uint32_t lim_19_16 : 4; // High bits of segment limit + uint32_t avl : 1; // Unused (available for software use) + uint32_t l : 1; // 64-bit segment + uint32_t db : 1; // 32-bit segment + uint32_t g : 1; // Granularity: limit scaled by 4K when set + uint32_t base_31_24 : 8; // High bits of segment base address +} SegDesc; + +// Gate descriptors for interrupts and traps +typedef struct { + uint32_t off_15_0 : 16; // Low 16 bits of offset in segment + uint32_t cs : 16; // Code segment selector + uint32_t args : 5; // # args, 0 for interrupt/trap gates + uint32_t rsv1 : 3; // Reserved(should be zero I guess) + uint32_t type : 4; // Type(STS_{TG,IG32,TG32}) + uint32_t s : 1; // Must be 0 (system) + uint32_t dpl : 2; // Descriptor(meaning new) privilege level + uint32_t p : 1; // Present + uint32_t off_31_16 : 16; // High bits of offset in segment +} GateDesc32; + +typedef struct { + uint32_t off_15_0 : 16; + uint32_t cs : 16; + uint32_t isv : 3; + uint32_t zero1 : 5; + uint32_t type : 4; + uint32_t zero2 : 1; + uint32_t dpl : 2; + uint32_t p : 1; + uint32_t off_31_16 : 16; + uint32_t off_63_32 : 32; + uint32_t rsv : 32; +} GateDesc64; + +// Task State Segment (TSS) +typedef struct { + uint32_t link; // Unused + uint32_t esp0; // Stack pointers and segment selectors + uint32_t ss0; // after an increase in privilege level + uint32_t padding[23]; +} __attribute__((packed)) TSS32; + +typedef struct { + uint32_t rsv; + uint64_t rsp0, rsp1, rsp2; + uint32_t padding[19]; +} __attribute__((packed)) TSS64; + +// Multiprocessor configuration +typedef struct { // 
configuration table header + uint8_t signature[4]; // "PCMP" + uint16_t length; // total table length + uint8_t version; // [14] + uint8_t checksum; // all bytes must add up to 0 + uint8_t product[20]; // product id + uint32_t oemtable; // OEM table pointer + uint16_t oemlength; // OEM table length + uint16_t entry; // entry count + uint32_t lapicaddr; // address of local APIC + uint16_t xlength; // extended table length + uint8_t xchecksum; // extended table checksum + uint8_t reserved; +} MPConf; + +typedef struct { + int magic; + uint32_t conf; // MP config table addr + uint8_t length; // 1 + uint8_t specrev; // [14] + uint8_t checksum; // all bytes add to 0 + uint8_t type; // config type + uint8_t imcrp; + uint8_t reserved[3]; +} MPDesc; + +typedef struct { + uint32_t jmp_code; + int32_t is_ap; +} BootRecord; + +#define SEG16(type, base, lim, dpl) (SegDesc) \ +{ (lim) & 0xffff, (uintptr_t)(base) & 0xffff, \ + ((uintptr_t)(base) >> 16) & 0xff, type, 0, dpl, 1, \ + (uintptr_t)(lim) >> 16, 0, 0, 1, 0, (uintptr_t)(base) >> 24 } + +#define SEG32(type, base, lim, dpl) (SegDesc) \ +{ ((lim) >> 12) & 0xffff, (uintptr_t)(base) & 0xffff, \ + ((uintptr_t)(base) >> 16) & 0xff, type, 1, dpl, 1, \ + (uintptr_t)(lim) >> 28, 0, 0, 1, 1, (uintptr_t)(base) >> 24 } + +#define SEG64(type, dpl) (SegDesc) \ + { 0, 0, 0, type, 1, dpl, 1, 0, 0, 1, 0, 0 } + +#define SEGTSS64(type, base, lim, dpl) (SegDesc) \ +{ (lim) & 0xffff, (uint32_t)(base) & 0xffff, \ + ((uint32_t)(base) >> 16) & 0xff, type, 0, dpl, 1, \ + (uint32_t)(lim) >> 16, 0, 0, 0, 0, (uint32_t)(base) >> 24 } + +#define GATE32(type, cs, entry, dpl) (GateDesc32) \ + { (uint32_t)(entry) & 0xffff, (cs), 0, 0, (type), 0, (dpl), \ + 1, (uint32_t)(entry) >> 16 } + +#define GATE64(type, cs, entry, dpl) (GateDesc64) \ + { (uint64_t)(entry) & 0xffff, (cs), 0, 0, (type), 0, (dpl), \ + 1, ((uint64_t)(entry) >> 16) & 0xffff, (uint64_t)(entry) >> 32, 0 } + +// Instruction wrappers + +static inline uint8_t inb(int port) { + uint8_t data; + 
asm volatile ("inb %1, %0" : "=a"(data) : "d"((uint16_t)port)); + return data; +} + +static inline uint16_t inw(int port) { + uint16_t data; + asm volatile ("inw %1, %0" : "=a"(data) : "d"((uint16_t)port)); + return data; +} + +static inline uint32_t inl(int port) { + uint32_t data; + asm volatile ("inl %1, %0" : "=a"(data) : "d"((uint16_t)port)); + return data; +} + +static inline void outb(int port, uint8_t data) { + asm volatile ("outb %%al, %%dx" : : "a"(data), "d"((uint16_t)port)); +} + +static inline void outw(int port, uint16_t data) { + asm volatile ("outw %%ax, %%dx" : : "a"(data), "d"((uint16_t)port)); +} + +static inline void outl(int port, uint32_t data) { + asm volatile ("outl %%eax, %%dx" : : "a"(data), "d"((uint16_t)port)); +} + +static inline void cli() { + asm volatile ("cli"); +} + +static inline void sti() { + asm volatile ("sti"); +} + +static inline void hlt() { + asm volatile ("hlt"); +} + +static inline void pause() { + asm volatile ("pause"); +} + +static inline uint32_t get_efl() { + volatile uintptr_t efl; + asm volatile ("pushf; pop %0": "=r"(efl)); + return efl; +} + +static inline uintptr_t get_cr0(void) { + volatile uintptr_t val; + asm volatile ("mov %%cr0, %0" : "=r"(val)); + return val; +} + +static inline void set_cr0(uintptr_t cr0) { + asm volatile ("mov %0, %%cr0" : : "r"(cr0)); +} + +static inline void set_idt(void *idt, int size) { + static volatile struct { + int16_t size; + void *idt; + } __attribute__((packed)) data; + data.size = size; + data.idt = idt; + asm volatile ("lidt (%0)" : : "r"(&data)); +} + +static inline void set_gdt(void *gdt, int size) { + static volatile struct { + int16_t size; + void *gdt; + } __attribute__((packed)) data; + data.size = size; + data.gdt = gdt; + asm volatile ("lgdt (%0)" : : "r"(&data)); +} + +static inline void set_tr(int selector) { + asm volatile ("ltr %0" : : "r"((uint16_t)selector)); +} + +static inline uintptr_t get_cr2() { + volatile uintptr_t val; + asm volatile ("mov %%cr2, %0" : 
"=r"(val)); + return val; +} + +static inline uintptr_t get_cr3() { + volatile uintptr_t val; + asm volatile ("mov %%cr3, %0" : "=r"(val)); + return val; +} + +static inline void set_cr3(void *pdir) { + asm volatile ("mov %0, %%cr3" : : "r"(pdir)); +} + +static inline int xchg(int *addr, int newval) { + int result; + asm volatile ("lock xchg %0, %1": + "+m"(*addr), "=a"(result) : "1"(newval) : "cc", "memory"); + return result; +} + +static inline uint64_t rdtsc() { + uint32_t lo, hi; + asm volatile ("rdtsc": "=a"(lo), "=d"(hi)); + return ((uint64_t)hi << 32) | lo; +} + +#define interrupt(id) \ + asm volatile ("int $" #id); + +static inline void stack_switch_call(void *sp, void *entry, uintptr_t arg) { + asm volatile ( +#if __x86_64__ + "movq %0, %%rsp; movq %2, %%rdi; jmp *%1" : : "b"((uintptr_t)sp), "d"(entry), "a"(arg) +#else + "movl %0, %%esp; movl %2, 4(%0); jmp *%1" : : "b"((uintptr_t)sp - 8), "d"(entry), "a"(arg) +#endif + ); +} + +static inline volatile BootRecord *boot_record() { + return (BootRecord *)BOOTREC_ADDR; +} + +#endif // __ASSEMBLER__ diff --git a/abstract-machine/klib/Makefile b/abstract-machine/klib/Makefile new file mode 100644 index 0000000..b117c60 --- /dev/null +++ b/abstract-machine/klib/Makefile @@ -0,0 +1,3 @@ +NAME = klib +SRCS := $(shell find src/ -name "*.c") +include $(AM_HOME)/Makefile diff --git a/abstract-machine/klib/include/klib-macros.h b/abstract-machine/klib/include/klib-macros.h new file mode 100644 index 0000000..1f7c370 --- /dev/null +++ b/abstract-machine/klib/include/klib-macros.h @@ -0,0 +1,39 @@ +#ifndef KLIB_MACROS_H__ +#define KLIB_MACROS_H__ + +#define ROUNDUP(a, sz) ((((uintptr_t)(a)) + (sz) - 1) & ~((sz) - 1)) // round a up to a multiple of sz; the mask assumes sz is a power of two +#define ROUNDDOWN(a, sz) ((((uintptr_t)(a))) & ~((sz) - 1)) // round a down to a multiple of sz; the mask assumes sz is a power of two +#define LENGTH(arr) (sizeof(arr) / sizeof((arr)[0])) +#define RANGE(st, ed) (Area) { .start = (void *)(st), .end = (void *)(ed) } +#define IN_RANGE(ptr, area) ((area).start <= (ptr) && (ptr) < (area).end) + +#define STRINGIFY(s) #s +#define TOSTRING(s) 
STRINGIFY(s) +#define _CONCAT(x, y) x ## y +#define CONCAT(x, y) _CONCAT(x, y) + +#define putstr(s) \ + ({ for (const char *p = s; *p; p++) putch(*p); }) + +#define io_read(reg) \ + ({ reg##_T __io_param; \ + ioe_read(reg, &__io_param); \ + __io_param; }) + +#define io_write(reg, ...) \ + ({ reg##_T __io_param = (reg##_T) { __VA_ARGS__ }; \ + ioe_write(reg, &__io_param); }) + +#define static_assert(const_cond) \ + static char CONCAT(_static_assert_, __LINE__) [(const_cond) ? 1 : -1] __attribute__((unused)) + +#define panic_on(cond, s) \ + ({ if (cond) { \ + putstr("AM Panic: "); putstr(s); \ + putstr(" @ " __FILE__ ":" TOSTRING(__LINE__) " \n"); \ + halt(1); \ + } }) + +#define panic(s) panic_on(1, s) + +#endif diff --git a/abstract-machine/klib/include/klib.h b/abstract-machine/klib/include/klib.h new file mode 100644 index 0000000..ecb24c8 --- /dev/null +++ b/abstract-machine/klib/include/klib.h @@ -0,0 +1,58 @@ +#ifndef KLIB_H__ +#define KLIB_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +//#define __NATIVE_USE_KLIB__ + +// string.h +void *memset (void *s, int c, size_t n); +void *memcpy (void *dst, const void *src, size_t n); +void *memmove (void *dst, const void *src, size_t n); +int memcmp (const void *s1, const void *s2, size_t n); +size_t strlen (const char *s); +char *strcat (char *dst, const char *src); +char *strcpy (char *dst, const char *src); +char *strncpy (char *dst, const char *src, size_t n); +int strcmp (const char *s1, const char *s2); +int strncmp (const char *s1, const char *s2, size_t n); + +// stdlib.h +void srand (unsigned int seed); +int rand (void); +void *malloc (size_t size); +void free (void *ptr); +int abs (int x); +int atoi (const char *nptr); + +// stdio.h +int printf (const char *format, ...); +int sprintf (char *str, const char *format, ...); +int snprintf (char *str, size_t size, const char *format, ...); +int vsprintf (char *str, const char *format, va_list ap); +int vsnprintf (char *str, 
size_t size, const char *format, va_list ap); + +// assert.h +#ifdef NDEBUG + #define assert(ignore) ((void)0) +#else + #define assert(cond) \ + do { \ + if (!(cond)) { \ + printf("Assertion fail at %s:%d\n", __FILE__, __LINE__); \ + halt(1); \ + } \ + } while (0) +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/abstract-machine/klib/src/cpp.c b/abstract-machine/klib/src/cpp.c new file mode 100644 index 0000000..9c57751 --- /dev/null +++ b/abstract-machine/klib/src/cpp.c @@ -0,0 +1,19 @@ +#include +#include + +#ifndef __ISA_NATIVE__ + +void __dso_handle() { +} + +void __cxa_guard_acquire() { +} + +void __cxa_guard_release() { +} + +void __cxa_atexit() { + assert(0); +} + +#endif diff --git a/abstract-machine/klib/src/int64.c b/abstract-machine/klib/src/int64.c new file mode 100644 index 0000000..13376f4 --- /dev/null +++ b/abstract-machine/klib/src/int64.c @@ -0,0 +1,731 @@ +// divmoddi4.c from The LLVM Compiler Infrastructure + +/* Assumption: Signed integral is 2's complement. */ +/* Assumption: Right shift of signed negative is arithmetic shift. */ +/* Assumption: Endianness is little or big (not mixed). 
*/ + +#if defined(__ELF__) +#define FNALIAS(alias_name, original_name) \ + void alias_name() __attribute__((__alias__(#original_name))) +#define COMPILER_RT_ALIAS(aliasee) __attribute__((__alias__(#aliasee))) +#else +#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")") +#define COMPILER_RT_ALIAS(aliasee) _Pragma("GCC error(\"alias unsupported on this file format\")") +#endif + +/* ABI macro definitions */ + +#if __ARM_EABI__ +# ifdef COMPILER_RT_ARMHF_TARGET +# define COMPILER_RT_ABI +# else +# define COMPILER_RT_ABI __attribute__((__pcs__("aapcs"))) +# endif +#else +# define COMPILER_RT_ABI +#endif + +#define AEABI_RTABI __attribute__((__pcs__("aapcs"))) + +#ifdef _MSC_VER +#define ALWAYS_INLINE __forceinline +#define NOINLINE __declspec(noinline) +#define NORETURN __declspec(noreturn) +#define UNUSED +#else +#define ALWAYS_INLINE __attribute__((always_inline)) +#define NOINLINE __attribute__((noinline)) +#define NORETURN __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#endif + +#if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE)) +/* + * Kernel and boot environment can't use normal headers, + * so use the equivalent system headers. + */ +# include +# include +# include +#else +/* Include the standard compiler builtin headers we use functionality from. */ +# include +# include +# include +# include +#endif + +/* Include the commonly used internal type definitions. */ +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ + defined(__ORDER_LITTLE_ENDIAN__) + +/* Clang and GCC provide built-in endianness definitions. */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* __BYTE_ORDER__ */ + +#else /* Compilers other than Clang or GCC. 
*/ + +#if defined(__SVR4) && defined(__sun) +#include + +#if defined(_BIG_ENDIAN) +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif defined(_LITTLE_ENDIAN) +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#else /* !_LITTLE_ENDIAN */ +#error "unknown endianness" +#endif /* !_LITTLE_ENDIAN */ + +#endif /* Solaris and AuroraUX. */ + +/* .. */ + +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ + defined(__minix) +#include + +#if _BYTE_ORDER == _BIG_ENDIAN +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* _BYTE_ORDER */ + +#endif /* *BSD */ + +#if defined(__OpenBSD__) +#include + +#if _BYTE_ORDER == _BIG_ENDIAN +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* _BYTE_ORDER */ + +#endif /* OpenBSD */ + +/* .. */ + +/* Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the + * compiler (at least with GCC) */ +#if defined(__APPLE__) || defined(__ellcc__ ) + +#ifdef __BIG_ENDIAN__ +#if __BIG_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#endif +#endif /* __BIG_ENDIAN__ */ + +#ifdef __LITTLE_ENDIAN__ +#if __LITTLE_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif +#endif /* __LITTLE_ENDIAN__ */ + +#endif /* Mac OSX */ + +/* .. */ + +#if defined(_WIN32) + +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 + +#endif /* Windows */ + +#endif /* Clang or GCC. */ + +/* . */ + +#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN) +#error Unable to determine endian +#endif /* Check we found an endianness correctly. 
*/ + +/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */ +#ifdef si_int +#undef si_int +#endif +typedef int si_int; +typedef unsigned su_int; + +typedef long long di_int; +typedef unsigned long long du_int; + +typedef union +{ + di_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + su_int low; + si_int high; +#else + si_int high; + su_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} dwords; + +typedef union +{ + du_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + su_int low; + su_int high; +#else + su_int high; + su_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} udwords; + +#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))// || defined(__riscv) +#define CRT_HAS_128BIT +#endif + +#ifdef CRT_HAS_128BIT +typedef int ti_int __attribute__ ((mode (TI))); +typedef unsigned tu_int __attribute__ ((mode (TI))); + +typedef union +{ + ti_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + du_int low; + di_int high; +#else + di_int high; + du_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} twords; + +typedef union +{ + tu_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + du_int low; + du_int high; +#else + du_int high; + du_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} utwords; + +static __inline ti_int make_ti(di_int h, di_int l) { + twords r; + r.s.high = h; + r.s.low = l; + return r.all; +} + +static __inline tu_int make_tu(du_int h, du_int l) { + utwords r; + r.s.high = h; + r.s.low = l; + return r.all; +} + +#endif /* CRT_HAS_128BIT */ + +typedef union +{ + su_int u; + float f; +} float_bits; + +typedef union +{ + udwords u; + double f; +} double_bits; + +typedef struct +{ +#if _YUGA_LITTLE_ENDIAN + udwords low; + udwords high; +#else + udwords high; + udwords low; +#endif /* _YUGA_LITTLE_ENDIAN */ +} uqwords; + +typedef union +{ + uqwords u; + long double f; +} long_double_bits; + +#if __STDC_VERSION__ >= 199901L +typedef float _Complex Fcomplex; +typedef double _Complex Dcomplex; +typedef long double _Complex Lcomplex; + +#define 
COMPLEX_REAL(x) __real__(x) +#define COMPLEX_IMAGINARY(x) __imag__(x) +#else +typedef struct { float real, imaginary; } Fcomplex; + +typedef struct { double real, imaginary; } Dcomplex; + +typedef struct { long double real, imaginary; } Lcomplex; + +#define COMPLEX_REAL(x) (x).real +#define COMPLEX_IMAGINARY(x) (x).imaginary +#endif + + +/* Include internal utility function declarations. */ +/** \brief Trigger a program abort (or panic for kernel code). */ +#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, __func__) + +NORETURN void compilerrt_abort_impl(const char *file, int line, + const char *function); + +#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__) +#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt) +#define COMPILE_TIME_ASSERT2(expr, cnt) \ + typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED + +COMPILER_RT_ABI si_int __paritysi2(si_int a); +COMPILER_RT_ABI si_int __paritydi2(di_int a); + +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b); +COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); +COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); + +COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem); +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem); +#ifdef CRT_HAS_128BIT +COMPILER_RT_ABI si_int __clzti2(ti_int a); +COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); +#endif + +/* Definitions for builtins unavailable on MSVC */ +#if defined(_MSC_VER) && !defined(__clang__) +#include + +uint32_t __inline __builtin_ctz(uint32_t value) { + unsigned long trailing_zero = 0; + if (_BitScanForward(&trailing_zero, value)) + return trailing_zero; + return 32; +} + +uint32_t __inline __builtin_clz(uint32_t value) { + unsigned long leading_zero = 0; + if (_BitScanReverse(&leading_zero, value)) + return 31 - leading_zero; + return 32; +} + +#if defined(_M_ARM) || defined(_M_X64) +uint32_t __inline __builtin_clzll(uint64_t value) { + unsigned 
long leading_zero = 0; + if (_BitScanReverse64(&leading_zero, value)) + return 63 - leading_zero; + return 64; +} +#else +uint32_t __inline __builtin_clzll(uint64_t value) { + if (value == 0) + return 64; + uint32_t msh = (uint32_t)(value >> 32); + uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF); + if (msh != 0) + return __builtin_clz(msh); + return 32 + __builtin_clz(lsh); +} +#endif + +#define __builtin_clzl __builtin_clzll +#endif /* defined(_MSC_VER) && !defined(__clang__) */ + +#include + +#if !defined(__ARCH_RISCV64_MYCPU) +/* Returns: a / b */ + +COMPILER_RT_ABI di_int +__divdi3(di_int a, di_int b) +{ + const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; + di_int s_a = a >> bits_in_dword_m1; /* s_a = a < 0 ? -1 : 0 */ + di_int s_b = b >> bits_in_dword_m1; /* s_b = b < 0 ? -1 : 0 */ + a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ + b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ + s_a ^= s_b; /*sign of quotient */ + return (__udivmoddi4(a, b, (du_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */ +} + +/* Returns: a / b, *rem = a % b */ + +COMPILER_RT_ABI di_int +__divmoddi4(di_int a, di_int b, di_int* rem) +{ + di_int d = __divdi3(a,b); + *rem = a - (d*b); + return d; +} + +/* Returns: a % b */ + +COMPILER_RT_ABI di_int +__moddi3(di_int a, di_int b) +{ + const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; + di_int s = b >> bits_in_dword_m1; /* s = b < 0 ? -1 : 0 */ + b = (b ^ s) - s; /* negate if s == -1 */ + s = a >> bits_in_dword_m1; /* s = a < 0 ? 
-1 : 0 */ + a = (a ^ s) - s; /* negate if s == -1 */ + du_int r; + __udivmoddi4(a, b, &r); + return ((di_int)r ^ s) - s; /* negate if s == -1 */ +} + +/* Returns: a / b */ + +COMPILER_RT_ABI du_int +__udivdi3(du_int a, du_int b) +{ + return __udivmoddi4(a, b, 0); +} + +/* Returns: a % b */ + +COMPILER_RT_ABI du_int +__umoddi3(du_int a, du_int b) +{ + du_int r; + __udivmoddi4(a, b, &r); + return r; +} +#endif + + +COMPILER_RT_ABI du_int +__udivmoddi4(du_int a, du_int b, du_int* rem) +{ + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + udwords n; + n.all = a; + udwords d; + d.all = b; + udwords q; + udwords r; + unsigned sr; + /* special cases, X is unknown, K != 0 */ + if (n.s.high == 0) + { + if (d.s.high == 0) + { + /* 0 X + * --- + * 0 X + */ + if (rem) + *rem = n.s.low % d.s.low; + return n.s.low / d.s.low; + } + /* 0 X + * --- + * K X + */ + if (rem) + *rem = n.s.low; + return 0; + } + /* n.s.high != 0 */ + if (d.s.low == 0) + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 0 + */ + if (rem) + *rem = n.s.high % d.s.low; + return n.s.high / d.s.low; + } + /* d.s.high != 0 */ + if (n.s.low == 0) + { + /* K 0 + * --- + * K 0 + */ + if (rem) + { + r.s.high = n.s.high % d.s.high; + r.s.low = 0; + *rem = r.all; + } + return n.s.high / d.s.high; + } + /* K K + * --- + * K 0 + */ + if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + { + r.s.low = n.s.low; + r.s.high = n.s.high & (d.s.high - 1); + *rem = r.all; + } + return n.s.high >> __builtin_ctz(d.s.high); + } + /* K K + * --- + * K 0 + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 2 or sr large */ + if (sr > n_uword_bits - 2) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits - 1 */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + /* r.all = n.all >> sr; */ + r.s.high = 
n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + else /* d.s.low != 0 */ + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 K + */ + if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + *rem = n.s.low & (d.s.low - 1); + if (d.s.low == 1) + return n.all; + sr = __builtin_ctz(d.s.low); + q.s.high = n.s.high >> sr; + q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + return q.all; + } + /* K X + * --- + * 0 K + */ + sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); + /* 2 <= sr <= n_udword_bits - 1 + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + */ + if (sr == n_uword_bits) + { + q.s.low = 0; + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 + { + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 + { + q.s.low = n.s.low << (n_udword_bits - sr); + q.s.high = (n.s.high << (n_udword_bits - sr)) | + (n.s.low >> (sr - n_uword_bits)); + r.s.high = 0; + r.s.low = n.s.high >> (sr - n_uword_bits); + } + } + else + { + /* K X + * --- + * K K + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 1 or sr large */ + if (sr > n_uword_bits - 1) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + if (sr == n_uword_bits) + { + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else + { + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + } + } + /* Not a special case + * q and r are initialized with: + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + * 1 <= sr <= 
n_udword_bits - 1 + */ + su_int carry = 0; + for (; sr > 0; --sr) + { + /* r:q = ((r:q) << 1) | carry */ + r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); + r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); + q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); + q.s.low = (q.s.low << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } + */ + const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; +} + +// Returns: the number of leading 0-bits + +// Precondition: a != 0 + +COMPILER_RT_ABI si_int __clzsi2(si_int a) { + su_int x = (su_int)a; + si_int t = ((x & 0xFFFF0000) == 0) << 4; // if (x is small) t = 16 else 0 + x >>= 16 - t; // x = [0 - 0xFFFF] + su_int r = t; // r = [0, 16] + // return r + clz(x) + t = ((x & 0xFF00) == 0) << 3; + x >>= 8 - t; // x = [0 - 0xFF] + r += t; // r = [0, 8, 16, 24] + // return r + clz(x) + t = ((x & 0xF0) == 0) << 2; + x >>= 4 - t; // x = [0 - 0xF] + r += t; // r = [0, 4, 8, 12, 16, 20, 24, 28] + // return r + clz(x) + t = ((x & 0xC) == 0) << 1; + x >>= 2 - t; // x = [0 - 3] + r += t; // r = [0 - 30] and is even + // return r + clz(x) + // switch (x) + // { + // case 0: + // return r + 2; + // case 1: + // return r + 1; + // case 2: + // case 3: + // return r; + // } + return r + ((2 - x) & -((x & 2) == 0)); +} + +// Returns: the number of trailing 0-bits + +// Precondition: a != 0 + +COMPILER_RT_ABI si_int __ctzsi2(si_int a) { + su_int x = (su_int)a; + si_int t = ((x & 0x0000FFFF) == 0) + << 4; // if (x has no small bits) t = 16 else 0 + x >>= t; // x = [0 - 0xFFFF] + higher garbage bits + su_int r = t; // r = [0, 16] + // return r + ctz(x) + t = ((x & 0x00FF) == 0) << 3; + x >>= t; // x = [0 - 0xFF] + higher garbage bits + r += t; // r = [0, 8, 16, 24] + // return r + ctz(x) + t = ((x & 0x0F) == 0) << 2; + x >>= t; // x = 
[0 - 0xF] + higher garbage bits + r += t; // r = [0, 4, 8, 12, 16, 20, 24, 28] + // return r + ctz(x) + t = ((x & 0x3) == 0) << 1; + x >>= t; + x &= 3; // x = [0 - 3] + r += t; // r = [0 - 30] and is even + // return r + ctz(x) + + // The branch-less return statement below is equivalent + // to the following switch statement: + // switch (x) + // { + // case 0: + // return r + 2; + // case 2: + // return r + 1; + // case 1: + // case 3: + // return r; + // } + return r + ((2 - (x >> 1)) & -((x & 1) == 0)); +} + +typedef int si_int; +typedef long long di_int; +typedef unsigned su_int; +#define CHAR_BIT __CHAR_BIT__ + + +si_int __ctzdi2(di_int a) { + dwords x; + x.all = a; + const si_int f = -(x.s.low == 0); + return __ctzsi2((x.s.high & f) | (x.s.low & ~f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +} + +si_int __clzdi2(di_int a) { + dwords x; + x.all = a; + const si_int f = -(x.s.high == 0); + return __clzsi2((x.s.high & ~f) | (x.s.low & f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +} diff --git a/abstract-machine/klib/src/stdio.c b/abstract-machine/klib/src/stdio.c new file mode 100644 index 0000000..1b19953 --- /dev/null +++ b/abstract-machine/klib/src/stdio.c @@ -0,0 +1,28 @@ +#include +#include +#include +#include + +#if !defined(__ISA_NATIVE__) || defined(__NATIVE_USE_KLIB__) + +int printf(const char *fmt, ...) { + panic("Not implemented"); +} + +int vsprintf(char *out, const char *fmt, va_list ap) { + panic("Not implemented"); +} + +int sprintf(char *out, const char *fmt, ...) { + panic("Not implemented"); +} + +int snprintf(char *out, size_t n, const char *fmt, ...) 
{ + panic("Not implemented"); +} + +int vsnprintf(char *out, size_t n, const char *fmt, va_list ap) { + panic("Not implemented"); +} + +#endif diff --git a/abstract-machine/klib/src/stdlib.c b/abstract-machine/klib/src/stdlib.c new file mode 100644 index 0000000..382635d --- /dev/null +++ b/abstract-machine/klib/src/stdlib.c @@ -0,0 +1,45 @@ +#include +#include +#include + +#if !defined(__ISA_NATIVE__) || defined(__NATIVE_USE_KLIB__) +static unsigned long int next = 1; + +int rand(void) { + // RAND_MAX assumed to be 32767 + next = next * 1103515245 + 12345; + return (unsigned int)(next/65536) % 32768; +} + +void srand(unsigned int seed) { + next = seed; +} + +int abs(int x) { + return (x < 0 ? -x : x); +} + +int atoi(const char* nptr) { + int x = 0; + while (*nptr == ' ') { nptr ++; } + while (*nptr >= '0' && *nptr <= '9') { + x = x * 10 + *nptr - '0'; + nptr ++; + } + return x; +} + +void *malloc(size_t size) { + // On native, malloc() will be called during initializaion of C runtime. + // Therefore do not call panic() here, else it will yield a dead recursion: + // panic() -> putchar() -> (glibc) -> malloc() -> panic() +#if !(defined(__ISA_NATIVE__) && defined(__NATIVE_USE_KLIB__)) + panic("Not implemented"); +#endif + return NULL; +} + +void free(void *ptr) { +} + +#endif diff --git a/abstract-machine/klib/src/string.c b/abstract-machine/klib/src/string.c new file mode 100644 index 0000000..f1a1f22 --- /dev/null +++ b/abstract-machine/klib/src/string.c @@ -0,0 +1,47 @@ +#include +#include +#include + +#if !defined(__ISA_NATIVE__) || defined(__NATIVE_USE_KLIB__) + +size_t strlen(const char *s) { + panic("Not implemented"); +} + +char *strcpy(char *dst, const char *src) { + panic("Not implemented"); +} + +char *strncpy(char *dst, const char *src, size_t n) { + panic("Not implemented"); +} + +char *strcat(char *dst, const char *src) { + panic("Not implemented"); +} + +int strcmp(const char *s1, const char *s2) { + panic("Not implemented"); +} + +int strncmp(const 
char *s1, const char *s2, size_t n) { + panic("Not implemented"); +} + +void *memset(void *s, int c, size_t n) { + panic("Not implemented"); +} + +void *memmove(void *dst, const void *src, size_t n) { + panic("Not implemented"); +} + +void *memcpy(void *out, const void *in, size_t n) { + panic("Not implemented"); +} + +int memcmp(const void *s1, const void *s2, size_t n) { + panic("Not implemented"); +} + +#endif diff --git a/abstract-machine/scripts/isa/loongarch32r.mk b/abstract-machine/scripts/isa/loongarch32r.mk new file mode 100644 index 0000000..48af70a --- /dev/null +++ b/abstract-machine/scripts/isa/loongarch32r.mk @@ -0,0 +1,4 @@ +CROSS_COMPILE := loongarch32r-linux-gnusf- +COMMON_FLAGS := -fno-pic +CFLAGS += $(COMMON_FLAGS) -static +ASFLAGS += $(COMMON_FLAGS) -O0 diff --git a/abstract-machine/scripts/isa/mips32.mk b/abstract-machine/scripts/isa/mips32.mk new file mode 100644 index 0000000..00989b1 --- /dev/null +++ b/abstract-machine/scripts/isa/mips32.mk @@ -0,0 +1,5 @@ +CROSS_COMPILE := mips-linux-gnu- +COMMON_FLAGS := -march=mips32 -fno-pic -fno-delayed-branch -mno-abicalls -mno-check-zero-division -EL +CFLAGS += $(COMMON_FLAGS) -static -mno-llsc -mno-imadd -mno-mad +ASFLAGS += $(COMMON_FLAGS) -O0 +LDFLAGS += -EL diff --git a/abstract-machine/scripts/isa/riscv.mk b/abstract-machine/scripts/isa/riscv.mk new file mode 100644 index 0000000..e478842 --- /dev/null +++ b/abstract-machine/scripts/isa/riscv.mk @@ -0,0 +1,8 @@ +CROSS_COMPILE := riscv64-linux-gnu- +COMMON_CFLAGS := -fno-pic -march=rv64g -mcmodel=medany -mstrict-align +CFLAGS += $(COMMON_CFLAGS) -static +ASFLAGS += $(COMMON_CFLAGS) -O0 +LDFLAGS += -melf64lriscv + +# overwrite ARCH_H defined in $(AM_HOME)/Makefile +ARCH_H := arch/riscv.h diff --git a/abstract-machine/scripts/isa/x86.mk b/abstract-machine/scripts/isa/x86.mk new file mode 100644 index 0000000..d4e1b91 --- /dev/null +++ b/abstract-machine/scripts/isa/x86.mk @@ -0,0 +1,5 @@ +export CROSS_COMPILE := x86_64-linux-gnu- +CFLAGS += -m32 
-fno-pic -fno-omit-frame-pointer -march=i386 +CFLAGS += -fcf-protection=none # remove endbr32 in Ubuntu 20.04 with a CPU newer than Comet Lake +ASFLAGS += -m32 -fno-pic +LDFLAGS += -melf_i386 diff --git a/abstract-machine/scripts/isa/x86_64.mk b/abstract-machine/scripts/isa/x86_64.mk new file mode 100644 index 0000000..5241d71 --- /dev/null +++ b/abstract-machine/scripts/isa/x86_64.mk @@ -0,0 +1,4 @@ +export CROSS_COMPILE := x86_64-linux-gnu- +CFLAGS += -m64 -fPIC -mno-sse +ASFLAGS += -m64 -fPIC +LDFLAGS += -melf_x86_64 diff --git a/abstract-machine/scripts/linker.ld b/abstract-machine/scripts/linker.ld new file mode 100644 index 0000000..8dfe8d7 --- /dev/null +++ b/abstract-machine/scripts/linker.ld @@ -0,0 +1,33 @@ +ENTRY(_start) +PHDRS { text PT_LOAD; data PT_LOAD; } + +SECTIONS { + /* _pmem_start and _entry_offset are defined in LDFLAGS */ + . = _pmem_start + _entry_offset; + .text : { + *(entry) + *(.text*) + } : text + etext = .; + _etext = .; + .rodata : { + *(.rodata*) + } + .data : { + *(.data) + } : data + edata = .; + _data = .; + .bss : { + _bss_start = .; + *(.bss*) + *(.sbss*) + *(.scommon) + } + _stack_top = ALIGN(0x1000); + . 
= _stack_top + 0x8000; + _stack_pointer = .; + end = .; + _end = .; + _heap_start = ALIGN(0x1000); +} diff --git a/abstract-machine/scripts/loongarch32r-nemu.mk b/abstract-machine/scripts/loongarch32r-nemu.mk new file mode 100644 index 0000000..906e7c2 --- /dev/null +++ b/abstract-machine/scripts/loongarch32r-nemu.mk @@ -0,0 +1,8 @@ +include $(AM_HOME)/scripts/isa/loongarch32r.mk +include $(AM_HOME)/scripts/platform/nemu.mk +CFLAGS += -DISA_H=\"loongarch/loongarch32r.h\" + +AM_SRCS += loongarch/nemu/start.S \ + loongarch/nemu/cte.c \ + loongarch/nemu/trap.S \ + loongarch/nemu/vme.c diff --git a/abstract-machine/scripts/mips32-nemu.mk b/abstract-machine/scripts/mips32-nemu.mk new file mode 100644 index 0000000..51afbf3 --- /dev/null +++ b/abstract-machine/scripts/mips32-nemu.mk @@ -0,0 +1,8 @@ +include $(AM_HOME)/scripts/isa/mips32.mk +include $(AM_HOME)/scripts/platform/nemu.mk +CFLAGS += -DISA_H=\"mips/mips32.h\" + +AM_SRCS += mips/nemu/start.S \ + mips/nemu/cte.c \ + mips/nemu/trap.S \ + mips/nemu/vme.c diff --git a/abstract-machine/scripts/native.mk b/abstract-machine/scripts/native.mk new file mode 100644 index 0000000..1a8fd03 --- /dev/null +++ b/abstract-machine/scripts/native.mk @@ -0,0 +1,27 @@ +AM_SRCS := native/trm.c \ + native/ioe.c \ + native/cte.c \ + native/trap.S \ + native/vme.c \ + native/mpe.c \ + native/platform.c \ + native/ioe/input.c \ + native/ioe/timer.c \ + native/ioe/gpu.c \ + native/ioe/audio.c \ + native/ioe/disk.c \ + +CFLAGS += -fpie +ASFLAGS += -fpie -pie +comma = , +LDFLAGS_CXX = $(addprefix -Wl$(comma), $(LDFLAGS)) + +image: + @echo + LD "->" $(IMAGE_REL) + @g++ -pie -o $(IMAGE) -Wl,--whole-archive $(LINKAGE) -Wl,-no-whole-archive $(LDFLAGS_CXX) -lSDL2 -ldl + +run: image + $(IMAGE) + +gdb: image + gdb -ex "handle SIGUSR1 SIGUSR2 SIGSEGV noprint nostop" $(IMAGE) diff --git a/abstract-machine/scripts/platform/nemu.mk b/abstract-machine/scripts/platform/nemu.mk new file mode 100644 index 0000000..8367675 --- /dev/null +++ 
b/abstract-machine/scripts/platform/nemu.mk @@ -0,0 +1,29 @@ +AM_SRCS := platform/nemu/trm.c \ + platform/nemu/ioe/ioe.c \ + platform/nemu/ioe/timer.c \ + platform/nemu/ioe/input.c \ + platform/nemu/ioe/gpu.c \ + platform/nemu/ioe/audio.c \ + platform/nemu/ioe/disk.c \ + platform/nemu/mpe.c + +CFLAGS += -fdata-sections -ffunction-sections +LDFLAGS += -T $(AM_HOME)/scripts/linker.ld \ + --defsym=_pmem_start=0x80000000 --defsym=_entry_offset=0x0 +LDFLAGS += --gc-sections -e _start +NEMUFLAGS += -l $(shell dirname $(IMAGE).elf)/nemu-log.txt + +CFLAGS += -DMAINARGS=\"$(mainargs)\" +CFLAGS += -I$(AM_HOME)/am/src/platform/nemu/include +.PHONY: $(AM_HOME)/am/src/platform/nemu/trm.c + +image: $(IMAGE).elf + @$(OBJDUMP) -d $(IMAGE).elf > $(IMAGE).txt + @echo + OBJCOPY "->" $(IMAGE_REL).bin + @$(OBJCOPY) -S --set-section-flags .bss=alloc,contents -O binary $(IMAGE).elf $(IMAGE).bin + +run: image + $(MAKE) -C $(NEMU_HOME) ISA=$(ISA) run ARGS="$(NEMUFLAGS)" IMG=$(IMAGE).bin + +gdb: image + $(MAKE) -C $(NEMU_HOME) ISA=$(ISA) gdb ARGS="$(NEMUFLAGS)" IMG=$(IMAGE).bin diff --git a/abstract-machine/scripts/platform/npc.mk b/abstract-machine/scripts/platform/npc.mk new file mode 100644 index 0000000..3136fec --- /dev/null +++ b/abstract-machine/scripts/platform/npc.mk @@ -0,0 +1,21 @@ +AM_SRCS := riscv/npc/start.S \ + riscv/npc/trm.c \ + riscv/npc/ioe.c \ + riscv/npc/timer.c \ + riscv/npc/input.c \ + riscv/npc/cte.c \ + riscv/npc/trap.S \ + platform/dummy/vme.c \ + platform/dummy/mpe.c + +CFLAGS += -fdata-sections -ffunction-sections +LDFLAGS += -T $(AM_HOME)/scripts/linker.ld \ + --defsym=_pmem_start=0x80000000 --defsym=_entry_offset=0x0 +LDFLAGS += --gc-sections -e _start +CFLAGS += -DMAINARGS=\"$(mainargs)\" +.PHONY: $(AM_HOME)/am/src/riscv/npc/trm.c + +image: $(IMAGE).elf + @$(OBJDUMP) -d $(IMAGE).elf > $(IMAGE).txt + @echo + OBJCOPY "->" $(IMAGE_REL).bin + @$(OBJCOPY) -S --set-section-flags .bss=alloc,contents -O binary $(IMAGE).elf $(IMAGE).bin diff --git 
a/abstract-machine/scripts/platform/qemu.mk b/abstract-machine/scripts/platform/qemu.mk new file mode 100644 index 0000000..67bcd67 --- /dev/null +++ b/abstract-machine/scripts/platform/qemu.mk @@ -0,0 +1,17 @@ +.PHONY: build-arg + +LDFLAGS += -N -Ttext-segment=0x00100000 +QEMU_FLAGS += -serial mon:stdio \ + -machine accel=tcg \ + -smp "$(smp)" \ + -drive format=raw,file=$(IMAGE) + +build-arg: image + @( echo -n $(mainargs); ) | dd if=/dev/stdin of=$(IMAGE) bs=512 count=2 seek=1 conv=notrunc status=none + +BOOT_HOME := $(AM_HOME)/am/src/x86/qemu/boot + +image: $(IMAGE).elf + @$(MAKE) -s -C $(BOOT_HOME) + @echo + CREATE "->" $(IMAGE_REL) + @( cat $(BOOT_HOME)/bootblock.o; head -c 1024 /dev/zero; cat $(IMAGE).elf ) > $(IMAGE) diff --git a/abstract-machine/scripts/riscv32-nemu.mk b/abstract-machine/scripts/riscv32-nemu.mk new file mode 100644 index 0000000..5f94579 --- /dev/null +++ b/abstract-machine/scripts/riscv32-nemu.mk @@ -0,0 +1,10 @@ +include $(AM_HOME)/scripts/isa/riscv.mk +include $(AM_HOME)/scripts/platform/nemu.mk +CFLAGS += -DISA_H=\"riscv/riscv.h\" +COMMON_CFLAGS += -march=rv32im_zicsr -mabi=ilp32 # overwrite +LDFLAGS += -melf32lriscv # overwrite + +AM_SRCS += riscv/nemu/start.S \ + riscv/nemu/cte.c \ + riscv/nemu/trap.S \ + riscv/nemu/vme.c diff --git a/abstract-machine/scripts/riscv32e-nemu.mk b/abstract-machine/scripts/riscv32e-nemu.mk new file mode 100644 index 0000000..bb965d8 --- /dev/null +++ b/abstract-machine/scripts/riscv32e-nemu.mk @@ -0,0 +1,10 @@ +include $(AM_HOME)/scripts/isa/riscv.mk +include $(AM_HOME)/scripts/platform/nemu.mk +CFLAGS += -DISA_H=\"riscv/riscv.h\" +COMMON_CFLAGS += -march=rv32em_zicsr -mabi=ilp32e # overwrite +LDFLAGS += -melf32lriscv # overwrite + +AM_SRCS += riscv/nemu/start.S \ + riscv/nemu/cte.c \ + riscv/nemu/trap.S \ + riscv/nemu/vme.c diff --git a/abstract-machine/scripts/riscv32e-npc.mk b/abstract-machine/scripts/riscv32e-npc.mk new file mode 100644 index 0000000..b2b6fec --- /dev/null +++ 
b/abstract-machine/scripts/riscv32e-npc.mk @@ -0,0 +1,10 @@ +include $(AM_HOME)/scripts/isa/riscv.mk +include $(AM_HOME)/scripts/platform/npc.mk +COMMON_CFLAGS += -march=rv32e_zicsr -mabi=ilp32e # overwrite +LDFLAGS += -melf32lriscv # overwrite + +AM_SRCS += riscv/npc/libgcc/div.S \ + riscv/npc/libgcc/muldi3.S \ + riscv/npc/libgcc/multi3.c \ + riscv/npc/libgcc/ashldi3.c \ + riscv/npc/libgcc/unused.c diff --git a/abstract-machine/scripts/riscv64-nemu.mk b/abstract-machine/scripts/riscv64-nemu.mk new file mode 100644 index 0000000..e9959a5 --- /dev/null +++ b/abstract-machine/scripts/riscv64-nemu.mk @@ -0,0 +1,8 @@ +include $(AM_HOME)/scripts/isa/riscv.mk +include $(AM_HOME)/scripts/platform/nemu.mk +CFLAGS += -DISA_H=\"riscv/riscv.h\" + +AM_SRCS += riscv/nemu/start.S \ + riscv/nemu/cte.c \ + riscv/nemu/trap.S \ + riscv/nemu/vme.c diff --git a/abstract-machine/scripts/spike.mk b/abstract-machine/scripts/spike.mk new file mode 100644 index 0000000..4ea391c --- /dev/null +++ b/abstract-machine/scripts/spike.mk @@ -0,0 +1,19 @@ +include $(AM_HOME)/scripts/isa/riscv.mk + +AM_SRCS := riscv/spike/trm.c \ + riscv/spike/ioe.c \ + riscv/spike/timer.c \ + riscv/spike/start.S \ + riscv/spike/htif.S \ + platform/dummy/cte.c \ + platform/dummy/vme.c \ + platform/dummy/mpe.c \ + +CFLAGS += -fdata-sections -ffunction-sections +LDFLAGS += -T $(AM_HOME)/am/src/riscv/spike/linker.ld +LDFLAGS += --gc-sections -e _start + +CFLAGS += -DMAINARGS=\"$(mainargs)\" +.PHONY: $(AM_HOME)/am/src/riscv/spike/trm.c + +image: $(IMAGE).elf diff --git a/abstract-machine/scripts/x86-nemu.mk b/abstract-machine/scripts/x86-nemu.mk new file mode 100644 index 0000000..c68676e --- /dev/null +++ b/abstract-machine/scripts/x86-nemu.mk @@ -0,0 +1,10 @@ +include $(AM_HOME)/scripts/isa/x86.mk +include $(AM_HOME)/scripts/platform/nemu.mk +CFLAGS += -mstringop-strategy=loop -DISA_H=\"x86/x86.h\" +# overwrite _pmem_start and _entry_offset defined in nemu.mk +LDFLAGS += --defsym=_pmem_start=0x0 
--defsym=_entry_offset=0x100000 + +AM_SRCS += x86/nemu/start.S \ + x86/nemu/cte.c \ + x86/nemu/trap.S \ + x86/nemu/vme.c diff --git a/abstract-machine/scripts/x86-qemu.mk b/abstract-machine/scripts/x86-qemu.mk new file mode 100644 index 0000000..437069c --- /dev/null +++ b/abstract-machine/scripts/x86-qemu.mk @@ -0,0 +1,13 @@ +include $(AM_HOME)/scripts/isa/x86.mk +include $(AM_HOME)/scripts/platform/qemu.mk + +AM_SRCS := x86/qemu/start32.S \ + x86/qemu/trap32.S \ + x86/qemu/trm.c \ + x86/qemu/cte.c \ + x86/qemu/ioe.c \ + x86/qemu/vme.c \ + x86/qemu/mpe.c + +run: build-arg + @qemu-system-i386 $(QEMU_FLAGS) diff --git a/abstract-machine/scripts/x86_64-qemu.mk b/abstract-machine/scripts/x86_64-qemu.mk new file mode 100644 index 0000000..f690696 --- /dev/null +++ b/abstract-machine/scripts/x86_64-qemu.mk @@ -0,0 +1,13 @@ +include $(AM_HOME)/scripts/isa/x86_64.mk +include $(AM_HOME)/scripts/platform/qemu.mk + +AM_SRCS := x86/qemu/start64.S \ + x86/qemu/trap64.S \ + x86/qemu/trm.c \ + x86/qemu/cte.c \ + x86/qemu/ioe.c \ + x86/qemu/vme.c \ + x86/qemu/mpe.c + +run: build-arg + @qemu-system-x86_64 $(QEMU_FLAGS) diff --git a/abstract-machine/tools/logisim-img.py b/abstract-machine/tools/logisim-img.py new file mode 100644 index 0000000..c5c5992 --- /dev/null +++ b/abstract-machine/tools/logisim-img.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +from sys import argv + +bin = argv[1] + +fp_inst = open(bin + '-logisim-inst.txt', 'w') +fp_data0 = open(bin + '-logisim-data0.txt', 'w') +fp_data1 = open(bin + '-logisim-data1.txt', 'w') +fp_data2 = open(bin + '-logisim-data2.txt', 'w') +fp_data3 = open(bin + '-logisim-data3.txt', 'w') +for f in [fp_inst, fp_data0, fp_data1, fp_data2, fp_data3]: + f.write('v2.0 raw\n') + +with open(bin, 'rb') as fp: + while True: + bytes = fp.read(4) + if not bytes: + break + fp_inst.write(bytes[::-1].hex() + ' ') + fp_data0.write("%02x " % bytes[0]) + fp_data1.write("%02x " % bytes[1]) + fp_data2.write("%02x " % bytes[2]) + fp_data3.write("%02x " % 
bytes[3]) + fp.close() + +for f in [fp_inst, fp_data0, fp_data1, fp_data2, fp_data3]: + f.close()