From 8665aaf7d27f17609ea199f0479b39bda9db878d Mon Sep 17 00:00:00 2001 From: xinyangli Date: Fri, 12 Apr 2024 14:08:52 +0800 Subject: [PATCH] fix(npc): fix branch, load memory and slt related instructions --- nemu/Kconfig | 2 +- npc/.gitignore | 1 + npc/cmake/ChiselBuild.cmake | 3 +- npc/core/src/main/resources/RamDpi.v | 2 + npc/core/src/main/scala/components/ALU.scala | 8 +- npc/core/src/main/scala/components/Mem.scala | 2 + npc/core/src/main/scala/top/FlowMain.scala | 103 ++++++++++++------- npc/csrc/Flow/main.cpp | 2 +- npc/include/components.hpp | 38 ++++--- npc/utils/sdb/CMakeLists.txt | 6 +- npc/utils/sdb/addrexp.y | 1 + 11 files changed, 105 insertions(+), 63 deletions(-) diff --git a/nemu/Kconfig b/nemu/Kconfig index 85074bd..5816143 100644 --- a/nemu/Kconfig +++ b/nemu/Kconfig @@ -180,7 +180,7 @@ config ITRACE_BUFFER default 10 config MTRACE - depends on TRACE && TARGET_NATIVE_ELF && ENGINE_INTERPRETER + depends on TRACE bool "Enable memory tracing" default n diff --git a/npc/.gitignore b/npc/.gitignore index e0793d8..5a4f7bc 100644 --- a/npc/.gitignore +++ b/npc/.gitignore @@ -16,3 +16,4 @@ hs_err_pid* .direnv/ compile_commands.json +*.vcd diff --git a/npc/cmake/ChiselBuild.cmake b/npc/cmake/ChiselBuild.cmake index aa830f7..fd838f7 100644 --- a/npc/cmake/ChiselBuild.cmake +++ b/npc/cmake/ChiselBuild.cmake @@ -3,6 +3,7 @@ # NOTE: Must reconfigure if we add new files in SCALA_CORE directory file(GLOB_RECURSE SCALA_CORE_SOURCES "${SCALA_CORE}/src/main/scala/*.scala") file(GLOB_RECURSE SCALA_CORE_RESOURCES "${SCALA_CORE}/src/main/resources/*") +message(STATUS "Found scala source file: ${SCALA_CORE_SOURCES}") set(CHISEL_DEPENDENCY ${SCALA_CORE_SOURCES} ${SCALA_CORE_RESOURCES} ${SCALA_CORE}/build.sbt) if(BUILD_USE_BLOOP) @@ -39,7 +40,7 @@ else() COMMAND sbt "run ${CHISEL_EMIT_ARGS}" COMMAND ${CMAKE_COMMAND} -E copy_directory_if_different ${CHISEL_OUTPUT_TMP_DIR} ${CHISEL_OUTPUT_DIR} WORKING_DIRECTORY ${SCALA_CORE} - # DEPENDS ${CHISEL_DEPENDENCY} test.scala + DEPENDS ${CHISEL_DEPENDENCY} VERBATIM COMMENT "Run sbt from CMake" ) diff --git a/npc/core/src/main/resources/RamDpi.v b/npc/core/src/main/resources/RamDpi.v index c9f8b7c..0f4308b 100644 --- a/npc/core/src/main/resources/RamDpi.v +++ b/npc/core/src/main/resources/RamDpi.v @@ -2,6 +2,8 @@ import "DPI-C" function int pmem_read(input int addr); import "DPI-C" function void pmem_write(input int waddr, input int wdata, input byte wmask); module RamDpi ( + input clock, + input reset, input writeEnable, input valid, input [31:0] writeAddr, diff --git a/npc/core/src/main/scala/components/ALU.scala b/npc/core/src/main/scala/components/ALU.scala index 1626621..44e9c14 100644 --- a/npc/core/src/main/scala/components/ALU.scala +++ b/npc/core/src/main/scala/components/ALU.scala @@ -6,13 +6,13 @@ import shapeless.{HNil, ::} class ALUControlInterface extends Bundle { object OpSelect extends ChiselEnum { - val aOpAdd, aOpSub, aOpNot, aOpAnd, aOpOr, aOpXor, aOpSlt, aOpSll, aOpSrl, aOpSra = Value + val aOpAdd, aOpSub, aOpNot, aOpAnd, aOpOr, aOpXor, aOpSlt, aOpSltu, aOpSll, aOpSrl, aOpSra = Value } object SrcASelect extends ChiselEnum { val aSrcARs1, aSrcAPc, aSrcAZero = Value } object SrcBSelect extends ChiselEnum { - val aSrcBRs2, aSrcBImmI, aSrcBImmJ, aSrcBImmB, aSrcBImmS, aSrcBImmU = Value + val aSrcBRs2, aSrcBImmI, aSrcBImmJ, aSrcBImmS, aSrcBImmU = Value } val op = Input(OpSelect()) val srcASelect = Input(SrcASelect()) @@ -45,7 +45,8 @@ class ALU[T <: UInt](tpe: T) extends Module { val not = ~a val or = a | b val xor = a ^ b - val slt = a < b + val slt = a.asSInt < b.asSInt + val sltu = a < b val sll = a << b(log2Ceil(tpe.getWidth), 0) val srl = a >> b(log2Ceil(tpe.getWidth), 0) val sra = a.asSInt >> b(log2Ceil(tpe.getWidth), 0) @@ -61,6 +62,7 @@ class ALU[T <: UInt](tpe: T) extends Module { aOpOr -> or, aOpXor -> xor, aOpSlt -> slt, + aOpSltu -> sltu, aOpSll -> sll, aOpSrl -> srl, aOpSra -> sra.asUInt diff --git a/npc/core/src/main/scala/components/Mem.scala b/npc/core/src/main/scala/components/Mem.scala index 7562730..24f9927 100644 --- a/npc/core/src/main/scala/components/Mem.scala +++ b/npc/core/src/main/scala/components/Mem.scala @@ -23,6 +23,8 @@ class RamControlInterface(addrWidth: Int) extends Bundle { * We need a way to merge two bundles together */ class RamInterface[T <: Data](tpe: T, addrWidth: Int) extends RamControlInterface(addrWidth) { + val clock = Input(Clock()) + val reset = Input(Reset()) val writeAddr = Input(UInt(addrWidth.W)) val writeData = Input(tpe) val readAddr = Input(UInt(addrWidth.W)) diff --git a/npc/core/src/main/scala/top/FlowMain.scala b/npc/core/src/main/scala/top/FlowMain.scala index de77b38..4fb2e15 100644 --- a/npc/core/src/main/scala/top/FlowMain.scala +++ b/npc/core/src/main/scala/top/FlowMain.scala @@ -152,33 +152,33 @@ class Control(width: Int) extends RawModule { r(false.B) :: l(UInt(4.W)):: r(false.B) :: HNil)), (beq , (r(false.B) :: l(WriteSelect) :: - r(true.B) :: r(pExeOut) :: - r(aOpSlt) :: r(aSrcAPc) :: r(aSrcBImmB) :: l(Bool()) :: + r(true.B) :: r(pStaticNpc) :: + r(aOpSlt) :: r(aSrcARs1) :: r(aSrcBRs2) :: l(Bool()) :: r(false.B) :: l(UInt(4.W)):: r(false.B) :: HNil)), (bne , (r(false.B) :: l(WriteSelect) :: - r(true.B) :: r(pExeOut) :: - r(aOpSlt) :: r(aSrcAPc) :: r(aSrcBImmB) :: l(Bool()) :: + r(true.B) :: r(pStaticNpc) :: + r(aOpSlt) :: r(aSrcARs1) :: r(aSrcBRs2) :: l(Bool()) :: r(false.B) :: l(UInt(4.W)):: r(false.B) :: HNil)), (blt , (r(false.B) :: l(WriteSelect) :: - r(true.B) :: r(pExeOut) :: - r(aOpSlt) :: r(aSrcAPc) :: r(aSrcBImmB) :: r(true.B) :: + r(true.B) :: r(pStaticNpc) :: + r(aOpSlt) :: r(aSrcARs1) :: r(aSrcBRs2) :: r(true.B) :: r(false.B) :: l(UInt(4.W)):: r(false.B) :: HNil)), (bge , (r(false.B) :: l(WriteSelect) :: - r(true.B) :: r(pExeOut) :: - r(aOpSlt) :: r(aSrcAPc) :: r(aSrcBImmB) :: r(true.B) :: + r(true.B) :: r(pStaticNpc) :: + r(aOpSlt) :: r(aSrcARs1) :: r(aSrcBRs2) :: r(true.B) :: r(false.B) :: l(UInt(4.W)):: r(false.B) :: HNil)), (bltu , (r(false.B) :: l(WriteSelect):: - r(true.B) :: r(pExeOut) :: - r(aOpSlt) :: r(aSrcAPc) :: r(aSrcBImmB) :: r(false.B) :: + r(true.B) :: r(pStaticNpc) :: + r(aOpSltu) :: r(aSrcARs1) :: r(aSrcBRs2) :: r(false.B) :: r(false.B) :: l(UInt(4.W)) :: r(false.B) :: HNil)), (bgeu , (r(false.B) :: l(WriteSelect):: - r(true.B) :: r(pExeOut) :: - r(aOpSlt) :: r(aSrcAPc) :: r(aSrcBImmB) :: r(false.B) :: + r(true.B) :: r(pStaticNpc) :: + r(aOpSltu) :: r(aSrcARs1) :: r(aSrcBRs2) :: r(false.B) :: r(false.B) :: l(UInt(4.W)) :: r(false.B) :: HNil)), // ---- Memory Access Instructions ---- @@ -186,32 +186,42 @@ class Control(width: Int) extends RawModule { (lb , (r(true.B) :: r(rMemOut) :: r(false.B) :: r(pStaticNpc) :: r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmI) :: l(Bool()) :: - r(true.B) :: l(UInt(4.W)) :: r(false.B) :: HNil)), + r(true.B) :: r(1.U(4.W)) :: r(false.B) :: HNil)), - (lh , (r(true.B) :: r(rMemOut) :: + (lbu , (r(true.B) :: r(rMemOut) :: r(false.B) :: r(pStaticNpc) :: r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmI) :: l(Bool()) :: - r(true.B) :: l(UInt(4.W)) :: r(false.B) :: HNil)), + r(true.B) :: r(0.U(4.W)) :: r(false.B) :: HNil)), + + (lh , (r(true.B) :: r(rMemOut) :: + r(false.B) :: r(pStaticNpc) :: + r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmI) :: l(Bool()) :: + r(true.B) :: r(3.U(4.W)) :: r(false.B) :: HNil)), + + (lhu , (r(true.B) :: r(rMemOut) :: + r(false.B) :: r(pStaticNpc) :: + r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmI) :: l(Bool()) :: + r(true.B) :: r(2.U(4.W)) :: r(false.B) :: HNil)), (lw , (r(true.B) :: r(rMemOut) :: r(false.B) :: r(pStaticNpc) :: r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmI) :: l(Bool()) :: - r(true.B) :: l(UInt(4.W)) :: r(false.B) :: HNil)), + r(true.B) :: r(14.U(4.W)) :: r(false.B) :: HNil)), (sb , (r(false.B) :: l(WriteSelect):: r(false.B) :: r(pStaticNpc) :: - r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmI) :: l(Bool()) :: - r(true.B) :: r(1.U(4.W)) :: r(false.B) :: HNil)), + r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmS) :: l(Bool()) :: + r(true.B) :: r(1.U(4.W)) :: r(true.B) :: HNil)), (sh , (r(false.B) :: l(WriteSelect):: r(false.B) :: r(pStaticNpc) :: - r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmI) :: l(Bool()) :: - r(true.B) :: r(3.U(4.W)) :: r(false.B) :: HNil)), + r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmS) :: l(Bool()) :: + r(true.B) :: r(3.U(4.W)) :: r(true.B) :: HNil)), (sw , (r(false.B) :: l(WriteSelect):: r(false.B) :: r(pStaticNpc) :: - r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmI) :: l(Bool()) :: - r(true.B) :: r(15.U(4.W)) :: r(false.B) :: HNil)), + r(aOpAdd) :: r(aSrcARs1) :: r(aSrcBImmS) :: l(Bool()) :: + r(true.B) :: r(15.U(4.W)) :: r(true.B) :: HNil)), // ---- Integer Computational Instructions --- @@ -227,7 +237,7 @@ class Control(width: Int) extends RawModule { (sltiu , (r(true.B) :: r(rAluOut) :: r(false.B) :: r(pStaticNpc) :: - r(aOpSlt) :: r(aSrcARs1) :: r(aSrcBImmI) :: r(false.B) :: + r(aOpSltu) :: r(aSrcARs1) :: r(aSrcBImmI) :: r(false.B) :: r(false.B) :: l(UInt(4.W)):: r(false.B) :: HNil)), (xori , (r(true.B) :: r(rAluOut) :: @@ -282,7 +292,7 @@ class Control(width: Int) extends RawModule { (sltu , (r(true.B) :: r(rAluOut) :: r(false.B) :: r(pStaticNpc) :: - r(aOpSlt) :: r(aSrcARs1) :: r(aSrcBRs2) :: r(false.B) :: + r(aOpSltu) :: r(aSrcARs1) :: r(aSrcBRs2) :: r(false.B) :: r(false.B) :: l(UInt(4.W)):: r(false.B) :: HNil)), (xor , (r(true.B) :: r(rAluOut) :: @@ -328,8 +338,6 @@ class Control(width: Int) extends RawModule { .foreach({ case (src, dst) => dst.toOption.get := src.asTypeOf(dst.toOption.get) }) - - pc.useImmB := DontCare } import flow.components.{RegisterFile, ProgramCounter, ALU, RamDpi} @@ -355,37 +363,60 @@ class Flow extends Module { val npc = Wire(dataType) npc := pc.out + 4.U pc.in.exeOut := alu.out.result - pc.in.immB := Cat(Fill(20, inst(31)), inst(7), inst(30, 25), inst(11, 8), inst(0)) + pc.in.immB := Cat(Fill(20, inst(31)), inst(7), inst(30, 25), inst(11, 8), 0.U(1.W)) control.inst := inst reg.control <> control.reg // FIXME: Probably optimizable with bulk connection pc.control <> control.pc + pc.control.useImmB := control.pc.useImmB alu.control <> control.alu val branchUseSlt = Wire(Bool()) val branchInvertResult = Wire(Bool()) branchUseSlt := inst(14) branchInvertResult := inst(12) val _branchResult = Mux(branchUseSlt, alu.out.result(0), alu.out.eq) - val branchResult = Mux(branchInvertResult, _branchResult, !_branchResult) - pc.control.useImmB := control.pc.useImmB && _branchResult + val branchResult = Mux(branchInvertResult, !_branchResult, _branchResult) + pc.control.useImmB := control.pc.useImmB && branchResult + // printf(cf"_branchResult = ${_branchResult}, branchResult = ${branchResult}\n") + // printf(cf"pcin.useImmB = ${pc.control.useImmB}, control.out.useImmB = ${control.pc.useImmB} \n") import control.reg.WriteSelect._ reg.in.writeData(lit(rAluOut)) := alu.out.result - // TODO: Read address in load command goes here - reg.in.writeData(lit(rMemOut)) := ram.io.readData + val maskedData = ram.io.readData & Cat( + Fill(8, ram.io.writeMask(3)), + Fill(8, ram.io.writeMask(2)), + Fill(8, ram.io.writeMask(1)), + "b11111111".U) + + val doSignExt = control.ram.writeMask(0) + val signExt16 = control.ram.writeMask(1) + when(!doSignExt) { + reg.in.writeData(lit(rMemOut)) := maskedData + // printf(cf"!doSignExt\n") + }.elsewhen(signExt16) { + reg.in.writeData(lit(rMemOut)) := Cat(Fill(16, maskedData(15)), maskedData(15, 0)) + // printf(cf"elsewhen\n") + }.otherwise { + reg.in.writeData(lit(rMemOut)) := Cat(Fill(24, maskedData(7)), maskedData(7, 0)) + // printf(cf"otherwise\n") + } + // printf(cf"maskedData = ${maskedData}, writeData = ${reg.in.writeData(lit(rMemOut))}\n") reg.in.writeData(lit(rNpc)) := npc reg.in.writeAddr := inst(11, 7) reg.in.rs(0) := inst(19, 15) // rs1 reg.in.rs(1) := inst(24, 20) // rs2 - // TODO: Memory write goes here + // TODO: Bulk connection here + // FIXME: The following 2 lines won't compile with bloop + ram.io.clock := clock + ram.io.reset := reset ram.io.writeAddr := alu.out.result ram.io.writeData := reg.out.src(1) ram.io.writeMask := control.ram.writeMask ram.io.writeEnable := control.ram.writeEnable - ram.io.valid := true.B + ram.io.valid := control.ram.valid ram.io.readAddr := alu.out.result import control.alu.SrcASelect._ @@ -398,11 +429,9 @@ class Flow extends Module { // alu.in.b(lit(aSrcBImmI)) := inst(31, 20).pad(aSrcBImmI.getWidth) alu.in.b(lit(aSrcBImmI)) := Cat(Fill(20, inst(31)), inst(31, 20)) alu.in.b(lit(aSrcBImmJ)) := Cat(Fill(12, inst(31)), inst(19, 12), inst(20), inst(30, 25), inst(24, 21), 0.U(1.W)) - alu.in.b(lit(aSrcBImmB)) := Cat(Fill(20, inst(31)), inst(7), inst(30, 25), inst(11, 8), inst(0)) - alu.in.b(lit(aSrcBImmS)) := Cat(inst(31), inst(19, 12), inst(20), inst(30, 25), inst(24, 21), 0.U(1.W)).pad(aSrcBImmS.getWidth) + alu.in.b(lit(aSrcBImmS)) := Cat(Fill(20, inst(31)), inst(31), inst(30, 25), inst(11, 8), inst(7)) alu.in.b(lit(aSrcBImmU)) := Cat(inst(31, 12), 0.U(12.W)) - printf(cf"inst = $inst\n"); - Trace.traceName(pc.out); + Trace.traceName(pc.out) dontTouch(control.out) } diff --git a/npc/csrc/Flow/main.cpp b/npc/csrc/Flow/main.cpp index 7e962d5..664db56 100644 --- a/npc/csrc/Flow/main.cpp +++ b/npc/csrc/Flow/main.cpp @@ -133,7 +133,7 @@ int main(int argc, char **argv, char **env) { std::filesystem::path ref{config.lib_ref}; RefTrmInterface ref_interface{ref}; DifftestTrmInterface diff_interface{NPC::npc_interface, ref_interface, - pmem_get(), 1024}; + pmem_get(), 128 * 1024}; SDB::SDB sdb_diff{diff_interface}; int t = 8; diff --git a/npc/include/components.hpp b/npc/include/components.hpp index 15cea34..91847a7 100644 --- a/npc/include/components.hpp +++ b/npc/include/components.hpp @@ -4,6 +4,7 @@ #include "types.h" #include #include +#include #include #include #include @@ -32,18 +33,6 @@ public: }; template class Memory { - std::size_t addr_to_index(std::size_t addr) { - extern bool g_skip_memcheck; - if (g_skip_memcheck) { - return 0; - } - if (addr < 0x80000000 || addr > 0x87ffffff) { - std::cerr << std::hex << "ACCESS " << addr << std::dec << std::endl; - throw std::runtime_error("Invalid memory access"); - } - // Linear mapping - return (addr >> 2) - 0x20000000; - } uint32_t expand_bits(uint8_t bits) { uint32_t x = bits; x = (x | (x << 7) | (x << 14) | (x << 21)) & 0x01010101; @@ -77,9 +66,9 @@ public: /** * Always reads and returns 4 bytes from the address raddr & ~0x3u. */ - T read(int raddr) { + T read(paddr_t raddr) { // printf("raddr: 0x%x\n", raddr); - return mem[addr_to_index((uint32_t)raddr)]; + return *(word_t *)guest_to_host(raddr); } /** * Always writes to the 4 bytes at the address `waddr` & ~0x3u. @@ -87,12 +76,27 @@ public: * For example, wmask = 0x3 means only the lowest 2 bytes are written, * and the other bytes in memory remain unchanged. */ - void write(int waddr, T wdata, char wmask) { + void write(paddr_t waddr, T wdata, char wmask) { // printf("waddr: 0x%x\n", waddr); - mem[addr_to_index((uint32_t)waddr)] = expand_bits(wmask) & wdata; + uint8_t *p_data = (uint8_t *)&wdata; + while (wmask & 0x1) { + memcpy(guest_to_host(waddr), p_data, 1); + waddr++; + p_data++; + wmask >>= 1; + } } void *guest_to_host(std::size_t addr) { - return mem.data() + addr_to_index(addr); + extern bool g_skip_memcheck; + if (g_skip_memcheck) { + return mem.data(); + } + if (addr < 0x80000000 || addr > 0x87ffffff) { + std::cerr << std::hex << "ACCESS " << addr << std::dec << std::endl; + throw std::runtime_error("Invalid memory access"); + } + // Linear mapping + return (uint8_t *)(mem.data() + (addr >> 2) - 0x20000000) + (addr & 0x3); } void trace(paddr_t addr, bool is_read, word_t pc = 0, word_t value = 0) { for (auto &r : trace_ranges) { diff --git a/npc/utils/sdb/CMakeLists.txt b/npc/utils/sdb/CMakeLists.txt index 240dbac..45dd6e6 100644 --- a/npc/utils/sdb/CMakeLists.txt +++ b/npc/utils/sdb/CMakeLists.txt @@ -4,14 +4,14 @@ find_package(BISON REQUIRED) set(PARSER_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(LEXER_OUT "${PARSER_DIR}/lexer.c") set(PARSER_OUT "${PARSER_DIR}/parser.c") -flex_target(LEXER addrexp.l "${LEXER_OUT}" DEFINES_FILE "${PARSER_DIR}/include/addrexp_lex.h") -bison_target(PARSER addrexp.y "${PARSER_OUT}" DEFINES_FILE "${PARSER_DIR}/include/addrexp.h") +flex_target(LEXER addrexp.l "${LEXER_OUT}" DEFINES_FILE "${PARSER_DIR}/addrexp_lex.h") +bison_target(PARSER addrexp.y "${PARSER_OUT}" DEFINES_FILE "${PARSER_DIR}/addrexp.h") add_flex_bison_dependency(LEXER PARSER) add_library(sdb sdb.cpp console.cpp disasm.cpp "${LEXER_OUT}" "${PARSER_OUT}") llvm_map_components_to_libnames(LLVM_LIBS ${LLVM_TARGETS_TO_BUILD}) target_link_libraries(sdb PUBLIC ${LLVM_LIBS}) target_link_libraries(sdb PRIVATE ${Readline_LIBRARY}) -target_include_directories(sdb PRIVATE ${PARSER_DIR}/include) +target_include_directories(sdb PRIVATE ${PARSER_DIR}) target_include_directories(sdb PRIVATE ${Readline_INCLUDE_DIR}) target_include_directories(sdb PUBLIC include) diff --git a/npc/utils/sdb/addrexp.y b/npc/utils/sdb/addrexp.y index ca1a5c9..477d8dd 100644 --- a/npc/utils/sdb/addrexp.y +++ b/npc/utils/sdb/addrexp.y @@ -22,6 +22,7 @@ %parse-param { uint32_t *result } %left '-' '+' %left '*' '/' +%expect 68 %% input