diff --git a/examples/simple_system/ibex_simple_system.core b/examples/simple_system/ibex_simple_system.core index e4844823ec..ad773acd75 100644 --- a/examples/simple_system/ibex_simple_system.core +++ b/examples/simple_system/ibex_simple_system.core @@ -43,6 +43,12 @@ parameters: paramtype: vlogdefine description: "Register file implementation parameter enum. See the ibex_pkg::regfile_e enum in ibex_pkg.sv for permitted values." + INSTR_CYCLE_DELAY: + datatype: int + default: 0 + paramtype: vlogdefine + description: "Number of cycles to delay the instruction RAM access. This is on top of the single-cycle access that the RAM requires." + ICache: datatype: int default: 0 @@ -137,6 +143,7 @@ targets: - RV32B - RV32ZC - RegFile + - INSTR_CYCLE_DELAY - ICache - ICacheScramble - ICacheECC diff --git a/examples/simple_system/rtl/ibex_simple_system.sv b/examples/simple_system/rtl/ibex_simple_system.sv index 155fbe8936..89ef13fe75 100644 --- a/examples/simple_system/rtl/ibex_simple_system.sv +++ b/examples/simple_system/rtl/ibex_simple_system.sv @@ -22,6 +22,10 @@ `define RegFile ibex_pkg::RegFileFF `endif +`ifndef INSTR_CYCLE_DELAY + `define INSTR_CYCLE_DELAY 0 +`endif + /** * Ibex simple system * @@ -274,6 +278,7 @@ module ibex_simple_system ( // SRAM block for instruction and data storage ram_2p #( .Depth(1024*1024/4), + .BExtraDelay(`INSTR_CYCLE_DELAY), .MemInitFile(SRAMInitFile) ) u_ram ( .clk_i (clk_sys), diff --git a/examples/sw/benchmarks/README.md b/examples/sw/benchmarks/README.md index 05294b80b6..c517fa005c 100644 --- a/examples/sw/benchmarks/README.md +++ b/examples/sw/benchmarks/README.md @@ -1,6 +1,6 @@ # Benchmarks -This directory contains benchmarks that can be run on ibex simple system. +This directory contains benchmarks that can be run on Ibex simple system. Benchmarks may rely on code external to this directory (e.g. 
it may be found in `vendor/`) see the specific benchmark information below for details on how to build and run each benchmark and where benchmark code is located. @@ -10,7 +10,7 @@ build and run each benchmark and where benchmark code is located. All of these benchmarks run on Simple System. A verilator simulation suitable for running them can be built with: -``` +```shell fusesoc --cores-root=. run --target=sim --setup --build lowrisc:ibex:ibex_simple_system `./util/ibex_config.py maxperf fusesoc_opts` ``` @@ -18,6 +18,13 @@ This will build a simulation of Ibex in the 'maxperf' configuration. It is one of several pre-defined ibex configurations, others can be used. These are specified in the `ibex_configs.yaml` file. +You can also test how the instruction cache affects performance. +To see this effect you must specify a delay on the instruction accesses because with single-cycle RAM access an instruction cache does not help. + +```shell +fusesoc --cores-root=. run --target=sim --setup --build lowrisc:ibex:ibex_simple_system `./util/ibex_config.py maxperf-pmp-bmfull-icache fusesoc_opts` --INSTR_CYCLE_DELAY=5 +``` + See examples/simple_system/README.md for full details. ## CoreMark @@ -32,13 +39,13 @@ running on simple system is found in `examples/sw/benchmarks/coremark`. To build CoreMark: -``` +```shell make -C ./examples/sw/benchmarks/coremark/ ``` To run CoreMark (after building a suitable simulator binary, see above): -``` +```shell build/lowrisc_ibex_ibex_simple_system_0/sim-verilator/Vibex_simple_system --meminit=ram,examples/sw/benchmarks/coremark/coremark.elf ``` @@ -71,7 +78,7 @@ Correct operation validated. See README.md for run and reporting rules. Different ISAs (to choose different RISC-V ISA extensions) can be selected by passing the desired ISA string into `RV_ISA` when invoking make. 
-``` +```shell make -C ./examples/sw/benchmarks/coremark clean make -C ./examples/sw/benchmarks/coremark RV_ISA=rv32imc ``` diff --git a/shared/rtl/ram_2p.sv b/shared/rtl/ram_2p.sv index 20ef1ff154..4a72db2f42 100644 --- a/shared/rtl/ram_2p.sv +++ b/shared/rtl/ram_2p.sv @@ -4,12 +4,17 @@ /** * Dual-port RAM with 1 cycle read/write delay, 32 bit words. + * Optionally an extra delay can be added to the B-side requests. + * This can be used to mimic an asymmetric memory delay like having the stack + * and heap live in fast SRAM and the instructions live in slow, non-volatile + * memory. */ `include "prim_assert.sv" module ram_2p #( parameter int Depth = 128, + parameter int BExtraDelay = 0, parameter MemInitFile = "" ) ( input clk_i, @@ -55,16 +60,34 @@ module ram_2p #( end end + logic b_rvalid_d; + logic b_rvalid_q[(BExtraDelay==0) ? 1 : BExtraDelay]; + logic [31:0] b_rdata_d; + logic [31:0] b_rdata_q[(BExtraDelay==0) ? 1 : BExtraDelay]; + always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin a_rvalid_o <= '0; b_rvalid_o <= '0; + for (integer i = 0; i < BExtraDelay; i=i+1) begin + b_rvalid_q[i] <= '0; + b_rdata_q[i] <= '0; + end end else begin a_rvalid_o <= a_req_i; - b_rvalid_o <= b_req_i; + b_rvalid_o <= b_rvalid_d; + b_rvalid_q[0] <= b_req_i; + b_rdata_q[0] <= b_rdata_d; + for (integer i = BExtraDelay-1; i > 0; i=i-1) begin + b_rvalid_q[i] <= b_rvalid_q[i-1]; + b_rdata_q[i] <= b_rdata_q[i-1]; + end end end + assign b_rvalid_d = (BExtraDelay == 0) ? b_req_i : b_rvalid_q[BExtraDelay-1]; + assign b_rdata_o = (BExtraDelay == 0) ? b_rdata_d : b_rdata_q[BExtraDelay-1]; + prim_ram_2p #( .Width(32), .Depth(Depth), @@ -84,7 +107,7 @@ module ram_2p #( .b_addr_i (b_addr_idx), .b_wdata_i (b_wdata_i), .b_wmask_i (b_wmask), - .b_rdata_o (b_rdata_o), + .b_rdata_o (b_rdata_d), .cfg_i ('0), .cfg_rsp_o () );