add files

This commit is contained in:
Eric Yu 2023-12-23 18:41:58 -08:00
parent a785f50883
commit 0ca09cbf8d
73 changed files with 3356 additions and 0 deletions

8
.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
*.log
*.jou
*.ini
*.wlf
work
transcript
prj
.Xil

View File

@ -0,0 +1,24 @@
# Simple ARM Pipelined CPU
## Introduction
A simple 64-bit ARM CPU with pipelining. The pipelined CPU has one delay slot after each load and branch instruction. The CPU instructions to be implemented are listed below. The data memory and instruction memory modules are provided in the files “datamem.sv” and “instructmem.sv”, respectively. To simulate the CPU, go to the "tools/sim" directory. To change the program that is loaded, edit the filename specified in “instructmem.sv”.
#### Instruction set:
```
ADDI Rd, Rn, Imm12: Reg[Rd] = Reg[Rn] + ZeroExtend(Imm12).
ADDS Rd, Rn, Rm: Reg[Rd] = Reg[Rn] + Reg[Rm]. Set flags.
B Imm26: PC = PC + SignExtend(Imm26 << 2).
For lab #4 (only) this instr. has a delay slot.
B.LT Imm19: If (flags.negative != flags.overflow) PC = PC + SignExtend(Imm19<<2).
For lab #4 (only) this instr. has a delay slot.
BL Imm26: X30 = PC + 4 (instruction after this one), PC = PC + SignExtend(Imm26<<2).
For lab #4 (only) this instr. has a delay slot.
BR Rd: PC = Reg[Rd].
For lab #4 (only) this instr. has a delay slot.
CBZ Rd, Imm19: If (Reg[Rd] == 0) PC = PC + SignExtend(Imm19<<2).
For lab #4 (only) this instr. has a delay slot.
LDUR Rd, [Rn, #Imm9]: Reg[Rd] = Mem[Reg[Rn] + SignExtend(Imm9)].
For lab #4 (only) the value in rd cannot be used in the next cycle.
STUR Rd, [Rn, #Imm9]: Mem[Reg[Rn] + SignExtend(Imm9)] = Reg[Rd].
SUBS Rd, Rn, Rm: Reg[Rd] = Reg[Rn] - Reg[Rm]. Set flags.
```

1
documents/README.md Normal file
View File

@ -0,0 +1 @@

BIN
documents/spec_pipline.pdf Normal file

Binary file not shown.

Binary file not shown.

251
src/hdl/CPU_pipelined.sv Normal file
View File

@ -0,0 +1,251 @@
// partial LEGV arm-based pipelined single-core CPU
`timescale 1ns/10ps
// CPU_pipelined: datapath plus control for a pipelined CPU organised as
// IF -> ID -> EX -> MEM -> WB (see the stage banners below).
//
// Ports:
//   reset - reset input, fed to the PC register and every pipeline register
//           (D_FF applies it synchronously on the rising clock edge).
//   clk   - pipeline clock. The register file and the flag registers are
//           clocked from its inverse (notClk).
// Parameter:
//   DELAY_NS - declared but not referenced anywhere inside this module.
//
// Wire naming convention: <port><STAGE>, e.g. RegWriteEX is the RegWrite
// control bit as seen in the EX stage.
module CPU_pipelined #(parameter DELAY_NS=0.05) (reset, clk);
    input logic reset, clk;

    logic pcSrIF, BrTakenIF, UnCondBrIF;
    logic [63:0] rd2EX; // EX-stage operand, declared early so it is visible before its first use

    // Wire naming convention (for the newer ones): portSTAGE, purposePORT (from). stage is the location a wire is at, and port is the port name of a module of a wire
    // generally wire names should be the port/pin that is coming FROM
    logic Reg2LocID, ALUSrcID, Mem2RegID, RegWriteID, MemWriteID, MemReadID, BrTakenID, UnCondBrID, writeSrID, pcSrID, immiID, flag_enID, rdSrcID;
    logic Reg2LocEX, ALUSrcEX, Mem2RegEX, RegWriteEX, MemWriteEX, MemReadEX, BrTakenEX, UnCondBrEX, writeSrEX, pcSrEX, immiEX, flag_enEX, rdSrcEX;
    logic Reg2LocMEM, ALUSrcMEM, Mem2RegMEM, RegWriteMEM, MemWriteMEM, MemReadMEM, BrTakenMEM, UnCondBrMEM, writeSrMEM, pcSrMEM, immiMEM, flag_enMEM, rdSrcMEM;
    logic Reg2LocWB, ALUSrcWB, Mem2RegWB, RegWriteWB, MemWriteWB, MemReadWB, BrTakenWB, UnCondBrWB, writeSrWB, pcSrWB, immiWB, flag_enWB, rdSrcWB;
    logic [2:0] ALUOpID, ALUOpEX;

    // control pipelines: each control bit decoded in ID is delayed one
    // register per stage so it arrives together with its instruction.
    // ID -> EX
    DFF64 #(.N(3)) ALUop_IDEX (.q(ALUOpEX), .d(ALUOpID), .reset, .clk, .en(1'b1));
    D_FF ALUSrc_IDEX (.q(ALUSrcEX), .d(ALUSrcID), .reset, .clk);
    D_FF MemWrite_IDEX (.q(MemWriteEX), .d(MemWriteID), .reset, .clk);
    D_FF Mem2Reg_IDEX (.q(Mem2RegEX), .d(Mem2RegID), .reset, .clk);
    D_FF MemRead_IDEX (.q(MemReadEX), .d(MemReadID), .reset, .clk);
    D_FF RegWrite_IDEX (.q(RegWriteEX), .d(RegWriteID), .reset, .clk);
    D_FF immi_IDEX (.q(immiEX), .d(immiID), .reset, .clk);
    D_FF flag_IDEX (.q(flag_enEX), .d(flag_enID), .reset, .clk);
    D_FF rdSrc_IDEX (.q(rdSrcEX), .d(rdSrcID), .reset, .clk);
    D_FF writeSr_IDEX (.q(writeSrEX), .d(writeSrID), .reset, .clk);
    // EX -> MEM
    D_FF ALUSrc_EXMEM (.q(ALUSrcMEM), .d(ALUSrcEX), .reset, .clk);
    D_FF MemWrite_EXMEM (.q(MemWriteMEM), .d(MemWriteEX), .reset, .clk);
    D_FF Mem2Reg_EXMEM (.q(Mem2RegMEM), .d(Mem2RegEX), .reset, .clk);
    D_FF MemRead_EXMEM (.q(MemReadMEM), .d(MemReadEX), .reset, .clk);
    D_FF RegWrite_EXMEM (.q(RegWriteMEM), .d(RegWriteEX), .reset, .clk);
    D_FF immi_EXMEM (.q(immiMEM), .d(immiEX), .reset, .clk);
    D_FF flag_EXMEM (.q(flag_enMEM), .d(flag_enEX), .reset, .clk);
    D_FF rdSrc_EXMEM (.q(rdSrcMEM), .d(rdSrcEX), .reset, .clk);
    D_FF writeSr_EXMEM (.q(writeSrMEM), .d(writeSrEX), .reset, .clk);
    // MEM -> WB
    D_FF ALUSrc_MEMWB (.q(ALUSrcWB), .d(ALUSrcMEM), .reset, .clk);
    D_FF MemWrite_MEMWB (.q(MemWriteWB), .d(MemWriteMEM), .reset, .clk);
    D_FF Mem2Reg_MEMWB (.q(Mem2RegWB), .d(Mem2RegMEM), .reset, .clk);
    D_FF MemRead_MEMWB (.q(MemReadWB), .d(MemReadMEM), .reset, .clk);
    D_FF RegWrite_MEMWB (.q(RegWriteWB), .d(RegWriteMEM), .reset, .clk);
    D_FF immi_MEMWB (.q(immiWB), .d(immiMEM), .reset, .clk);
    D_FF flag_MEMWB (.q(flag_enWB), .d(flag_enMEM), .reset, .clk);
    D_FF rdSrc_MEMWB (.q(rdSrcWB), .d(rdSrcMEM), .reset, .clk);
    D_FF writeSr_MEMWB (.q(writeSrWB), .d(writeSrMEM), .reset, .clk);

    /****************************************
    *************** DATAPATH ***************
    ****************************************/
    logic [63:0] pc4ID, pc4EX, pc4MEM, pc4WB;

    // ------------- INSTRUCTION FETCH --------------
    // for this stage if there is no stage name after the port it is assumed is in IF stage
    logic [63:0] aluResultIF_no_offset, aluResultIF, pcNormIF, pc4IF, pcIF, newpc;
    logic [63:0] branchB, branchA, branchsel;
    logic [63:0] brAddr64, cbzaddr64;
    logic [63:0] srcBEIF;
    logic [31:0] instructionID, instructionIF;
    logic [63:0] rd1ID, rd2ID; // produced in ID, declared here so they exist before first use

    // The branch is resolved from instructionID while pcIF has already advanced
    // by one instruction, so the target is computed as pcIF + offset - 4.
    mux2_n brMux (.datOut(pcNormIF), .datIn0(pc4IF), .datIn1(aluResultIF), .sel(BrTakenIF)); // 14
    alu addBr (.A(pcIF), .B(branchB), .cntrl(3'b010), .result(aluResultIF_no_offset), .negative(), .zero(), .overflow(), .carry_out()); // 15, the adder. flags left unconnected
    alu subBr (.A(aluResultIF_no_offset), .B(64'd4), .cntrl(3'b011), .result(aluResultIF), .negative(), .zero(), .overflow(), .carry_out()); // offset pc down by 4 for correct branching
    // NOTE(review): direction 0 / distance 2 is presumably "shift left by 2" (imm << 2) - confirm against shifter.
    shifter instrShift (.value(branchsel), .direction(1'b0), .distance(6'd2), .result(branchB));
    assign brAddr64 = {{38{instructionID[25]}}, instructionID[25:0]}; // sign-extended 26-bit unconditional branch immediate
    assign cbzaddr64 = {{45{instructionID[23]}}, instructionID[23:5]}; // sign-extended 19-bit conditional branch immediate
    assign srcBEIF = rd2ID; // register value used as the next PC when pcSrIF is set (BR)
    mux2_n branchMux (.datOut(branchsel), .datIn0(cbzaddr64), .datIn1(brAddr64), .sel(UnCondBrIF)); // 19
    alu add4PC (.A(pcIF), .B(64'd4), .cntrl(3'b010), .result(pc4IF), .negative(), .zero(), .overflow(), .carry_out()); // 17, the adder. flags left unconnected
    mux2_n pcMux (.datOut(newpc), .datIn0(pcNormIF), .datIn1(srcBEIF), .sel(pcSrIF)); // 18
    DFF64 UpdatePC (pcIF, newpc, clk, 1'b1, reset); // positional: q, d, clk, en, reset
    instructmem u_instructmem( .address(pcIF), .instruction (instructionIF), .clk );

    // ********************* IF/ID Regs ************************
    DFF64 #( .N(32) ) instrction_IFID ( .q(instructionID), .d(instructionIF), .clk, .en(1'b1), .reset );
    // *********************************************************

    // ---------------- INSTRUCTION DECODE -------------------
    logic [1:0] fowardAE, fowardBE; // forwarding selects for the two register operands
    logic [4:0] rdReg1ID, rdReg2ID, writeRegID;
    logic [4:0] writeRegWB, writeRegMEM; // later-stage wires, declared early for first use below
    logic [31:0] instructionEX;
    logic [63:0] wrBaccWB, wrBaccMEM, WriteDataID, ReadData1ID, ReadData2ID, resultEX;
    logic notClk;
    assign rdReg1ID = instructionID[9:5];
    DFF64 pc4_IFID ( .q(pc4ID), .d(pc4IF), .clk, .en(1'b1), .reset);
    // Second read register: instruction bits [20:16] when Reg2LocID is 1, bits [4:0] otherwise.
    mux2_n #( .WIDTH(5) ) rdReg2Mux ( .datOut(rdReg2ID), .datIn1(instructionID[20:16]), .datIn0(instructionID[4:0]), .sel(Reg2LocID) );
    // Destination register: bits [4:0], or X30 when rdSrcID is set (BL link register).
    mux2_n #( .WIDTH(5) ) writeRegMux ( .datOut(writeRegID), .datIn0(instructionID[4:0]), .datIn1(5'd30), .sel(rdSrcID) );
    // Write-back data: PC+4 (link value) when writeSrWB is 0, otherwise the ALU/memory result.
    mux2_n #( .WIDTH(64) ) writeDataMux ( .datOut(WriteDataID), .datIn0(pc4WB), .datIn1(wrBaccWB), .sel(writeSrWB) );
    // The register file runs on the inverted clock, i.e. half a cycle away from the pipeline registers.
    not #0.05 invClk (notClk, clk);
    regfile u_regfile (
        .RegWrite (RegWriteWB),
        .clk (notClk),
        .ReadRegister1 (rdReg1ID),
        .ReadRegister2 (rdReg2ID),
        .WriteData (WriteDataID),
        .WriteRegister (writeRegWB),
        .ReadData1 (ReadData1ID),
        .ReadData2 (ReadData2ID)
    );
    // Forwarding muxes: i0 = register file, i1 = MEM-stage write-back value,
    // i2 = EX-stage ALU result, i3 = MEM-stage PC+4 (operand A only).
    mux4_64 fowardAE_mux (.out(rd1ID), .i0(ReadData1ID), .i1(wrBaccMEM), .i2(resultEX), .i3(pc4MEM), .sel(fowardAE));
    mux4_64 fowardBE_mux (.out(rd2ID), .i0(ReadData2ID), .i1(wrBaccMEM), .i2(resultEX), .i3(64'd0), .sel(fowardBE));

    // ********************* ID/EX Regs ************************
    logic [4:0] writeRegEX; // destination register number travelling with the instruction
    logic [4:0] rdReg1EX, rdReg2EX;
    logic [63:0] ReadData1EX, ReadData2EX;
    logic negativeEX, zeroEX, overflowEX, carry_outEX, nnegativeEX, nzeroEX, noverflowEX, ncarry_outEX;
    logic [63:0] b64EX, rd1EX, bDatEX, resultMEM, imm64EX, instr64EX;
    DFF64 read_data_1_IDEX ( .q(ReadData1EX), .d(ReadData1ID), .clk, .en(1'b1), .reset);
    DFF64 read_data_2_IDEX ( .q(ReadData2EX), .d(ReadData2ID), .clk, .en(1'b1), .reset);
    DFF64 #( .N(32) ) instrction_IDEX ( .q(instructionEX), .d(instructionID), .clk, .en(1'b1), .reset);
    DFF64 #( .N(5) ) rd_IDEX ( .q(writeRegEX), .d(writeRegID), .clk, .en(1'b1), .reset);
    DFF64 #( .N(5) ) rdReg1_IDEX ( .q(rdReg1EX), .d(rdReg1ID), .clk, .en(1'b1), .reset);
    DFF64 #( .N(5) ) rdReg2_IDEX ( .q(rdReg2EX), .d(rdReg2ID), .clk, .en(1'b1), .reset);
    DFF64 rd1_IDEX ( .q(rd1EX), .d(rd1ID), .clk, .en(1'b1), .reset);
    DFF64 rd2_IDEX ( .q(rd2EX), .d(rd2ID), .clk, .en(1'b1), .reset);
    DFF64 pc4_IDEX ( .q(pc4EX), .d(pc4ID), .clk, .en(1'b1), .reset);
    // *********************************************************

    // ---------------------- EXECUTE --------------------------
    assign imm64EX = {52'd0, instructionEX[21:10]}; // 11, zero extended 12-bit immediate value
    assign instr64EX = {{55{instructionEX[20]}}, instructionEX[20:12]}; // 10, sign-extended 9 bit DT_address value
    mux2_n immMux(.sel(immiEX), .datIn0(instr64EX), .datIn1(imm64EX), .datOut(b64EX)); // 7
    mux2_n alu_portB_mux( .sel(ALUSrcEX), .datIn0(rd2EX), .datIn1(b64EX), .datOut(bDatEX)); // 5
    alu u_alu (
        .A (rd1EX),
        .B (bDatEX),
        .cntrl (ALUOpEX),
        .result (resultEX),
        .negative (negativeEX),
        .zero (zeroEX),
        .overflow (overflowEX),
        .carry_out (carry_outEX)
    );
    // Flag registers: updated only when flag_enEX is set, clocked on the inverted clock.
    DFF_with_enable negDff (.q(nnegativeEX), .d(negativeEX), .reset, .clk(notClk), .en(flag_enEX));
    DFF_with_enable ofDff (.q(noverflowEX), .d(overflowEX), .reset, .clk(notClk), .en(flag_enEX));
    DFF_with_enable coutDff (.q(ncarry_outEX), .d(carry_outEX), .reset, .clk(notClk), .en(flag_enEX));
    DFF_with_enable zeroDff (.q(nzeroEX), .d(zeroEX), .reset, .clk(notClk), .en(flag_enEX));
    // Forwarding unit: compares the ID-stage source registers against the
    // destination registers currently in EX and MEM.
    foward #(
        .DATA (64),
        .ADDR (5)
    ) u_foward (
        .exec_en (RegWriteEX),
        .mem_en (RegWriteMEM),
        .pc_en (writeSrMEM),
        .execReg (writeRegEX),
        .memReg (writeRegMEM),
        .regA (rdReg1ID),
        .regB (rdReg2ID),
        .fwA (fowardAE),
        .fwB (fowardBE)
    );
    // CBZ test: pass the (forwarded) B operand through an ALU and use its zero flag.
    // zeroCBZ is declared explicitly instead of relying on an implicit net, and
    // the unused carry-out is left unconnected (it used to create a stray
    // implicit net through the misspelled name "cary_out").
    logic zeroCBZ;
    alu cbz_alu ( .A(64'd0), .B(rd2ID), .cntrl(3'b000), .result(), .negative(), .zero(zeroCBZ), .overflow(), .carry_out() );

    // ********************* EX/MEM Regs ************************
    logic [63:0] writeDataMEM;
    DFF64 #( .N(5) ) rd_EXMEM ( .q(writeRegMEM), .d(writeRegEX), .clk, .en(1'b1), .reset);
    DFF64 wd_EXMEM ( .q(writeDataMEM), .d(rd2EX), .clk, .en(1'b1), .reset);
    DFF64 result_EXMEM ( .q(resultMEM), .d(resultEX), .clk, .en(1'b1), .reset);
    DFF64 pc4_EXMEM ( .q(pc4MEM), .d(pc4EX), .clk, .en(1'b1), .reset);
    // *********************************************************

    // ----------------------- MEMORY ---------------------------
    logic [63:0] memdoutMEM;
    // All data accesses are 8 bytes wide (xfer_size = 8).
    datamem dmem( .write_enable(MemWriteMEM), .read_enable(MemReadMEM), .clk, .address(resultMEM), .write_data(writeDataMEM), .xfer_size(4'd8), .read_data(memdoutMEM)); // 8
    // Value to write back: memory data when Mem2RegMEM is 1, otherwise the ALU result.
    mux2_n #( .WIDTH (64) ) wrBacc_muxWB ( .datOut (wrBaccMEM), .datIn1 (memdoutMEM), .datIn0 (resultMEM), .sel (Mem2RegMEM) );

    // ********************* MEM/WB Regs ************************
    logic [63:0] memdoutWB, resultWB;
    DFF64 #( .N(5) ) rd_MEMWB ( .q(writeRegWB), .d(writeRegMEM), .clk, .en(1'b1), .reset);
    DFF64 result_MEMWB ( .q(resultWB), .d(resultMEM), .clk, .en(1'b1), .reset);
    DFF64 memout_MEMWB ( .q(memdoutWB), .d(memdoutMEM), .clk, .en(1'b1), .reset);
    DFF64 wrBacc_MEMWB ( .q(wrBaccWB), .d(wrBaccMEM), .clk, .en(1'b1), .reset);
    DFF64 pc4_MEMWB ( .q(pc4WB), .d(pc4MEM), .clk, .en(1'b1), .reset);
    // *********************************************************
    // -------------------------WRITE BACK -------------------------
    // (write-back itself happens through writeDataMux / u_regfile above)

    /****************************************
    *************** CONTROL ****************
    ****************************************/
    // Decodes the instruction sitting in ID. Branch decisions (BrTaken,
    // UnCondBr, pcSr) feed the fetch logic directly; the flag inputs come from
    // the registered EX flags and the CBZ zero detector.
    control master(instructionID, Reg2LocID, ALUSrcID, Mem2RegID, RegWriteID, MemWriteID, MemReadID, BrTakenIF, UnCondBrIF, ALUOpID, noverflowEX, nnegativeEX, nzeroEX, zeroCBZ, writeSrID, pcSrIF, immiID, flag_enID, rdSrcID);
endmodule
// Simulation driver for CPU_pipelined: generates the clock, pulses reset for
// one cycle, lets the loaded program run for a fixed number of cycles, then
// stops the simulator.
module CPU_pipelined_testbench();
    parameter ClockDelay = 5000;        // full clock period, in timescale units

    logic reset;
    logic clk;
    integer i;

    CPU_pipelined dut (.reset(reset), .clk(clk));

    // Force %t's to print in a nice format.
    initial $timeformat(-9, 2, " ns", 10);

    // Free-running clock, starting low.
    initial begin : clock_gen
        clk <= 0;
        forever begin
            #(ClockDelay/2) clk <= ~clk;
        end
    end

    initial begin : stimulus
        // Hold reset across the first rising edge, release it on the next.
        reset <= 1;
        @(posedge clk);
        reset <= 0;
        @(posedge clk);
        $display("%t begin benchmark", $time);
        // Run long enough for the benchmark program to finish (~800 cycles for sort).
        repeat(777) begin
            @(posedge clk);
        end
        $stop;
    end
endmodule

43
src/hdl/DFF3.sv Normal file
View File

@ -0,0 +1,43 @@
`timescale 1ns/10ps
// 3-bit register with enable: one DFF_with_enable per bit, all sharing
// clk, en and reset.
//   d     - data in
//   q     - registered data out
//   en    - when set, q takes d on the clock edge; otherwise q is held
//   reset - forwarded to every bit cell
module DFF3 (q, d, clk, en, reset); //Maybe replace by a 64bit register
    output logic [2:0] q;
    input  logic [2:0] d;
    input  logic       clk, en, reset;

    genvar k;
    generate
        for (k = 0; k < 3; k++) begin: eachDFF
            DFF_with_enable dff1 (
                .d     (d[k]),
                .reset (reset),
                .clk   (clk),
                .en    (en),
                .q     (q[k])
            );
        end
    endgenerate
endmodule
//tb
// Testbench for DFF3: checks that data is captured only while en is high
// and that the output holds once en is dropped.
module DFF3_testbench();
    logic [2:0] q, d;
    logic reset, clk, en;

    // Instantiate the 3-bit register under test. (The previous version hooked
    // the 3-bit q/d up to the 1-bit DFF_with_enable, so DFF3 was never tested.)
    DFF3 dut (.q, .d, .clk, .en, .reset);

    // Set up a simulated clock.
    parameter CLOCK_PERIOD=10;
    initial begin
        clk <= 0;
        forever #(CLOCK_PERIOD/2) clk <= ~clk; // Forever toggle the clock
    end

    // Set up the inputs to the design. Each line is a clock cycle.
    initial begin
        reset <= 1; @(posedge clk); // Always reset FSMs at start
        // check if enable works properly by writing data when turn on enable
        reset <= 0; d <= 3'b1; en <= 0; @(posedge clk);
        en <= 1; @(posedge clk);
        d <= 3'b0; @(posedge clk);
        // check if data output would stay when enable is off
        en <= 0; @(posedge clk);
        repeat(2) @(posedge clk);
        $stop; // End the simulation.
    end
endmodule

43
src/hdl/DFF32.sv Normal file
View File

@ -0,0 +1,43 @@
`timescale 1ns/10ps
// 32-bit register with enable: one DFF_with_enable per bit, all sharing
// clk, en and reset.
//   d     - data in
//   q     - registered data out
//   en    - when set, q takes d on the clock edge; otherwise q is held
//   reset - forwarded to every bit cell
module DFF32 (q, d, clk, en, reset); //Maybe replace by a 64bit register
    output logic [31:0] q;
    input  logic [31:0] d;
    input  logic        clk, en, reset;

    genvar k;
    generate
        for (k = 0; k < 32; k++) begin: eachDFF
            DFF_with_enable dff1 (
                .d     (d[k]),
                .reset (reset),
                .clk   (clk),
                .en    (en),
                .q     (q[k])
            );
        end
    endgenerate
endmodule
//tb
// Testbench for DFF32: checks that data is captured only while en is high
// and that the output holds once en is dropped.
module DFF32_testbench();
    logic [31:0] q, d;
    logic reset, clk, en;

    // Instantiate the 32-bit register under test. (The previous version hooked
    // the 32-bit q/d up to the 1-bit DFF_with_enable, so DFF32 was never tested.)
    DFF32 dut (.q, .d, .clk, .en, .reset);

    // Set up a simulated clock.
    parameter CLOCK_PERIOD=10;
    initial begin
        clk <= 0;
        forever #(CLOCK_PERIOD/2) clk <= ~clk; // Forever toggle the clock
    end

    // Set up the inputs to the design. Each line is a clock cycle.
    initial begin
        reset <= 1; @(posedge clk); // Always reset FSMs at start
        // check if enable works properly by writing data when turn on enable
        reset <= 0; d <= 32'b1; en <= 0; @(posedge clk);
        en <= 1; @(posedge clk);
        d <= 32'b0; @(posedge clk);
        // check if data output would stay when enable is off
        en <= 0; @(posedge clk);
        repeat(2) @(posedge clk);
        $stop; // End the simulation.
    end
endmodule

46
src/hdl/DFF64.sv Normal file
View File

@ -0,0 +1,46 @@
`timescale 1ns/10ps
// N-bit enabled DFF
// N-bit register with enable (N defaults to 64): one DFF_with_enable per bit,
// all sharing clk, en and reset.
//   d     - data in
//   q     - registered data out
//   en    - when set, q takes d on the clock edge; otherwise q is held
//   reset - forwarded to every bit cell
module DFF64 #(parameter N=64) (q, d, clk, en, reset);
    output logic [N-1:0] q;
    input  logic [N-1:0] d;
    input  logic         clk, en, reset;

    genvar k;
    generate
        for (k = 0; k < N; k++) begin: eachDFF
            DFF_with_enable dff1 (
                .d     (d[k]),
                .reset (reset),
                .clk   (clk),
                .en    (en),
                .q     (q[k])
            );
        end
    endgenerate
endmodule
//tb
// Testbench for DFF64 (default width, N = 64): checks that data is captured
// only while en is high and that the output holds once en is dropped.
module DFF64_testbench();
    logic [63:0] q, d;
    logic reset, clk, en;

    // Instantiate the 64-bit register under test. (The previous version hooked
    // the 64-bit q/d up to the 1-bit DFF_with_enable, so DFF64 was never tested.)
    DFF64 dut (.q, .d, .clk, .en, .reset);

    // Set up a simulated clock.
    parameter CLOCK_PERIOD=10;
    initial begin
        clk <= 0;
        forever #(CLOCK_PERIOD/2) clk <= ~clk; // Forever toggle the clock
    end

    // Set up the inputs to the design. Each line is a clock cycle.
    initial begin
        reset <= 1; @(posedge clk); // Always reset FSMs at start
        // check if enable works properly by writing data when turn on enable
        reset <= 0; d <= 64'b1; en <= 0; @(posedge clk);
        en <= 1; @(posedge clk);
        d <= 64'b0; @(posedge clk);
        // check if data output would stay when enable is off
        en <= 0; @(posedge clk);
        repeat(2) @(posedge clk);
        $stop; // End the simulation.
    end
endmodule

View File

@ -0,0 +1,42 @@
// enabled DFF built from a 2:1 mux and a DFF
`timescale 1ns/10ps
// Single-bit flip-flop with enable.
//   d     - data in
//   en    - 1: capture d on the clock edge; 0: keep the current q
//   reset - passed straight to the underlying D_FF
//   clk   - clock for the underlying D_FF
//   q     - registered output
module DFF_with_enable (d, reset, clk, en, q);
    input  logic d, reset, clk, en;
    output logic q;

    // Value presented to the flip-flop: either the new data or q fed back.
    logic in;

    // 2:1 mux choosing between the held value (q) and the new data (d), steered by en.
    // NOTE(review): positional hookup; presumably mux2_1 is (out, i0, i1, sel) - confirm.
    mux2_1 m1 (in, q, d, en);

    D_FF d1 (.q(q), .d(in), .reset(reset), .clk(clk));
endmodule
// Testbench for DFF_with_enable: checks that data is captured only while en
// is high and that the output holds once en is dropped.
module DFF_with_enable_testbench();
    logic d, q, reset, clk, en;

    DFF_with_enable dut (.d, .reset, .clk, .en, .q);

    // Set up a simulated clock.
    parameter CLOCK_PERIOD=10;
    initial begin
        clk <= 0;
        forever #(CLOCK_PERIOD/2) clk <= ~clk; // Forever toggle the clock
    end

    // Set up the inputs to the design. Each line is a clock cycle.
    // q is the DUT's output and is initialised by reset; the testbench must not
    // assign it (the previous "q <= 0" gave the variable a second driver).
    initial begin
        reset <= 1; @(posedge clk); // Always reset FSMs at start
        // check if enable works properly by writing data when turn on enable
        reset <= 0; d <= 1; en <= 0; @(posedge clk);
        en <= 1; @(posedge clk);
        d <= 0; @(posedge clk);
        // check if data output would stay when enable is off
        en <= 0; @(posedge clk);
        repeat(2) @(posedge clk);
        $stop; // End the simulation.
    end
endmodule

11
src/hdl/D_FF.sv Normal file
View File

@ -0,0 +1,11 @@
// Data Flip Flop
// Positive-edge D flip-flop with synchronous, active-high reset.
//   q     - registered output
//   d     - data input
//   reset - when high at a rising clock edge, q is cleared to 0
//   clk   - clock
module D_FF (q, d, reset, clk);
    input      d, reset, clk;
    output reg q;

    always_ff @(posedge clk) begin
        if (reset) begin
            q <= 0;     // reset wins over data
        end else begin
            q <= d;     // normal operation: follow d
        end
    end
endmodule

44
src/hdl/alu.sv Normal file
View File

@ -0,0 +1,44 @@
// 64-bit ALU from the 64 bit slicer, with 64-bit nor for zero and a 2-bit xnor for overflow detection
`timescale 1ns/10ps
// 64-bit ALU built from 64 one-bit slices chained through their carries.
//
// Ports:
//   A, B      - 64-bit operands
//   cntrl     - operation select, passed unchanged to every slice
//   result    - 64-bit result
//   negative  - result[63]
//   zero      - output of nor64 over the whole result
//   overflow  - XOR of the carry into and the carry out of bit 63
//   carry_out - carry out of bit 63
// Parameter:
//   DELAY_NS  - delay of the gates instantiated in this module, in timescale
//               units (ns). The gates previously used a literal #50, which is
//               50 ns under `timescale 1ns/10ps and ignored this parameter.
module alu #(parameter DELAY_NS=0.05) (A, B, cntrl, result, negative, zero, overflow, carry_out);
    input logic [63:0] A, B;
    input logic [2:0] cntrl;
    output logic [63:0] result;
    output logic zero, overflow, carry_out, negative;

    // carry_on[i] is the carry into slice i; carry_on[64] is the final carry out
    logic [64:0] carry_on;

    // subtract signal
    logic sub;

    // the operation is a subtract when both low control bits are set
    and #(DELAY_NS) subSel (sub, cntrl[1], cntrl[0]);

    // carry_in for bit0 is 1 if subtract; 0 for others
    assign carry_on[0] = sub;

    // unique case for slice bit0: its carry-in is the subtract signal
    slice s1 (A[0], B[0], sub, cntrl, carry_on[1], result[0]);

    // regular cases for later slices: carry-in comes from the previous slice
    genvar i;
    generate
        for (i = 1; i < 64; i++) begin : gen_slice
            slice s2 (A[i], B[i], carry_on[i], cntrl, carry_on[i+1], result[i]);
        end
    endgenerate

    // determines zero
    nor64 n64 (result, zero);

    // determines overflow
    xor #(DELAY_NS) x1 (overflow, carry_on[63], carry_on[64]);

    // determines neg
    assign negative = result[63];

    // determines carry_out
    assign carry_out = carry_on[64];
endmodule

56
src/hdl/alustim.sv Normal file
View File

@ -0,0 +1,56 @@
// Test bench for ALU
`timescale 1ns/10ps
// Meaning of signals in and out of the ALU:
// Flags:
// negative: whether the result output is negative if interpreted as 2's comp.
// zero: whether the result output was a 64-bit zero.
// overflow: on an add or subtract, whether the computation overflowed if the inputs are interpreted as 2's comp.
// carry_out: on an add or subtract, whether the computation produced a carry-out.
// cntrl Operation Notes:
// 000: result = B value of overflow and carry_out unimportant
// 010: result = A + B
// 011: result = A - B
// 100: result = bitwise A & B value of overflow and carry_out unimportant
// 101: result = bitwise A | B value of overflow and carry_out unimportant
// 110: result = bitwise A XOR B value of overflow and carry_out unimportant
// Stimulus for the ALU: 100 random PASS_B checks followed by one directed
// addition check.
module alustim();
    parameter delay = 100000;   // settle time given to the ALU after each input change

    logic [63:0] A, B;
    logic [2:0] cntrl;
    logic [63:0] result;
    logic negative, zero, overflow, carry_out ;

    parameter ALU_PASS_B=3'b000, ALU_ADD=3'b010, ALU_SUBTRACT=3'b011, ALU_AND=3'b100, ALU_OR=3'b101, ALU_XOR=3'b110;

    alu dut (.A, .B, .cntrl, .result, .negative, .zero, .overflow, .carry_out);

    // Force %t's to print in a nice format.
    initial $timeformat(-9, 2, " ns", 10);

    integer i;

    initial begin
        // The operation under test is PASS_B (cntrl = 000), so say so.
        $display("%t testing PASS_B operations", $time);
        cntrl = ALU_PASS_B;
        for (i=0; i<100; i++) begin
            // $random() yields 32 bits; concatenate two calls so the upper
            // half of each operand is random too rather than sign extension.
            A = {$random(), $random()};
            B = {$random(), $random()};
            #(delay);
            assert(result == B && negative == B[63] && zero == (B == '0));
        end

        $display("%t testing addition", $time);
        cntrl = ALU_ADD;
        A = 64'h0000000000000001; B = 64'h0000000000000001;
        #(delay);
        assert(result == 64'h0000000000000002 && carry_out == 0 && overflow == 0 && negative == 0 && zero == 0);
    end
endmodule

234
src/hdl/control.sv Normal file
View File

@ -0,0 +1,234 @@
// control module for the cpu, based from the datasheet and spec provided
`timescale 1ns/10ps
// Instruction decoder. Purely combinational: looks at the opcode bits of
// `instr` and drives every control output.
//
// Inputs:
//   instr               - 32-bit instruction word
//   overflow, neg, zero - flag inputs (only overflow/neg are used, by B.LT)
//   cbzZero             - zero indication for the CBZ operand
// Outputs: the control signals listed in the port list; x marks a don't-care.
//
// The decode is a priority if/else chain. Each branch assigns one 16-bit
// control word whose fields are, from left to right:
//   Reg2Loc ALUSrc Mem2Reg RegWrite | MemWrite MemRead | BrTaken | UnCondBr |
//   ALUop[2:0] | writeSr pcSr immi flag_en rdSrc
module control(instr, Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, overflow, neg, zero, cbzZero, writeSr, pcSr, immi, flag_en, rdSrc);
    input  logic [31:0] instr;
    input  logic        overflow, neg, zero, cbzZero;
    output logic        Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, writeSr, pcSr, immi, flag_en, rdSrc;
    output logic [2:0]  ALUop;

    // Opcode patterns
    localparam logic [9:0]  OPC_ADDI   = 10'b1001000100;
    localparam logic [10:0] OPC_ADDS   = 11'b10101011000;
    localparam logic [5:0]  OPC_B      = 6'b000101;
    localparam logic [7:0]  OPC_BCOND  = 8'b01010100;
    localparam logic [4:0]  COND_LT    = 5'b01011;
    localparam logic [5:0]  OPC_BL     = 6'b100101;
    localparam logic [10:0] OPC_BR     = 11'b11010110000;
    localparam logic [7:0]  OPC_CBZ    = 8'b10110100;
    localparam logic [10:0] OPC_LDUR   = 11'b11111000010;
    localparam logic [10:0] OPC_STUR   = 11'b11111000000;
    localparam logic [10:0] OPC_SUBS   = 11'b11101011000;

    always_comb begin
        if (instr[31:22] == OPC_ADDI)
            // ADDI
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'bx101, 2'b00, 1'b0, 1'bx, 3'b010, 5'b10100};
        else if (instr[31:21] == OPC_ADDS)
            // ADDS
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'b1001, 2'b00, 1'b0, 1'bx, 3'b010, 5'b10010};
        else if (instr[31:26] == OPC_B)
            // B
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'bxxx0, 2'b00, 1'b1, 1'b1, 3'bxxx, 5'bx0000};
        else if ((instr[31:24] == OPC_BCOND) && (instr[4:0] == COND_LT))
            // B.LT: taken when the overflow and negative flags differ
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'bxxx0, 2'b00, (overflow != neg), 1'b0, 3'bxxx, 5'bx0000};
        else if (instr[31:26] == OPC_BL)
            // BL: writes the link value (writeSr = 0) to the register chosen by rdSrc = 1
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'bxxx1, 2'b00, 1'b1, 1'b1, 3'bxxx, 5'b00001};
        else if (instr[31:21] == OPC_BR)
            // BR: next PC comes from a register (pcSr = 1)
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'b0xx0, 2'b00, 1'bx, 1'bx, 3'bxxx, 5'bx1000};
        else if (instr[31:24] == OPC_CBZ)
            // CBZ: taken when cbzZero is set
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'b00x0, 2'b00, cbzZero, 1'b0, 3'b000, 5'bx0000};
        else if (instr[31:21] == OPC_LDUR)
            // LDUR
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'bx111, 2'b01, 1'b0, 1'bx, 3'b010, 5'b10000};
        else if (instr[31:21] == OPC_STUR)
            // STUR
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'b01x0, 2'b10, 1'b0, 1'bx, 3'b010, 5'bx0000};
        else if (instr[31:21] == OPC_SUBS)
            // SUBS
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'b1001, 2'b00, 1'b0, 1'bx, 3'b011, 5'b10010};
        else
            // Unrecognised instruction: everything off
            {Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, ALUop, writeSr, pcSr, immi, flag_en, rdSrc}
                = {4'b0000, 2'b00, 1'b0, 1'b0, 3'b000, 5'b00000};
    end
endmodule
// tb
// Stimulus for the control decoder: presents one encoding of each supported
// instruction, with all flag inputs low, for 10 time units each.
module control_testbench();
    logic [31:0] instr;
    logic overflow, neg, zero, cbzZero;
    logic Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, writeSr, pcSr, immi, flag_en, rdSrc;
    logic [2:0] ALUop;

    control dut (.*);

    // Drive one instruction word with cleared flags and wait one step.
    task drive(input logic [31:0] word);
        begin
            instr <= word;
            overflow <= 0; neg <= 0; zero <= 0; cbzZero <= 0;
            #10;
        end
    endtask

    // Set up the inputs to the design. Each call is one step.
    initial begin
        drive(32'b10010001000000000000000000000000); // ADDI
        drive(32'b10101011000000000000000000000000); // ADDS
        drive(32'b00010100000000000000000000000000); // B
        drive(32'b01010100000000000000000000001011); // B.LT
        drive(32'b10010100000000000000000000000000); // BL
        drive(32'b11010110000000000000000000000000); // BR
        drive(32'b10110100000000000000000000000000); // CBZ
        drive(32'b11111000010000000000000000000000); // LDUR
        drive(32'b11111000000000000000000000000000); // STUR
        drive(32'b11101011000000000000000000000000); // SUBS
        $stop; // End the simulation.
    end
endmodule

125
src/hdl/datamem.sv Normal file
View File

@ -0,0 +1,125 @@
// Data memory. Supports reads and writes. Data initialized to "X". Note that this memory is little-endian:
// The value of the first double-word is Mem[0]+Mem[1]*256+Mem[2]*256*256+ ... + Mem[7]*256^7
//
// Size is the number of bytes to transfer, and memory supports any power of 2 access size up to double-word.
// However, all accesses must be aligned. So, the address of any access of size S must be a multiple of S.
`timescale 1ns/10ps
// How many bytes are in our memory? Must be a power of two.
`define DATA_MEM_SIZE 1024
// Byte-addressed data memory of `DATA_MEM_SIZE bytes.
//   address      - byte address of the access
//   write_enable - when set, write_data is stored on the rising clock edge
//   read_enable  - when 1, read_data combinationally reflects memory contents
//   write_data   - data to store; byte j goes to aligned_address + j
//   clk          - clock for writes and for the access sanity checks
//   xfer_size    - number of bytes to transfer (1, 2, 4 or 8)
//   read_data    - data read; all X when read_enable is not 1
module datamem (
input logic [63:0] address,
input logic write_enable,
input logic read_enable,
input logic [63:0] write_data,
input logic clk,
input logic [3:0] xfer_size,
output logic [63:0] read_data
);
// Force %t's to print in a nice format.
initial $timeformat(-9, 2, " ns", 10);
// Make sure size is a power of two and reasonable.
initial assert((`DATA_MEM_SIZE & (`DATA_MEM_SIZE-1)) == 0 && `DATA_MEM_SIZE > 8);
// Make sure accesses are reasonable.
// These checks only run on a clock edge where a read or write is requested.
always_ff @(posedge clk) begin
if (address !== 'x && (write_enable || read_enable)) begin // address or size could be all X's at startup, so ignore this case.
assert((address & (xfer_size - 1)) == 0); // Makes sure address is aligned.
assert((xfer_size & (xfer_size-1)) == 0); // Make sure size is a power of 2.
assert(address + xfer_size <= `DATA_MEM_SIZE); // Make sure in bounds.
end
end
// The data storage itself.
// One array element per byte.
logic [7:0] mem [`DATA_MEM_SIZE-1:0];
// Compute a properly aligned address
// The low address bits are cleared according to the transfer size.
logic [63:0] aligned_address;
always_comb begin
case (xfer_size)
1: aligned_address = address;
2: aligned_address = {address[63:1], 1'b0};
4: aligned_address = {address[63:2], 2'b00};
8: aligned_address = {address[63:3], 3'b000};
default: aligned_address = {address[63:3], 3'b000}; // Bad addresses forced to double-word aligned.
endcase
end
// Handle the reads.
// Reads are combinational; bytes beyond xfer_size stay X because read_data
// is first set to all X.
integer i;
always_comb begin
read_data = 'x;
if (read_enable == 1)
for(i=0; i<xfer_size; i++)
read_data[8*i+7 -: 8] = mem[aligned_address + i]; // 8*i+7 -: 8 means "start at 8*i+7, for 8 bits total"
end
// Handle the writes.
// Writes are clocked; the lowest byte of write_data goes to the lowest
// address (little-endian).
integer j;
always_ff @(posedge clk) begin
if (write_enable)
for(j=0; j<xfer_size; j++)
mem[aligned_address + j] <= write_data[8*j+7 -: 8];
end
endmodule
// Randomised testbench for datamem: performs random aligned reads and writes
// of 1, 2, 4 or 8 bytes and checks every read against a shadow copy of the
// memory kept in test_data.
module datamem_testbench ();
    parameter ClockDelay = 5000;

    logic [63:0] address;
    logic write_enable;
    logic read_enable;
    logic [63:0] write_data;
    logic clk;
    logic [3:0] xfer_size;
    logic [63:0] read_data;

    // read_enable must be connected: the DUT only drives read_data when it is 1,
    // so leaving it off made every read return X.
    datamem dut (.address, .write_enable, .read_enable, .write_data, .clk, .xfer_size, .read_data);

    initial begin // Set up the clock
        clk <= 0;
        forever #(ClockDelay/2) clk <= ~clk;
    end

    // Keep copy of what we've done so far.
    logic [7:0] test_data [`DATA_MEM_SIZE-1:0];

    integer i, j, t;
    logic [63:0] rand_addr, rand_data;
    logic [3:0] rand_size;
    logic rand_we;

    initial begin
        address <= '0; read_enable <= '0; write_enable <= '0; write_data <= 'x; xfer_size <= 4'd8;
        @(posedge clk);
        for(i=0; i<1024*`DATA_MEM_SIZE; i++) begin
            // Set up transfer in rand_*, then send to outputs.
            rand_we = $random();
            rand_data = $random();
            // Pick an exponent 0..3, then turn it into a size of 1, 2, 4, or 8.
            // (Shifting by a constant 8 overflowed the 4-bit value to 0, which
            // also made the alignment division below divide by zero.)
            rand_size = $random() & 2'b11; rand_size = 4'b0001 << rand_size;
            rand_addr = $random() & (`DATA_MEM_SIZE-1); rand_addr = (rand_addr/rand_size) * rand_size; // Block aligned

            write_enable <= rand_we;
            read_enable <= ~rand_we;
            xfer_size <= rand_size;
            address <= rand_addr;
            write_data <= rand_data;
            @(posedge clk); // Do the xfer.

            if (rand_we) begin // Track Writes
                for(j=0; j<rand_size; j++)
                    test_data[rand_addr+j] = rand_data[8*j+7 -: 8];
            end else begin // Verify reads.
                for (j=0; j<rand_size; j++)
                    assert(test_data[rand_addr+j] === read_data[8*j+7 -: 8]); // === will return true when comparing X's.
            end
        end
        $stop;
    end
endmodule

9
src/hdl/datpath.sv Normal file
View File

@ -0,0 +1,9 @@
// a datapath of the cpu based on fig 4.15 on textbook
// Datapath shell: declares the control inputs and flag outputs only.
// The body is empty, so the outputs are not driven by anything here.
module datapath(
    output logic       overflow, neg, zero,
    input  logic       Reg2Loc, ALUSrc, Mem2Reg, RegWrite, MemWrite, MemRead, BrTaken, UnCondBr, writeSr, pcSr,
    input  logic [2:0] ALUop
);
endmodule

44
src/hdl/decoder_2x4.sv Normal file
View File

@ -0,0 +1,44 @@
// Gate-level 2-to-4 decoder with enable
`timescale 1ns/10ps
// 2-to-4 decoder with enable, built from NOT and AND gates.
//   in  - 2-bit select
//   en  - enable; when 0 every output is 0
//   out - out[k] is 1 only when en is 1 and in == k
module decoder_2x4(in, en, out);
    input  logic [1:0] in;
    input  logic       en;
    output logic [3:0] out;

    // complemented select bits
    logic [1:0] in_n;

    not #0.05 not0(in_n[0], in[0]);
    not #0.05 not1(in_n[1], in[1]);

    // one AND per output: {in[1], in[0]} pattern plus the enable
    and #0.05 and0(out[0], en, in_n[1], in_n[0]);   // 00
    and #0.05 and1(out[1], en, in_n[1], in[0]);     // 01
    and #0.05 and2(out[2], en, in[1],   in_n[0]);   // 10
    and #0.05 and3(out[3], en, in[1],   in[0]);     // 11
endmodule
// Stimulus for decoder_2x4: walks through all four select values with the
// enable high, then again with the enable low.
module decoder_2x4_testbench();
    logic [1:0] in;
    logic       en;
    logic [3:0] out;

    decoder_2x4 dut (.in(in), .en(en), .out(out));

    integer sel, pass;

    initial begin // Test all input variations
        // pass 0: enabled, pass 1: disabled
        for (pass = 0; pass < 2; pass++) begin
            for (sel = 0; sel < 4; sel++) begin
                en = (pass == 0);
                in[1:0] = sel;
                #1;
            end
        end
    end
endmodule

50
src/hdl/decoder_3x8.sv Normal file
View File

@ -0,0 +1,50 @@
// a gate-level 3 by 8 decoder
`timescale 1ns/10ps
// 3-to-8 decoder with enable, built from NOT and AND gates.
//   in  - 3-bit select
//   en  - enable; when 0 every output is 0
//   out - out[k] is 1 only when en is 1 and in == k
module decoder_3x8 (in, en, out);
    input  logic [2:0] in;
    input  logic       en;
    output logic [7:0] out;

    // complemented select bits
    logic [2:0] in_n;

    not #0.05 not0(in_n[0], in[0]);
    not #0.05 not1(in_n[1], in[1]);
    not #0.05 not2(in_n[2], in[2]);

    // one AND per output: {in[2], in[1], in[0]} pattern plus the enable
    and #0.05 out0(out[0], en, in_n[2], in_n[1], in_n[0]);  // 000
    and #0.05 out1(out[1], en, in_n[2], in_n[1], in[0]);    // 001
    and #0.05 out2(out[2], en, in_n[2], in[1],   in_n[0]);  // 010
    and #0.05 out3(out[3], en, in_n[2], in[1],   in[0]);    // 011
    and #0.05 out4(out[4], en, in[2],   in_n[1], in_n[0]);  // 100
    and #0.05 out5(out[5], en, in[2],   in_n[1], in[0]);    // 101
    and #0.05 out6(out[6], en, in[2],   in[1],   in_n[0]);  // 110
    and #0.05 out7(out[7], en, in[2],   in[1],   in[0]);    // 111
endmodule
// tb
// Stimulus-only testbench for decoder_3x8: sweeps all eight select codes with
// the enable high, then again with the enable low. Each vector is held for
// 1 time unit. (The unused local not_in declaration has been dropped; it was
// never connected to anything.)
module decoder_3x8_testbench();
    logic [2:0] in;
    logic [7:0] out;
    logic       en;

    decoder_3x8 dut (.in, .en, .out);

    integer i, j;

    initial begin
        // enabled: exactly one output should follow the select code
        for (i = 0; i < 8; i++) begin
            en = 1;
            in[2:0] = i; #1;
        end
        // disabled: outputs should stay low for every select code
        for (j = 0; j < 8; j++) begin
            en = 0;
            in[2:0] = j; #1;
        end
    end
endmodule

41
src/hdl/decoder_5x32.sv Normal file
View File

@ -0,0 +1,41 @@
// 5 to 32 using basic gates
`timescale 1ns/10ps
// 5-to-32 decoder assembled from one decoder_2x4 and four decoder_3x8.
//   in  : 5-bit select code
//   en  : enable, fed to the 2x4 stage
//   out : 32 decoded lines
// in[4:3] picks which 3x8 decoder is enabled (a bank of eight outputs) and
// in[2:0] picks the line inside that bank.
module decoder_5x32 (in, out, en);
    input  logic [4:0]  in;
    input  logic        en;
    output logic [31:0] out;

    // per-bank enables produced by the 2x4 stage
    logic [3:0] bank_en;

    decoder_2x4 dec0 (.in(in[4:3]), .en(en), .out(bank_en));

    decoder_3x8 dec1 (.in(in[2:0]), .en(bank_en[0]), .out(out[7:0]));
    decoder_3x8 dec2 (.in(in[2:0]), .en(bank_en[1]), .out(out[15:8]));
    decoder_3x8 dec3 (.in(in[2:0]), .en(bank_en[2]), .out(out[23:16]));
    decoder_3x8 dec4 (.in(in[2:0]), .en(bank_en[3]), .out(out[31:24]));
endmodule
// Stimulus-only testbench for decoder_5x32: sweeps all 32 select codes with
// the enable high, then again with the enable low, holding each vector for
// 1 time unit. (The unused local decoder_2x4_out declaration has been
// removed; it was not connected to the DUT.)
module decoder_5x32_testbench();
    logic [4:0]  in;
    logic        en;
    logic [31:0] out;

    decoder_5x32 dut (.in, .en, .out);

    integer i, j;

    initial begin
        // enabled sweep
        for (i = 0; i < 32; i++) begin
            en = 1;
            in[4:0] = i; #1;
        end
        // disabled sweep
        for (j = 0; j < 32; j++) begin
            en = 0;
            in[4:0] = j; #1;
        end
    end
endmodule

61
src/hdl/foward.sv Normal file
View File

@ -0,0 +1,61 @@
// Forwarding control unit for the pipelined LEGV-5 CPU
`timescale 1ns/10ps
// Forwarding control unit.
//
// For each of the two source registers (regA, regB) this compares the
// register number against the destination registers execReg and memReg and
// emits a 2-bit select:
//   2'b10 : exec_en is set and execReg matches (takes priority)
//   2'b11 : mem_en is set, memReg matches, and pc_en is low
//   2'b01 : mem_en is set, memReg matches, and pc_en is high
//   2'b00 : no match
// Register 31 (5'b11111) never produces a match.
//
// Fix: the second branch of the regB chain used to assign fwA instead of
// fwB. That left fwB unassigned on that path and overwrote the fwA value
// computed just above it. Both chains are now symmetric.
//
// The DATA and ADDR parameters are kept for interface compatibility; the
// port widths are fixed at 5 bits as before.
module foward #(parameter DATA=64, ADDR=5) (exec_en, mem_en, pc_en, execReg, memReg, regA, regB, fwA, fwB);
    input  logic       exec_en, mem_en, pc_en;
    input  logic [4:0] execReg, memReg, regA, regB;
    output logic [1:0] fwA, fwB;

    // register number that is never forwarded
    localparam logic [4:0] NO_FWD_REG = 5'b11111;

    always_comb begin
        // operand A (regA)
        if (exec_en && execReg != NO_FWD_REG && execReg == regA)
            fwA = 2'b10;
        else if (mem_en && memReg != NO_FWD_REG && memReg == regA && !pc_en)
            fwA = 2'b11;
        else if (mem_en && memReg != NO_FWD_REG && memReg == regA)
            fwA = 2'b01;
        else
            fwA = 2'b00;

        // operand B (regB): same priority order, driving fwB only
        if (exec_en && execReg != NO_FWD_REG && execReg == regB)
            fwB = 2'b10;
        else if (mem_en && memReg != NO_FWD_REG && memReg == regB && !pc_en)
            fwB = 2'b11;
        else if (mem_en && memReg != NO_FWD_REG && memReg == regB)
            fwB = 2'b01;
        else
            fwB = 2'b00;
    end
endmodule
// Stimulus-only testbench for the foward unit. Three register-number
// scenarios are applied; within each one exec_en, mem_en and pc_en are each
// driven low then high, 10 time units per step. Outputs are inspected in the
// waveform, nothing is asserted.
module foward_tb ();
logic exec_en, mem_en, pc_en;
logic [4:0] execReg, memReg, regA, regB;
logic [1:0] fwA, fwB;
foward dut (.*);
integer i;
initial begin
// case 1: no register numbers match, so both selects should stay at option 0.
// Note: mem_en and pc_en are not driven until their first assignment below,
// so they are X for the first steps of this case.
execReg = 5'd23; memReg = 5'd13; regA = 5'd30; regB = 5'd12;
exec_en = 1'b0; #10;
exec_en = 1'b1; #10;
mem_en = 1'b0; #10;
mem_en = 1'b1; #10;
pc_en = 1'b0; #10;
pc_en = 1'b1; #10;
// case 2: memReg (12) equals regB (12) -> MEM-stage match on operand B
execReg = 5'd23; memReg = 5'd12; regA = 5'd30; regB = 5'd12;
exec_en = 1'b0; #10;
exec_en = 1'b1; #10;
mem_en = 1'b0; #10;
mem_en = 1'b1; #10;
pc_en = 1'b0; #10;
pc_en = 1'b1; #10;
// case 3: execReg (12) equals regB (12) -> EX-stage match on operand B
execReg = 5'd12; memReg = 5'd23; regA = 5'd30; regB = 5'd12;
exec_en = 1'b0; #10;
exec_en = 1'b1; #10;
mem_en = 1'b0; #10;
mem_en = 1'b1; #10;
pc_en = 1'b0; #10;
pc_en = 1'b1; #10;
end
endmodule

34
src/hdl/fullAdder.sv Normal file
View File

@ -0,0 +1,34 @@
// a gate-level 1-bit full adder
`timescale 1ns/10ps
// Gate-level 1-bit full adder.
//   a, b, c_in : addend bits and carry in
//   sum        : a ^ b ^ c_in
//   c_out      : (a & b) | ((a ^ b) & c_in)
// DELAY_NS is the propagation delay applied to every gate.
// (The unused net or_out from the original declaration has been removed.)
module fullAdder #(parameter DELAY_NS=0.05) (a, b, c_in, sum, c_out);
    input  logic a, b, c_in;
    output logic sum, c_out;

    wire half_sum;    // a ^ b
    wire carry_prop;  // carry in passed on because a and b differ
    wire carry_gen;   // carry produced by a and b alone

    // sum path
    xor #DELAY_NS u1 (half_sum, a, b);
    xor #DELAY_NS u2 (sum, half_sum, c_in);

    // carry path
    and #DELAY_NS u3 (carry_prop, half_sum, c_in);
    and #DELAY_NS u4 (carry_gen, a, b);
    or  #DELAY_NS u5 (c_out, carry_gen, carry_prop);
endmodule
// Stimulus-only testbench for fullAdder: walks {a, b, c_in} through all
// eight combinations, holding each one for 10 time units.
module fullAdder_testbench();
    logic a, b, c_in;
    logic sum, c_out;

    fullAdder dut (.*);
    // The gate delay can be zeroed for a purely functional run with:
    // defparam dut.DELAY_NS = 0;

    integer vec;

    initial begin
        vec = 0;
        while (vec < 8) begin
            {a, b, c_in} = vec;
            #10;
            vec++;
        end
    end
endmodule

88
src/hdl/instructmem.sv Normal file
View File

@ -0,0 +1,88 @@
// Instruction ROM. Supports reads only, but is initialized based upon the file specified.
// All accesses are 32-bit. Addresses are byte-addresses, and must be word-aligned (bottom
// two bits of the address must be 0).
//
// To change the file that is loaded, edit the filename here:
// `define BENCHMARK "../Benchmarks/test01_AddiB.arm"
// `define BENCHMARK "../benchmarks/test02_AddsSubs.arm"
// `define BENCHMARK "../benchmarks/test03_CbzB.arm"
// `define BENCHMARK "../benchmarks/test04_LdurStur.arm"
// `define BENCHMARK "../benchmarks/test05_Blt.arm"
// `define BENCHMARK "../benchmarks/test06_BlBr.arm"
// `define BENCHMARK "../benchmarks/test10_forwarding.arm"
`define BENCHMARK "../benchmarks/test11_Sort.arm"
// `define BENCHMARK "../benchmarks/test12_Fibonacci.arm"
`timescale 1ns/10ps
// How many bytes are in our memory? Must be a power of two.
`define INSTRUCT_MEM_SIZE 1024
// Instruction ROM.
//   address     : 64-bit byte address of the instruction to read
//   instruction : 32-bit word at that address, or all X when the access
//                 would run past the end of the memory
//   clk         : used only to time the sanity-check assertions
// The contents are loaded once at time 0 from the file named by the
// BENCHMARK macro; the size in bytes comes from the INSTRUCT_MEM_SIZE macro.
module instructmem (
input logic [63:0] address,
output logic [31:0] instruction,
input logic clk // Memory is combinational, but used for error-checking
);
// Force %t's to print in a nice format.
initial $timeformat(-9, 2, " ns", 10);
// Make sure size is a power of two and reasonable.
// (x & (x-1)) == 0 holds only when x has a single bit set.
initial assert((`INSTRUCT_MEM_SIZE & (`INSTRUCT_MEM_SIZE-1)) == 0 && `INSTRUCT_MEM_SIZE > 4);
// Make sure accesses are reasonable.
// Checked on every rising clock edge; reads themselves are not clocked.
always_ff @(posedge clk) begin
if (address !== 'x) begin // address or size could be all X's at startup, so ignore this case.
assert(address[1:0] == 0); // Makes sure address is aligned.
assert(address + 3 < `INSTRUCT_MEM_SIZE); // Make sure address in bounds.
end
end
// The data storage itself.
// One 32-bit entry per word, so INSTRUCT_MEM_SIZE/4 entries.
logic [31:0] mem [`INSTRUCT_MEM_SIZE/4-1:0];
// Load the program - change the filename to pick a different program.
// $readmemb expects the file to hold binary text, one word per entry.
initial begin
$readmemb(`BENCHMARK, mem);
$display("Running benchmark: ", `BENCHMARK);
end
// Handle the reads.
// NOTE(review): integer i below is not referenced anywhere in this module.
integer i;
always_comb begin
// The last byte of the word (address + 3) must lie inside the memory.
if (address + 3 >= `INSTRUCT_MEM_SIZE)
instruction = 'x;
else
// Convert the byte address to a word index.
instruction = mem[address/4];
end
endmodule
// Testbench for instructmem: reads every word address in order, one per
// clock cycle, then stops the simulation. Nothing is asserted here; the
// DUT's own assertions and the waveform are the checks.
module instructmem_testbench ();
parameter ClockDelay = 5000;
logic [63:0] address;
logic clk;
logic [31:0] instruction;
instructmem dut (.address, .instruction, .clk);
initial begin // Set up the clock
clk <= 0;
// Toggle every half period, giving a full period of ClockDelay time units.
forever #(ClockDelay/2) clk <= ~clk;
end
integer i;
initial begin
// Read every location, including just past the end of the memory.
// The loop bound is inclusive (<=), so the final address equals
// INSTRUCT_MEM_SIZE, which is one word beyond the last valid word.
for (i=0; i <= `INSTRUCT_MEM_SIZE; i = i + 4) begin
address <= i;
@(posedge clk);
end
$stop;
end
endmodule

93
src/hdl/math.sv Normal file
View File

@ -0,0 +1,93 @@
// A few math subunits.
// The multipler can be used to implement the MUL instruction,
// and the shifter can be used to implement LSL and/or LSR.
// DO NOT USE for any other purpose.
// 64 x 64 -> 128-bit multiplier.
//   A, B      : 64-bit operands
//   doSigned  : 1 treats A and B as two's-complement, 0 as unsigned
//   mult_low  : bits [63:0] of the 128-bit product
//   mult_high : bits [127:64] of the 128-bit product
// Both products are computed continuously; doSigned picks which one is
// presented on the outputs.
module mult (
    input  logic [63:0] A, B,
    input  logic        doSigned,
    output logic [63:0] mult_low, mult_high
);
    logic signed [127:0] product_signed;
    logic        [127:0] product_unsigned;

    // Operands are extended to 128 bits by the assignment context: sign
    // extension when both are signed, zero extension otherwise.
    assign product_signed   = $signed(A) * $signed(B);
    assign product_unsigned = A * B;

    // Output selection
    always_comb begin
        if (doSigned) begin
            mult_high = product_signed[127:64];
            mult_low  = product_signed[63:0];
        end else begin
            mult_high = product_unsigned[127:64];
            mult_low  = product_unsigned[63:0];
        end
    end
endmodule
// 64-bit logical shifter.
//   value     : word to shift
//   direction : 0 shifts left, anything else shifts right
//   distance  : shift amount, 0..63
//   result    : shifted word; vacated bit positions are filled with zeros
module shifter (
    input  logic [63:0] value,
    input  logic        direction,
    input  logic [5:0]  distance,
    output logic [63:0] result
);
    always_comb begin
        // default to the right shift, override for an explicit left shift
        result = value >> distance;
        if (direction == 0) begin
            result = value << distance;
        end
    end
endmodule
// Stimulus-only testbench for shifter: with a fixed data word, applies every
// shift distance 0..63 to the left, then every distance 0..63 to the right,
// holding each setting for 10 time units.
module shifter_testbench();
    logic [63:0] value;
    logic        direction;
    logic [5:0]  distance;
    logic [63:0] result;

    shifter dut (.value, .direction, .distance, .result);

    integer step;

    initial begin
        value = 64'hDEADBEEFDECAFBAD;
        // step[6] is the direction, step[5:0] is the distance
        for (step = 0; step < 128; step++) begin
            direction <= step[6];
            distance  <= step[5:0];
            #10;
        end
    end
endmodule
// Stimulus-only testbench for mult: runs the same five operand pairs first
// as an unsigned multiply (doSigned = 0) and then as a signed multiply
// (doSigned = 1), 10 time units per pair.
module mult_testbench();
logic [63:0] A, B;
logic doSigned;
logic [63:0] mult_low, mult_high;
// Full 128-bit product, reassembled for convenient viewing in the waveform.
logic [127:0] fullVal;
mult dut (.A, .B, .doSigned, .mult_low, .mult_high);
assign fullVal = {mult_high, mult_low};
integer i;
initial begin
for(i=0; i<2; i++) begin
// i = 0 selects unsigned, i = 1 selects signed.
doSigned <= i[0];
A <= 0; B <= 0; #10;
A <= 1; B <= 2; #10;
// -1 is all ones: a large positive value when unsigned, minus one when signed.
A <= -1; B <= 1; #10;
A <= -1; B <= -1; #10;
// Operands large enough that the product spills into mult_high.
A <= 5<<35; B <= 6<<35; #10;
end
end
endmodule

29
src/hdl/mux16.sv Normal file
View File

@ -0,0 +1,29 @@
// create a 16:1 mux from two 8:1 mux which by a 2:1 mux
`timescale 1ns/10ps
// 1-bit 16:1 multiplexer built from two mux8 and one mux2_1.
//   in  : the sixteen data inputs
//   sel : index of the input routed to out
//   out : in[sel]
// sel[2:0] picks within each half; sel[3] picks between the halves.
// (The unused wire w3 from the original declaration has been removed.)
module mux16 (out, in, sel);
    input  logic [15:0] in;
    input  logic [3:0]  sel;
    output logic        out;

    logic lower_pick;   // selected bit from in[7:0]
    logic upper_pick;   // selected bit from in[15:8]

    mux8   m1 (.out(lower_pick), .in(in[7:0]),  .sel(sel[2:0]));
    mux8   m2 (.out(upper_pick), .in(in[15:8]), .sel(sel[2:0]));
    mux2_1 m3 (.out(out), .i0(lower_pick), .i1(upper_pick), .sel(sel[3]));
endmodule
// Exhaustive stimulus for mux16: {in, sel} is 20 bits wide, and all 2**20
// combinations are applied for 1 time unit each. Outputs are inspected in
// the waveform.
module mux16_testbench();
    logic [15:0] in;
    logic [3:0]  sel;
    logic        out;

    mux16 dut (.*);

    integer vec;

    initial begin
        vec = 0;
        while (vec < 2**20) begin
            {in, sel} = vec;
            #1;
            vec++;
        end
    end
endmodule

29
src/hdl/mux2_1.sv Normal file
View File

@ -0,0 +1,29 @@
// a gate-level 2:1 mux
`timescale 1ns/10ps
// Gate-level 1-bit 2:1 multiplexer in NAND-NAND form.
//   i0, i1 : data inputs
//   sel    : 0 routes i0 to out, 1 routes i1 to out
//   out    : selected input
// DELAY_NS is the propagation delay applied to every gate.
module mux2_1 #(parameter DELAY_NS=0.05) (
    output logic out,
    input  logic i0, i1, sel
);
    logic sel_n;     // inverted select
    logic pass1_n;   // ~(i1 & sel)
    logic pass0_n;   // ~(i0 & ~sel)

    not  #DELAY_NS nselect (sel_n, sel);
    nand #DELAY_NS u1  (pass1_n, i1, sel);
    nand #DELAY_NS u2  (pass0_n, i0, sel_n);
    // NAND of the two inverted terms is the OR of the two gated inputs
    nand #DELAY_NS res (out, pass1_n, pass0_n);
endmodule
// Exhaustive stimulus for mux2_1: all eight combinations of {i0, i1, sel},
// 1 time unit each.
module mux2_1_testbench();
    logic i0, i1, sel;
    logic out;

    mux2_1 dut (.out, .i0, .i1, .sel);

    integer vec;

    initial begin
        vec = 0;
        while (vec < 2**3) begin
            {i0, i1, sel} = vec;
            #1;
            vec++;
        end
    end
endmodule

28
src/hdl/mux2_n.sv Normal file
View File

@ -0,0 +1,28 @@
// n-bit-wide 2:1 mux built from n 1-bit 2:1 muxes
`timescale 1ns/10ps
// WIDTH-bit 2:1 multiplexer: one mux2_1 per bit, all sharing the same select.
//   datIn0, datIn1 : WIDTH-bit data inputs
//   sel            : 0 selects datIn0, 1 selects datIn1
//   datOut         : selected word
module mux2_n #(parameter WIDTH=64) (datOut, datIn0, datIn1, sel);
    output logic [WIDTH - 1:0] datOut;
    input  logic [WIDTH - 1:0] datIn0, datIn1;
    input  logic               sel;

    // one bit slice per generate iteration
    generate
        for (genvar i = 0; i < WIDTH; i++) begin : genmux
            mux2_1 muxes (
                .out(datOut[i]),
                .i0 (datIn0[i]),
                .i1 (datIn1[i]),
                .sel(sel)
            );
        end
    endgenerate
endmodule
// Self-checking testbench for mux2_n at its default 64-bit width: drives two
// distinct words and checks that datOut follows the selected one.
// (Adds the checks the original left to waveform inspection and drops the
// unused loop variable.)
module mux2_n_tb();
    logic [63:0] datIn0, datIn1;
    logic        sel;
    logic [63:0] datOut;

    mux2_n dut (.*);

    initial begin
        datIn0 = 64'h00000000000000A0; datIn1 = 64'h00000000000000EF;

        sel = 1'b0; #10;
        assert (datOut == datIn0) else $error("sel=0: expected datIn0, got %h", datOut);

        sel = 1'b1; #10;
        assert (datOut == datIn1) else $error("sel=1: expected datIn1, got %h", datOut);
    end
endmodule

29
src/hdl/mux32.sv Normal file
View File

@ -0,0 +1,29 @@
// create a 32:1 mux from two 16:1 mux which by a 2:1 mux
`timescale 1ns/10ps
// 1-bit 32:1 multiplexer built from two mux16 and one mux2_1.
//   in  : the thirty-two data inputs
//   sel : index of the input routed to out
//   out : in[sel]
// sel[3:0] picks within each half; sel[4] picks between the halves.
// (The unused wire w3 from the original declaration has been removed.)
module mux32(out, in, sel);
    input  logic [31:0] in;
    input  logic [4:0]  sel;
    output logic        out;

    logic lower_pick;   // selected bit from in[15:0]
    logic upper_pick;   // selected bit from in[31:16]

    mux16  m1 (.out(lower_pick), .in(in[15:0]),  .sel(sel[3:0]));
    mux16  m2 (.out(upper_pick), .in(in[31:16]), .sel(sel[3:0]));
    mux2_1 m3 (.out(out), .i0(lower_pick), .i1(upper_pick), .sel(sel[4]));
endmodule
// Self-checking testbench for mux32.
//
// {in, sel} is 37 bits wide, so an exhaustive sweep is not practical. The
// original loop counted to 2**10, which only ever toggled in[4:0] and left
// in[31:5] at zero. This version instead drives, for every select value, a
// walking-one and a walking-zero across all 32 data inputs, so every input
// line is exercised against every select code.
module mux32_testbench();
    logic [31:0] in;
    logic [4:0]  sel;
    logic        out;

    mux32 dut (.*);

    integer s, b;

    initial begin
        for (s = 0; s < 32; s++) begin
            for (b = 0; b < 32; b++) begin
                sel = s[4:0];

                // walking one: out is high only when the hot bit is selected
                in = 32'b1 << b;
                #1;
                assert (out == (b == s))
                    else $error("walking-1: sel=%0d bit=%0d out=%b", s, b, out);

                // walking zero: out is low only when the cold bit is selected
                in = ~(32'b1 << b);
                #1;
                assert (out == (b != s))
                    else $error("walking-0: sel=%0d bit=%0d out=%b", s, b, out);
            end
        end
    end
endmodule

44
src/hdl/mux32_64.sv Normal file
View File

@ -0,0 +1,44 @@
// 64-bit-wide 32:1 mux built from 64 1-bit 32:1 muxes
`timescale 1ns/10ps
// WIDTH-bit 32:1 multiplexer.
//   datIn   : 32 words of WIDTH bits each
//   readReg : index of the word to present
//   readDat : datIn[readReg]
// Built from WIDTH copies of the 1-bit mux32. Each mux32 needs the same bit
// position from all 32 words, so the input array is first regrouped by bit
// position into `transposed`.
module mux32_64 #(parameter WIDTH=64, ADDR=5) (readDat, datIn, readReg);
    output logic [WIDTH - 1:0]       readDat;
    input  logic [31:0][WIDTH - 1:0] datIn;
    input  logic [ADDR - 1:0]        readReg;

    // transposed[bit][word] = datIn[word][bit]
    logic [WIDTH - 1 : 0][31:0] transposed;

    generate
        for (genvar i = 0; i < WIDTH; i++) begin : genmux
            // collect bit i of every word
            for (genvar k = 0; k < 32; k++) begin : gather
                assign transposed[i][k] = datIn[k][i];
            end
            // pick bit i of the addressed word
            mux32 muxes (.out(readDat[i]), .in(transposed[i]), .sel(readReg));
        end
    endgenerate
endmodule
// Stimulus-only testbench for mux32_64: loads word n with n * 0xA0, then
// steps the select through all 32 indices, 10 time units each.
module mux32_64_testbench();
    logic [31:0][63:0] datIn;
    logic [4:0]        readReg;
    logic [63:0]       readDat;

    mux32_64 dut (.*);

    integer word, pick;

    initial begin
        // distinct, index-derived value in every word
        for (word = 0; word < 32; word = word + 1) begin
            datIn[word] = word * 64'h00000000000000A0;
        end
        // read each word back in turn
        for (pick = 0; pick < 2**5; pick++) begin
            readReg = pick;
            #10;
        end
    end
endmodule

27
src/hdl/mux3_1.sv Normal file
View File

@ -0,0 +1,27 @@
// a 1-bit 3:1 mux built from two 2:1 muxes
`timescale 1ns/10ps
// 1-bit 3:1 multiplexer made of two cascaded mux2_1.
//   i0, i1, i2 : data inputs
//   sel0       : chooses between i0 (0) and i1 (1) in the first stage
//   sel1       : 0 passes the first-stage result, 1 passes i2
//   out        : sel1 ? i2 : (sel0 ? i1 : i0)
module mux3_1 (out, i0, i1, i2, sel0, sel1);
    output logic out;
    input  logic i0, i1, i2, sel0, sel1;

    // first-stage result: i0 or i1
    logic stage1;

    mux2_1 mux1 (.out(stage1), .i0(i0),     .i1(i1), .sel(sel0));
    mux2_1 mux2 (.out(out),    .i0(stage1), .i1(i2), .sel(sel1));
endmodule
// Exhaustive stimulus for mux3_1: {i0, i1, i2, sel0, sel1} is five bits wide,
// so all 2**5 combinations are applied, 1 time unit each.
// (The original loop stopped at 2**3, which never drove i0 or i1 high.)
module mux3_1_testbench();
    logic i0, i1, i2, sel0, sel1;
    logic out;

    mux3_1 dut (.out, .i0, .i1, .i2, .sel0, .sel1);

    integer i;

    initial begin
        for (i = 0; i < 2**5; i++) begin
            {i0, i1, i2, sel0, sel1} = i; #1;
        end
    end
endmodule

46
src/hdl/mux3_64.sv Normal file
View File

@ -0,0 +1,46 @@
// 64-bit-wide 3:1 mux built from 64 1-bit 3:1 muxes
`timescale 1ns/10ps
// WIDTH-bit 3:1 multiplexer.
//   datIn1, datIn2, datIn3 : WIDTH-bit data inputs
//   readReg                : 2-bit select passed to every per-bit mux
//   readDat                : selected word
// Each bit position gathers its three candidate bits into transposed[bit]
// ({datIn3, datIn2, datIn1} on bits 2..0) and hands them to a 1-bit mux.
//
// Fix: transposed[j][2] was wired to datIn2, so datIn3 was never connected
// and could not be selected. It is now wired to datIn3. The unused loop
// variable k has also been removed.
//
// NOTE(review): the per-bit mux module `mux3` (ports out / in[2:0] /
// sel[1:0]) is not defined in the part of the source visible here; confirm
// that it exists and how it maps sel to in[0..2].
module mux3_64 #(parameter WIDTH=64) (readDat, datIn1, datIn2, datIn3, readReg);
    input  logic [WIDTH - 1:0] datIn1;
    input  logic [WIDTH - 1:0] datIn2;
    input  logic [WIDTH - 1:0] datIn3;
    input  logic [1:0]         readReg;
    output logic [WIDTH - 1:0] readDat;

    // transposed[bit] = {datIn3[bit], datIn2[bit], datIn1[bit]}
    logic [WIDTH - 1 : 0][2:0] transposed;

    always_comb begin
        for (int j = 0; j < WIDTH; j++) begin
            transposed[j][0] = datIn1[j];
            transposed[j][1] = datIn2[j];
            transposed[j][2] = datIn3[j];
        end
    end

    // one 1-bit 3:1 mux per bit position
    genvar i;
    generate
        for (i = 0; i < WIDTH; i ++) begin : genmux
            mux3 muxes (.out(readDat[i]), .in(transposed[i]), .sel(readReg));
        end
    endgenerate
endmodule
// Stimulus-only testbench for mux3_64: gives each of the three data inputs a
// distinct word, then steps the select through 0, 1 and 2, 10 time units
// each.
//
// Fix: the original loop assigned to datIn1[i], i.e. single bits 0..2 of
// datIn1, and never drove datIn2 or datIn3, so almost every input bit was X.
// All three inputs are now driven with full 64-bit values.
module mux3_64_testbench();
    logic [63:0] datIn1, datIn2, datIn3;
    logic [1:0]  readReg;
    logic [63:0] readDat;

    mux3_64 dut (.*);

    integer i;

    initial begin
        // multiples of a fixed seed so the selected input is easy to recognise
        datIn1 = 1 * 64'h00000000000000A0;
        datIn2 = 2 * 64'h00000000000000A0;
        datIn3 = 3 * 64'h00000000000000A0;

        for (i = 0; i < 3; i++) begin
            readReg = i; #10; // select an input to output
        end
    end
endmodule

31
src/hdl/mux4_1.sv Normal file
View File

@ -0,0 +1,31 @@
// create a 4:1 mux from two 2:1 mux with another 2:1 mux
`timescale 1ns/10ps
// 1-bit 4:1 multiplexer built from three mux2_1.
//   in  : the four data inputs
//   sel : index of the input routed to out
//   out : in[sel]
// sel[0] picks within each pair; sel[1] picks between the pairs.
// DELAY_NS is forwarded to every mux2_1.
//
// Changes: the parameter override now uses the standard #(.NAME(value))
// form instead of the bare `#DELAY_NS`, which the language grammar only
// defines for delays, and the unused wire w3 has been removed.
module mux4_1 #(parameter DELAY_NS=0.05) (out, in, sel);
    input  logic [3:0] in;
    input  logic [1:0] sel;
    output logic       out;

    logic lower_pick;   // selected bit from in[1:0]
    logic upper_pick;   // selected bit from in[3:2]

    mux2_1 #(.DELAY_NS(DELAY_NS)) m1 (.out(lower_pick), .i0(in[0]), .i1(in[1]), .sel(sel[0]));
    mux2_1 #(.DELAY_NS(DELAY_NS)) m2 (.out(upper_pick), .i0(in[2]), .i1(in[3]), .sel(sel[0]));
    mux2_1 #(.DELAY_NS(DELAY_NS)) m3 (.out(out), .i0(lower_pick), .i1(upper_pick), .sel(sel[1]));
endmodule
// Exhaustive stimulus for mux4_1: {in, sel} is six bits wide, and all 2**6
// combinations are applied for 1 time unit each.
module mux4_1_testbench();
    logic [3:0] in;
    logic [1:0] sel;
    logic       out;

    mux4_1 dut (.*);

    integer vec;

    initial begin
        vec = 0;
        while (vec < 2**6) begin
            {in, sel} = vec;
            #1;
            vec++;
        end
    end
endmodule

32
src/hdl/mux4_64.sv Normal file
View File

@ -0,0 +1,32 @@
// 64-bit mux4:1
`timescale 1ns/10ps
// 64-bit 4:1 multiplexer: one mux4_1 per bit, all sharing the same select.
//   i0..i3 : 64-bit data inputs
//   sel    : 0 -> i0, 1 -> i1, 2 -> i2, 3 -> i3
//   out    : selected word
module mux4_64 (out, i0, i1, i2, i3, sel);
    output logic [63:0] out;
    input  logic [63:0] i0, i1, i2, i3;
    input  logic [1:0]  sel;

    generate
        for (genvar i = 0; i < 64; i++) begin : genmux
            // bit i of each input, ordered so that index n holds i<n>[i]
            logic [3:0] candidates;
            assign candidates = {i3[i], i2[i], i1[i], i0[i]};

            mux4_1 oneMux (.out(out[i]), .in(candidates), .sel(sel));
        end
    endgenerate
endmodule
// Self-checking testbench for mux4_64: with inputs fixed at 1, 2, 3 and 4,
// steps the select through all four values and checks the output each time.
// (The expected values were previously only written as comments; they are
// now asserted. The unused loop variable has been removed.)
module mux4_64_tb();
    logic [63:0] i0, i1, i2, i3;
    logic [1:0]  sel;
    logic [63:0] out;

    mux4_64 dut (.*);

    initial begin
        i0 = 64'd1; i1 = 64'd2; i2 = 64'd3; i3 = 64'd4;

        sel = 2'b00; #1;
        assert (out == 64'd1) else $error("sel=00: expected 1, got %0d", out);

        sel = 2'b01; #1;
        assert (out == 64'd2) else $error("sel=01: expected 2, got %0d", out);

        sel = 2'b10; #1;
        assert (out == 64'd3) else $error("sel=10: expected 3, got %0d", out);

        sel = 2'b11; #1;
        assert (out == 64'd4) else $error("sel=11: expected 4, got %0d", out);
    end
endmodule

31
src/hdl/mux8.sv Normal file
View File

@ -0,0 +1,31 @@
// create a 8:1 mux from two 4:1 mux which by a 2:1 mux
`timescale 1ns/10ps
// 1-bit 8:1 multiplexer built from two mux4_1 and one mux2_1.
//   in  : the eight data inputs
//   sel : index of the input routed to out
//   out : in[sel]
// sel[1:0] picks within each half; sel[2] picks between the halves.
// DELAY_NS is forwarded to every sub-mux.
//
// Changes: the parameter override now uses the standard #(.NAME(value))
// form instead of the bare `#DELAY_NS`, and the unused wire w3 has been
// removed.
module mux8 #(parameter DELAY_NS=0.05) (out, in, sel);
    input  logic [7:0] in;
    input  logic [2:0] sel;
    output logic       out;

    logic lower_pick;   // selected bit from in[3:0]
    logic upper_pick;   // selected bit from in[7:4]

    mux4_1 #(.DELAY_NS(DELAY_NS)) m1 (.out(lower_pick), .in(in[3:0]), .sel(sel[1:0]));
    mux4_1 #(.DELAY_NS(DELAY_NS)) m2 (.out(upper_pick), .in(in[7:4]), .sel(sel[1:0]));
    mux2_1 #(.DELAY_NS(DELAY_NS)) m3 (.out(out), .i0(lower_pick), .i1(upper_pick), .sel(sel[2]));
endmodule
// Exhaustive stimulus for mux8: {in, sel} is eleven bits wide, and all 2**11
// combinations are applied for 1 time unit each.
module mux8_testbench();
    logic [7:0] in;
    logic [2:0] sel;
    logic       out;

    mux8 dut (.*);

    integer vec;

    initial begin
        vec = 0;
        while (vec < 2**11) begin
            {in, sel} = vec;
            #1;
            vec++;
        end
    end
endmodule

32
src/hdl/nor16.sv Normal file
View File

@ -0,0 +1,32 @@
`timescale 1ns/10ps
// nor gate for 16 inputs
// 16-input NOR built from four 4-input NOR gates and one 4-input AND.
//   in  : sixteen input bits
//   out : 1 only when every bit of in is 0
// DELAY_NS is the propagation delay applied to every gate.
//
// Fix: the gates used `#50`, which under this file's `timescale 1ns/10ps is
// 50 ns per gate, a thousand times slower than the 0.05 ns used by the other
// gate-level modules in this design. The delay is now a parameter defaulting
// to 0.05 so that it is consistent with them.
module nor16 #(parameter DELAY_NS=0.05) (in, out);
    input  logic [15:0] in;
    output logic        out;

    // each net is high when its group of four inputs is all zero
    wire nor1, nor2, nor3, nor4;

    nor #DELAY_NS n1 (nor1, in[0],  in[1],  in[2],  in[3]);
    nor #DELAY_NS n2 (nor2, in[4],  in[5],  in[6],  in[7]);
    nor #DELAY_NS n3 (nor3, in[8],  in[9],  in[10], in[11]);
    nor #DELAY_NS n4 (nor4, in[12], in[13], in[14], in[15]);

    // all four groups zero -> whole vector zero
    and #DELAY_NS a1 (out, nor1, nor2, nor3, nor4);
endmodule
// tb for nor16
// Stimulus-only testbench for nor16: applies four patterns (all zeros, a
// mixed pattern, only the top bit set, all ones), 1000 time units each.
module nor16_testbench();
    logic [15:0] in;
    logic        out;

    nor16 dut (.*);

    // patterns applied in order
    logic [15:0] patterns [4];
    integer      idx;

    initial begin
        patterns[0] = 16'b0;
        patterns[1] = 16'b0001110001110001;
        patterns[2] = 16'b1000000000000000;
        patterns[3] = 16'b1111111111111111;

        for (idx = 0; idx < 4; idx++) begin
            in = patterns[idx];
            #1000;
        end
    end
endmodule

31
src/hdl/nor64.sv Normal file
View File

@ -0,0 +1,31 @@
`timescale 1ns/10ps
// nor gate for 64 inputs
// 64-input NOR built from four nor16 blocks and one 4-input AND.
//   in  : sixty-four input bits
//   out : 1 only when every bit of in is 0
// DELAY_NS is the propagation delay of the final AND gate.
//
// Fix: the AND gate used `#50`, which under this file's `timescale 1ns/10ps
// is 50 ns, a thousand times slower than the 0.05 ns used by the other
// gate-level modules in this design. The delay is now a parameter defaulting
// to 0.05 so that it is consistent with them.
module nor64 #(parameter DELAY_NS=0.05) (in, out);
    input  logic [63:0] in;
    output logic        out;

    // each net is high when its 16-bit group is all zero
    wire nor161, nor162, nor163, nor164;

    nor16 n1 (in[15:0],  nor161);
    nor16 n2 (in[31:16], nor162);
    nor16 n3 (in[47:32], nor163);
    nor16 n4 (in[63:48], nor164);

    // all four groups zero -> whole vector zero
    and #DELAY_NS a (out, nor161, nor162, nor163, nor164);
endmodule
// Stimulus-only testbench for nor64: applies zero followed by three non-zero
// values, 1000 time units each.
module nor64_testbench();
    logic [63:0] in;
    logic        out;

    nor64 dut (.*);

    // values applied in order
    logic [63:0] patterns [4];
    integer      idx;

    initial begin
        patterns[0] = 64'b0;
        patterns[1] = 64'd1;
        patterns[2] = 64'd114514;
        patterns[3] = 64'd919810;

        for (idx = 0; idx < 4; idx++) begin
            in = patterns[idx];
            #1000;
        end
    end
endmodule

42
src/hdl/reg16.sv Normal file
View File

@ -0,0 +1,42 @@
// a 16-bit register from four 4-bit registers
`timescale 1ns/10ps
// 16-bit register with enable, composed of four reg4 nibbles that share the
// same en, clk and reset.
//   in    : data to store
//   out   : stored value
//   en    : passed to every reg4 as its enable
//   clk   : clock
//   reset : passed to every reg4 as its reset
module reg16 (
    input  logic [15:0] in,
    output logic [15:0] out,
    input  logic        en, clk, reset
);
    // one reg4 per nibble, least-significant first
    reg4 r0 (.in(in[3:0]),   .out(out[3:0]),   .en(en), .clk(clk), .reset(reset));
    reg4 r1 (.in(in[7:4]),   .out(out[7:4]),   .en(en), .clk(clk), .reset(reset));
    reg4 r2 (.in(in[11:8]),  .out(out[11:8]),  .en(en), .clk(clk), .reset(reset));
    reg4 r3 (.in(in[15:12]), .out(out[15:12]), .en(en), .clk(clk), .reset(reset));
endmodule
// Clocked testbench for reg16: resets the register, then alternates between
// enable-low cycles (the stored value should hold) and enable-high cycles
// (the input should be captured). Results are inspected in the waveform.
//
// Fix: the first stimulus was written as 4'd65536, a 4-bit literal that
// truncates to 0, so the "hold while disabled" step presented zero instead
// of a recognisable value. It is now 16'hFFFF, the largest 16-bit value.
module reg16_testbench();
    logic [15:0] in, out;
    logic reset, clk, en;

    reg16 dut (.in, .out, .en, .clk, .reset);

    // Set up a simulated clock.
    parameter CLOCK_PERIOD=10;
    initial begin
        clk <= 0;
        forever #(CLOCK_PERIOD/2) clk <= ~clk; // Forever toggle the clock
    end

    initial begin
        reset <= 1;                                @(posedge clk); // reset first
        // enable low: the value on `in` must not be captured
        reset <= 0; en <= 0; in <= 16'hFFFF;       @(posedge clk);
        // enable high: capture two different values back to back
                    en <= 1; in <= 16'd11451;      @(posedge clk);
                    en <= 1; in <= 16'd19198;      @(posedge clk);
        // enable low again: hold regardless of what `in` does
                    en <= 0; in <= 16'd19198;      @(posedge clk);
                    en <= 0; in <= 16'd14;         @(posedge clk);
        // final capture
                    en <= 1; in <= 16'd32;         @(posedge clk);
        repeat(2) @(posedge clk);
        $stop;
    end
endmodule

42
src/hdl/reg4.sv Normal file
View File

@ -0,0 +1,42 @@
// four bits register from four enabled register
`timescale 1ns/10ps
// 4-bit register with enable: four DFF_with_enable cells sharing the same
// reset, clk and en.
//   in    : data to store
//   out   : stored value
//   en    : passed to every flip-flop as its enable
//   clk   : clock
//   reset : passed to every flip-flop as its reset
// The flip-flops are connected by position, in the order
// (data in, reset, clk, en, data out).
module reg4 (
    input  logic [3:0] in,
    output logic [3:0] out,
    input  logic       en, clk, reset
);
    // one flip-flop per bit
    DFF_with_enable d3 (in[3], reset, clk, en, out[3]);
    DFF_with_enable d2 (in[2], reset, clk, en, out[2]);
    DFF_with_enable d1 (in[1], reset, clk, en, out[1]);
    DFF_with_enable d0 (in[0], reset, clk, en, out[0]);
endmodule
// Clocked testbench for reg4: resets the register, then alternates between
// enable-low cycles (the stored value should hold) and enable-high cycles
// (the input should be captured). Results are inspected in the waveform.
module reg4_testbench();
logic [3:0] in, out;
logic reset, clk, en;
reg4 dut (.in, .out, .en, .clk, .reset);
// Set up a simulated clock.
parameter CLOCK_PERIOD=10;
initial begin
clk <= 0;
forever #(CLOCK_PERIOD/2) clk <= ~clk; // Forever toggle the clock
end
initial begin
// en and in are not driven during this first reset cycle.
reset <= 1; @(posedge clk); // Always reset FSMs at start
// write in some values and check if register values stays when enable is low, or if is writes in if enable is high.
// enable low: 4'b1111 must not be captured
reset <= 0; en <= 0; in <= 4'b1111; @(posedge clk);
// enable high: capture 4'b1111, then 4'b1001
en <= 1; in <= 4'b1111; @(posedge clk);
en <= 1; in <= 4'b1001; @(posedge clk);
// enable low: hold even though the input changes
en <= 0; in <= 4'b1001; @(posedge clk);
en <= 0; in <= 4'b1111; @(posedge clk);
// final capture
en <= 1; in <= 4'b0000; @(posedge clk);
// two extra cycles so the last write is visible before stopping
repeat(2) @(posedge clk);
$stop;
end
endmodule

42
src/hdl/reg64.sv Normal file
View File

@ -0,0 +1,42 @@
// a 64-bit enabled register from four 16-bit registers
`timescale 1ns/10ps
// 64-bit register with enable, composed of four reg16 slices that share the
// same en, clk and reset.
//   in    : data to store
//   out   : stored value
//   en    : passed to every reg16 as its enable
//   clk   : clock
//   reset : passed to every reg16 as its reset
module reg64 (
    input  logic [63:0] in,
    output logic [63:0] out,
    input  logic        en, clk, reset
);
    // one reg16 per 16-bit slice, least-significant first
    reg16 r0 (.in(in[15:0]),  .out(out[15:0]),  .en(en), .clk(clk), .reset(reset));
    reg16 r1 (.in(in[31:16]), .out(out[31:16]), .en(en), .clk(clk), .reset(reset));
    reg16 r2 (.in(in[47:32]), .out(out[47:32]), .en(en), .clk(clk), .reset(reset));
    reg16 r3 (.in(in[63:48]), .out(out[63:48]), .en(en), .clk(clk), .reset(reset));
endmodule
// Clocked testbench for reg64: resets the register, then alternates between
// enable-low cycles (the stored value should hold) and enable-high cycles
// (the input should be captured). Results are inspected in the waveform.
module reg64_testbench();
logic [63:0] in, out;
logic reset, clk, en;
reg64 dut (.in, .out, .en, .clk, .reset);
// Set up a simulated clock.
parameter CLOCK_PERIOD=10;
initial begin
clk <= 0;
forever #(CLOCK_PERIOD/2) clk <= ~clk; // Forever toggle the clock
end
initial begin
// en and in are not driven during this first reset cycle.
reset <= 1; @(posedge clk); // Always reset FSMs at start
// write in some values and check if register values stays when enable is low, or if is writes in if enable is high.
// enable low: this value must not be captured
reset <= 0; en <= 0; in <= 64'd4254663563463457; @(posedge clk);
// enable high: capture two different values back to back
en <= 1; in <= 64'd1145141919810; @(posedge clk);
en <= 1; in <= 64'd8689689680; @(posedge clk);
// enable low: hold even though the input changes
en <= 0; in <= 64'd0; @(posedge clk);
en <= 0; in <= 64'd8689689680; @(posedge clk);
// final capture
en <= 1; in <= 64'd1145141919810; @(posedge clk);
// two extra cycles so the last write is visible before stopping
repeat(2) @(posedge clk);
$stop;
end
endmodule

68
src/hdl/reg64x32.sv Normal file
View File

@ -0,0 +1,68 @@
// 32 64-bits dff connected in parallel
`timescale 1ns/10ps
// Bank of thirty-two 64-bit registers sharing one write-data bus.
//   in    : 64-bit write data, fanned out to registers 0..30
//   en    : per-register write enables; en[31] is not used
//   clk   : clock
//   reset : passed to every register
//   out   : current contents of all 32 registers, out[n] for register n
// Registers 0..30 load `in` when their enable bit is set. Register 31 is the
// zero register: its data input is tied to 0 and its enable is tied high.
module reg64x32 (in, out, en, clk, reset);
    output logic [31:0][63:0] out;
    input  logic [63:0]       in;
    input  logic [31:0]       en;
    input  logic              clk, reset;

    // general-purpose registers 0..30
    generate
        for (genvar i = 0; i < 31; i++) begin : manyDFFs
            reg64 bigReg (
                .in   (in),
                .out  (out[i]),
                .en   (en[i]),
                .clk  (clk),
                .reset(reset)
            );
        end
    endgenerate

    // register 31: constantly reloaded with zero
    reg64 reg0 (
        .in   (64'd0),
        .out  (out[31]),
        .en   (1'b1),
        .clk  (clk),
        .reset(reset)
    );
endmodule
// Clocked testbench for reg64x32.
//   1. Raises en[31] with data 0xA0 to show register 31 cannot be written.
//   2. Writes an index-derived pattern into registers 0..30, one at a time.
//   3. Drops all enables and keeps changing the data bus to show that the
//      stored values hold.
// reset is tied low for the whole run. Results are inspected in the waveform.
//
// Fixes: the progress message used "%i", which is not a SystemVerilog format
// specifier; it now uses "%0d". The one-hot enable literals are written as
// 32-bit values to match the width of `en`.
module reg64x32_testbench();
    logic [63:0]       in;
    logic [31:0]       en;
    logic              clk, reset;
    logic [31:0][63:0] out; // output
    integer i;

    assign reset = 1'b0;

    reg64x32 dut (.*);

    initial $timeformat(-9, 2, " ns", 10);

    initial begin // Set up the clock
        clk <= 0;
        forever #(5000/2) clk <= ~clk; // clock period is 5000 time units
    end

    initial begin
        // Try to write the value 0xA0 into register 31.
        // Register 31 should always be at the value of 0.
        $display("%t Attempting overwrite of register 31, which should always be 0", $time);
        en <= 32'b1 << 31;
        in <= 64'h00000000000000A0;
        @(posedge clk);

        // Write a distinct pattern into each of registers 0..30.
        for (i=0; i<31; i=i+1) begin
            // one idle cycle with all enables low between writes
            en <= 0;
            @(posedge clk);
            $display("%t Writing pattern to %0d", $time, i);
            en <= 32'b1 << i;   // one-hot enable for register i
            in <= i*64'h0000010204080001;
            @(posedge clk);
        end

        // Go back and verify that the registers retained the data: with all
        // enables low, changing `in` must not disturb any register.
        $display("%t Checking pattern.", $time);
        en <= 32'b0;
        for (i=0; i<32; i=i+1) begin
            in <= i*64'h0000000000000100+i;
            @(posedge clk);
        end
        $stop;
    end
endmodule

32
src/hdl/regfile.sv Normal file
View File

@ -0,0 +1,32 @@
// Top level of the register file: two 5-bit read selectors and one 5-bit write selector; takes 64-bit write data and returns two 64-bit read values chosen by the read selectors, with a write-enable input
`timescale 1ns/10ps
// Register file: 32 registers of 64 bits, two read ports, one write port.
//   RegWrite       : write enable, gates the write-address decoder
//   clk            : clock for the register bank
//   ReadRegister1  : register number presented on ReadData1
//   ReadRegister2  : register number presented on ReadData2
//   WriteData      : value to store
//   WriteRegister  : register number to store into
//   ReadData1/2    : contents of the selected registers
// The reset input of the register bank is tied low here.
//
// Change: the mux parameters are now set with inline #(...) overrides on the
// instances instead of separate defparam statements, which the SystemVerilog
// standard lists as deprecated. The values (WIDTH=64, ADDR=5) are unchanged.
module regfile(RegWrite, clk, ReadRegister1, ReadRegister2, WriteData, WriteRegister, ReadData1, ReadData2);
    input  logic        RegWrite;
    input  logic        clk;
    input  logic [4:0]  ReadRegister1;
    input  logic [4:0]  ReadRegister2;
    input  logic [63:0] WriteData;
    input  logic [4:0]  WriteRegister;
    output logic [63:0] ReadData1;
    output logic [63:0] ReadData2;

    logic [31:0]       en;        // one-hot write enables from the decoder
    logic [31:0][63:0] dataBus;   // contents of all 32 registers

    // write path: decode the write address into at most one register enable
    decoder_5x32 decoder (.en(RegWrite), .in(WriteRegister), .out(en));

    // storage
    reg64x32 regs (.clk(clk), .reset(1'b0), .en(en), .in(WriteData), .out(dataBus));

    // read ports: each one selects a full 64-bit word out of the bank
    mux32_64 #(.WIDTH(64), .ADDR(5)) mux1 (.datIn(dataBus), .readReg(ReadRegister1), .readDat(ReadData1));
    mux32_64 #(.WIDTH(64), .ADDR(5)) mux2 (.datIn(dataBus), .readReg(ReadRegister2), .readDat(ReadData2));
endmodule

71
src/hdl/regstim.sv Normal file
View File

@ -0,0 +1,71 @@
// Test bench for Register file
`timescale 1ns/10ps
// Clocked testbench for regfile.
//   1. Attempts to write 0xA0 into register 31.
//   2. Writes an index-derived pattern into registers 0..30.
//   3. Reads every register back on both read ports with RegWrite low.
// Nothing is asserted; results are inspected in the waveform and transcript.
module regstim();
parameter ClockDelay = 5000;
logic [4:0] ReadRegister1, ReadRegister2, WriteRegister;
logic [63:0] WriteData;
logic RegWrite, clk;
logic [63:0] ReadData1, ReadData2;
integer i;
// Your register file MUST be named "regfile".
// Also you must make sure that the port declarations
// match up with the module instance in this stimulus file.
regfile dut (.ReadData1, .ReadData2, .WriteData,
.ReadRegister1, .ReadRegister2, .WriteRegister,
.RegWrite, .clk);
// Force %t's to print in a nice format.
initial $timeformat(-9, 2, " ns", 10);
initial begin // Set up the clock
clk <= 0;
// Toggle every half period, giving a full period of ClockDelay time units.
forever #(ClockDelay/2) clk <= ~clk;
end
initial begin
// Try to write the value 0xA0 into register 31.
// Register 31 should always be at the value of 0.
// RegWrite is a single bit; the 5-bit literal below is truncated to 0.
RegWrite <= 5'd0;
ReadRegister1 <= 5'd0;
ReadRegister2 <= 5'd0;
WriteRegister <= 5'd31;
WriteData <= 64'h00000000000000A0;
@(posedge clk);
$display("%t Attempting overwrite of register 31, which should always be 0", $time);
RegWrite <= 1;
@(posedge clk);
// Write a value into each register.
$display("%t Writing pattern to all registers.", $time);
for (i=0; i<31; i=i+1) begin
// First cycle: set up address and data with the write enable low.
RegWrite <= 0;
// Port 1 reads the previously written register; for i = 0 the value -1
// truncates to 5'd31, i.e. register 31.
ReadRegister1 <= i-1;
ReadRegister2 <= i;
WriteRegister <= i;
WriteData <= i*64'h0000010204080001;
@(posedge clk);
// Second cycle: raise the write enable so the value is stored.
RegWrite <= 1;
@(posedge clk);
end
// Go back and verify that the registers
// retained the data.
$display("%t Checking pattern.", $time);
for (i=0; i<32; i=i+1) begin
// RegWrite stays low, so the new WriteData values must not be stored.
RegWrite <= 0;
ReadRegister1 <= i-1;
ReadRegister2 <= i;
WriteRegister <= i;
WriteData <= i*64'h0000000000000100+i;
@(posedge clk);
end
$stop;
end
endmodule

127
src/hdl/slice.sv Normal file
View File

@ -0,0 +1,127 @@
/* a gate-level 1-bit slice ALU based on Oct11 lecture slide, but has more selections
* from slide 3, a, b are just a0 b0 a1 b1..... and cin_i is just the red arrow goes into the 1-bit slicer
* e.g. s0 for cin_i on the first 1-bit slice. and count_i is the red arrow coming out of a slice
* and ri is the result bit from this ALU
*/
`timescale 1ns/10ps
// Gate-level 1-bit ALU slice.
//   a, b    : operand bits
//   cin_i   : carry in to this slice
//   sel     : operation select (see table)
//   cout_i  : carry out of the full adder
//   ri      : result bit
//
//   sel  result
//   000  b
//   010  a + b
//   011  a - b   (b is inverted inside the slice)
//   100  a & b
//   101  a | b
//   110  a ^ b
//   001, 111 : unused, result is 0
//
// cout_i always comes from the adder, whatever operation is selected.
// DELAY_NS is the gate delay, forwarded to every sub-module.
//
// Change: sub-module parameter overrides now use the standard
// #(.NAME(value)) form instead of the bare `#DELAY_NS`, which the language
// grammar only defines for delays. Delay values are unchanged.
module slice #(parameter DELAY_NS=0.05) (a, b, cin_i, sel, cout_i, ri);
    input  logic       a, b, cin_i;
    input  logic [2:0] sel;
    output logic       cout_i, ri;

    logic [7:0] muxIn;   // candidate results, indexed by sel
    logic invB;          // ~b
    logic bIn;           // b or ~b, as fed to the adder
    logic sub;           // high when sel[1:0] == 2'b11 (subtract; also the unused 111)

    // ----- adder operand preparation -----
    not #DELAY_NS nb (invB, b);
    and #DELAY_NS subSel (sub, sel[1], sel[0]);
    mux2_1 #(.DELAY_NS(DELAY_NS)) bsel (.out(bIn), .i0(b), .i1(invB), .sel(sub));

    // ----- add / subtract (sel 010 and 011 share the adder output) -----
    fullAdder #(.DELAY_NS(DELAY_NS)) adder (.a(a), .b(bIn), .c_in(cin_i), .sum(muxIn[2]), .c_out(cout_i));
    assign muxIn[3] = muxIn[2];

    // ----- pass-through and unused codes -----
    assign muxIn[0] = b;   // 000: result = b
    assign muxIn[1] = 0;   // 001: unused, tied low
    assign muxIn[7] = 0;   // 111: unused, tied low

    // ----- bitwise operations -----
    and #DELAY_NS abAnd (muxIn[4], a, b);   // 100
    or  #DELAY_NS abOr  (muxIn[5], a, b);   // 101
    xor #DELAY_NS abXor (muxIn[6], a, b);   // 110

    // ----- result selection -----
    mux8 #(.DELAY_NS(DELAY_NS)) selRes (.out(ri), .in(muxIn), .sel(sel));
endmodule
// Stimulus-only testbench for the 1-bit ALU slice. For each operation it
// applies all four {a, b} combinations, holding each for `delay` time units.
// Subtract is run twice, once with cin_i = 1 and once with cin_i = 0.
// Nothing is asserted; results are inspected in the waveform.
module slice_testbench();
// tb similar for the toplevel, minus the assersion
parameter delay = 1000;
logic a, b, cin_i;
logic [2:0] sel; // selector for the mux
logic cout_i, ri;
// using similar signal selection from toplevel tb
parameter ALU_PASS_B=3'b000, ALU_ADD=3'b010, ALU_SUBTRACT=3'b011, ALU_AND=3'b100, ALU_OR=3'b101, ALU_XOR=3'b110;
slice dut(.*);
// Force %t's to print in a nice format.
initial $timeformat(-9, 2, " ns", 10);
integer i;
initial begin
// Note: cin_i is not driven until the add test below, so it is X during
// the and / or / xor tests.
$display("%t and", $time);
sel = ALU_AND;
for (i=0; i<4; i++) begin // iterate some numbers to check bit and functionality
{a, b} = i;
#(delay);
end
$display("%t or", $time);
sel = ALU_OR;
for (i=0; i<4; i++) begin // iterate some numbers to check bit or functionality
{a, b} = i;
#(delay);
end
$display("%t xor", $time);
sel = ALU_XOR;
for (i=0; i<4; i++) begin // iterate some numbers to check bit xor functionality
{a, b} = i;
#(delay);
end
$display("%t add", $time); // iterate some numbers to check add functionality
sel = ALU_ADD;
cin_i = 0;
for (i=0; i<4; i++) begin
{a, b} = i;
#(delay);
end
$display("%t sub", $time);
sel = ALU_SUBTRACT;
// carry in of 1, as used for the first slice of a subtract
cin_i = 1;
for (i=0; i<4; i++) begin // iterate some numbers to check subtract functionality, on the first slice
{a, b} = i;
#(delay);
end
$display("%t test subtract", $time);
sel = ALU_SUBTRACT;
// carry in of 0, one of the two values a later slice can receive
cin_i = 0;
for (i=0; i<4; i++) begin // iterate some numbers to check subtract functionality, on the rest of the slice
{a, b} = i;
#(delay);
end
$display("%t test pass b", $time);
sel = ALU_PASS_B;
cin_i = 0;
for (i=0; i<4; i++) begin // iterate some numbers to check pass b functionality
{a, b} = i;
#(delay);
end
end
endmodule

1
src/python/README.md Normal file
View File

@ -0,0 +1 @@

1
tools/README.md Normal file
View File

@ -0,0 +1 @@

View File

@ -0,0 +1,31 @@
// Test of ADDI instruction, with a final B(ranch) instruction to stay in one place.
// Requires:
// ADDI & B instructions
// Expected results:
// X0 = 0
// X1 = 1
// X2 = 2
// X3 = 3
// X4 = 4
//ADDI: I-type, Reg[Rd] = Reg[Rn] + {'0, Imm12}
//OP Imm12 Rn Rd
//3322222222 221111111111 00000 00000
//1098765432 109876543210 98765 43210
//1001000100 Unsigned 0..31 0..31
//B: B-type, PC = PC + SignExtend({Imm26, 2'b00})
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//000101 2's Comp Imm26
//Note: X31 is always 0.
// MAIN:
1001000100_000000000000_11111_00000 // ADDI X0, X31, #0 // X0 = 0
1001000100_000000000001_00000_00001 // ADDI X1, X0, #1 // X1 = 1
1001000100_000000000001_00001_00010 // ADDI X2, X1, #1 // X2 = 2
1001000100_000000000010_00001_00011 // ADDI X3, X1, #2 // X3 = 3
1001000100_000000000100_00000_00100 // ADDI X4, X0, #4 // X4 = 4
000101_00000000000000000000000000 // HALT:B HALT // HALT = 0
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Bogus instruction - pipelined CPU may need it.

View File

@ -0,0 +1,49 @@
// Test of SUB instruction.
// Requires:
// ADDS, SUBS, ADDI & B instructions
// Expected results:
// X0 = 1
// X1 = -1
// X2 = 2
// X3 = -3
// X4 = -2
// X5 = -5
// X6 = 0
// X7 = -6
// Flags: negative = 1, carry-out = 1, overflow = 0, zero = 0
//ADDI: I-type, Reg[Rd] = Reg[Rn] + {'0, Imm12}
//OP Imm12 Rn Rd
//3322222222 221111111111 00000 00000
//1098765432 109876543210 98765 43210
//1001000100 Unsigned 0..31 0..31
//B: B-type, PC = PC + SignExtend({Imm26, 2'b00})
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//000101 2's Comp Imm26
//SUBS: R-type, Reg[Rd] = Reg[Rn] - Reg[Rm]
//OP Rm Shamt Rn Rd
//33222222222 21111 111111 00000 00000
//10987654321 09876 543210 98765 43210
//11101011000 0..31 000000 0..31 0..31
//ADDS: R-type, Reg[Rd] = Reg[Rn] + Reg[Rm]
//OP Rm Shamt Rn Rd
//33222222222 21111 111111 00000 00000
//10987654321 09876 543210 98765 43210
//10101011000 0..31 000000 0..31 0..31
// MAIN:
1001000100_000000000001_11111_00000 // ADDI X0, X31, #1 // X0 = 1
11101011000_00000_000000_11111_00001 // SUBS X1, X31, X0 // X1 = -1
11101011000_00001_000000_00000_00010 // SUBS X2, X0, X1 // X2 = 2
11101011000_00010_000000_00001_00011 // SUBS X3, X1, X2 // X3 = -3
11101011000_00001_000000_00011_00100 // SUBS X4, X3, X1 // X4 = -2
10101011000_00100_000000_00011_00101 // ADDS X5, X3, X4 // X5 = -5
10101011000_00001_000000_00000_00110 // ADDS X6, X0, X1 // X6 = 0
10101011000_00101_000000_00001_00111 // ADDS X7, X1, X5 // X7 = -6. Flags: negative, carry-out
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP - should NOT write the flags.
000101_00000000000000000000000000 // HALT:B HALT // (HALT = 0)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Bogus instruction - pipelined CPU may need it.

View File

@ -0,0 +1,76 @@
// Test of CBZ and B instruction.
// Requires:
// CBZ, B, & ADDI instructions
// Expected results:
// X0 = 1
// X1 = 0 (anything else indicates an error)
// X2 = 0 on a single-cycle CPU, 4 on pipelined CPUs (counts delay slots executed)
// X3 = 1 (signifies program end was reached)
// X4 = 16+8+4+2+1 = 31 (bit per properly executed branch)
// X5 = 0 (should never get incremented, means accelerated branches not working).
//ADDI: I-type, Reg[Rd] = Reg[Rn] + {'0, Imm12}
//OP Imm12 Rn Rd
//3322222222 221111111111 00000 00000
//1098765432 109876543210 98765 43210
//1001000100 Unsigned 0..31 0..31
//B: B-type, PC = PC + SignExtend({Imm26, 2'b00})
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//000101 2's Comp Imm26
//CBZ: CB-type, if (R[Rt] == 0) PC = PC + SignExtend({Imm19, 2'b00})
//OP Imm19 Rt
//33222222 2222111111111100000 00000
//10987654 3210987654321098765 43210
//10110100 2's Comp Imm19 0..31
// MAIN:
1001000100_000000000001_11111_00000 // ADDI X0, X31, #1 // Constant 1 register for testing
1001000100_000000000000_11111_00001 // ADDI X1, X31, #0 // Error register, should never be non-zero
1001000100_000000000000_11111_00010 // ADDI X2, X31, #0 // Delay slot counter. Value depends on delay slots.
1001000100_000000000000_11111_00011 // ADDI X3, X31, #0 // Flag for when we get to the final result.
1001000100_000000000000_11111_00100 // ADDI X4, X31, #0 // Set each bit as you do a branch correctly.
1001000100_000000000000_11111_00101 // ADDI X5, X31, #0 // Set if branches have >1 delay slot.
000101_00000000000000000000001100 // B FORWARD_B // 1st taken branch (+12*4)
1001000100_000000000001_00010_00010 // ADDI X2, X2, #1 // delay_slot++
1001000100_000000000001_00101_00101 // ADDI X5, X5, #1 // Should never reach here.
// ERROR: // Should never get here.
1001000100_000000000001_11111_00001 // ADDI X1, X31, #1 // Error = 1
000101_11111111111111111111111111 // B ERROR // Loop forever (-1)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
// BACKWARD_B: // Target for a backwards branch
1001000100_000000000010_00100_00100 // ADDI X4, X4, #2 // 2nd branch succeeded
10110100_0000000000000010100_11111 // CBZ X31, FORWARD_CBZ // 3rd taken branch (+20)
1001000100_000000000001_00010_00010 // ADDI X2, X2, #1 // delay_slot++
1001000100_000000000001_00101_00101 // ADDI X5, X5, 1 // Should never reach here
000101_11111111111111111111111001 // B ERROR // Should never reach here (-7)
1001000100_000000000000_11111_11111 // ADDI X31, X31, 0 // NOP
// FORWARD_B:
1001000100_000000000001_00100_00100 // ADDI X4, X4, 1 // 1st branch succeeded.
000101_11111111111111111111111001 // B BACKWARD_B // 2nd taken branch (-7)
1001000100_000000000001_00010_00010 // ADDI X2, X2, 1 // delay_slot++
1001000100_000000000001_00101_00101 // ADDI X5, X5, 1 // Should never reach here
000101_11111111111111111111110011 // B ERROR // Should never reach here (-13)
1001000100_000000000000_11111_11111 // ADDI X31, X31, 0 // NOOP
// BACKWARD_CBZ:
1001000100_000000001000_00100_00100 // ADDI X4, X4, 8 // 4th branch succeeded.
10110100_0000000000000000110_00000 // CBZ X0, NOT_TAKEN // X0 != 0, so no branch (+6)
1001000100_000000000000_11111_11111 // ADDI X31, X31, 0 // NOOP
1001000100_000000010000_00100_00100 // ADDI X4, X4, 16 // Successfully didn't branch.
1001000100_000000000001_11111_00011 // ADDI X3, X31, 1 // Flag for finishing.
// HALT:
000101_00000000000000000000000000 // B HALT // Loop forever (0)
1001000100_000000000000_11111_11111 // ADDI X31, X31, 0 // NOOP
// NOT_TAKEN:
000101_11111111111111111111101010 // B ERROR // Should never reach here (-22)
1001000100_000000000000_11111_11111 // ADDI X31, X31, 0 // NOOP
// FORWARD_CBZ:
1001000100_000000000100_00100_00100 // ADDI X4, X4, 4 // 3rd branch succeeded.
10110100_1111111111111110110_11111 // CBZ X31, BACKWARD_CBZ // 4th taken branch (-10)
1001000100_000000000001_00010_00010 // ADDI X2, X2, 1 // delay_slot++
1001000100_000000000001_00101_00101 // ADDI X5, X5, 1 // Should never reach here.
000101_11111111111111111111100100 // B ERROR // Should never reach here (-28)
1001000100_000000000000_11111_11111 // ADDI X31, X31, 0 // NOOP

View File

@ -0,0 +1,55 @@
// Test of LDUR and STUR instructions
// Requires:
// B, ADDI, LDUR, & STUR instructions
// Expected results:
// X0 = 1
// X1 = 2
// X2 = 3
// X3 = 8
// X4 = 11
// X5 = 1
// X6 = 2
// X7 = 3
// Mem[0] = 1
// Mem[8] = 2
// Mem[16] = 3
//ADDI: I-type, Reg[Rd] = Reg[Rn] + {'0, Imm12}
//OP Imm12 Rn Rd
//3322222222 221111111111 00000 00000
//1098765432 109876543210 98765 43210
//1001000100 Unsigned 0..31 0..31
//B: B-type, PC = PC + SignExtend({Imm26, 2'b00})
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//000101 2's Comp Imm26
//LDUR: D-type, Reg[Rt] = Mem[Reg[Rn] + SignExtend(Imm9)]
//OP Imm9 00 Rn Rt
//33222222222 211111111 11 00000 00000
//10987654321 098765432 10 98765 43210
//11111000010 2's Comp 00 0..31 0..31
//STUR: D-type, Mem[Reg[Rn] + SignExtend(Imm9)] = Reg[Rt]
//OP Imm9 00 Rn Rt
//33222222222 211111111 11 00000 00000
//10987654321 098765432 10 98765 43210
//11111000000 2's Comp 00 0..31 0..31
// MAIN:
1001000100_000000000001_11111_00000 // ADDI X0, X31, #1 // X0 = 1
1001000100_000000000010_11111_00001 // ADDI X1, X31, #2 // X1 = 2
1001000100_000000000011_11111_00010 // ADDI X2, X31, #3 // X2 = 3
1001000100_000000001000_11111_00011 // ADDI X3, X31, #8 // X3 = 8
1001000100_000000001011_11111_00100 // ADDI X4, X31, #11 // X4 = 11
11111000000_000000000_00_11111_00000 // STUR X0, [X31, #0] // Mem[0] = 1
11111000000_111111101_00_00100_00001 // STUR X1, [X4, #-3] // Mem[8] = 2
11111000000_000001000_00_00011_00010 // STUR X2, [X3, #8] // Mem[16] = 3
11111000010_000000101_00_00100_00111 // LDUR X7, [X4, #5] // X7 = Mem[16] = 3
11111000010_111111000_00_00011_00101 // LDUR X5, [X3, #-8] // X5 = Mem[0] = 1
11111000010_000000101_00_00010_00110 // LDUR X6, [X2, #5] // X6 = Mem[8] = 2
000101_00000000000000000000000000 // HALT:B HALT // HALT = 0
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Bogus instruction - pipelined CPU may need it

View File

@ -0,0 +1,68 @@
// Test of B.LT instruction.
// Requires:
// B.LT, SUBS, ADDI & B instructions
// Expected results:
// X0 = 1
// X1 = 1
//ADDI: I-type, Reg[Rd] = Reg[Rn] + {'0, Imm12}
//OP Imm12 Rn Rd
//3322222222 221111111111 00000 00000
//1098765432 109876543210 98765 43210
//1001000100 Unsigned 0..31 0..31
//B: B-type, PC = PC + SignExtend({Imm26, 2'b00})
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//000101 2's Comp Imm26
//SUBS: R-type, Reg[Rd] = Reg[Rn] - Reg[Rm]
//OP Rm Shamt Rn Rd
//33222222222 21111 111111 00000 00000
//10987654321 09876 543210 98765 43210
//11101011000 0..31 000000 0..31 0..31
//B.cond: CB-type, if (flags meet condition) PC = PC + SignExtend({Imm19, 2'b00})
//OP Imm19 Cond
//33222222 2222111111111100000 00000
//10987654 3210987654321098765 43210
//01010100 2's Comp Imm19 0..15
//
// Cond Name Meaning after SUBS FlagTest
// 00000 EQ Equal Z==1
// 00001 NE Not equal Z==0
// 00010 HS Unsigned >= C==1
// 00011 LO Unsigned < C==0
// 00100 MI Minus N==1
// 00101 PL Plus/0 N==0
// 00110 VS Overflow V==1
// 00111 VC No Overflow V==0
// 01000 HI Unsigned > C==1 && Z==0
// 01001 LS Unsigned <= C==0 || Z==1
// 01010 GE Signed >= N==V
// 01011 LT Signed < N!=V
// 01100 GT Signed > Z==0 && N==V
// 01101 LE Signed <= !(Z==0 && N==V)
// 0111x AL Always Always
// MAIN:
1001000100_000000000001_11111_00000 // ADDI X0, X31, #1 // X0 = 1, comparison target.
1001000100_000000000000_11111_00001 // ADDI X1, X31, #0 // X1 = 0, only set to 1 if we get it all right.
11101011000_00000_000000_00000_11111 // SUBS X31, X0, X0 // 1-1, not less than.
01010100_0000000000000001000_01011 // B.LT ERROR // Don't take (+8)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11101011000_11111_000000_00000_11111 // SUBS X31, X0, X31 // 1 - 0, not less than.
01010100_0000000000000000101_01011 // B.LT ERROR // Don't take (+5)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11101011000_00000_000000_11111_11111 // SUBS X31, X31, X0 // 0 - 1, is less than.
01010100_0000000000000000100_01011 // B.LT SUCCESS // Take this. (+4)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
// ERROR:
000101_00000000000000000000000000 // B ERROR // Should never get here (0)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
// SUCCESS:
1001000100_000000000001_00001_00001 // ADDI X1, X1, #1 // Signal correct operation.
// HALT:
000101_00000000000000000000000000 // B HALT // Loop forever (0).
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP

View File

@ -0,0 +1,68 @@
// Test of BL and BR instructions.
// Requires:
// BL, BR, ADDI & B instructions
// Expected results:
// X0 = 1
// X1 = 0
// X2 = 0 (anything else indicates an error)
// X3 = 1 (signifies program end was reached)
// X4 = 52
// X5 = 64
// X29 = 20
// X30 = 68
//ADDI: I-type, Reg[Rd] = Reg[Rn] + {'0, Imm12}
//OP Imm12 Rn Rd
//3322222222 221111111111 00000 00000
//1098765432 109876543210 98765 43210
//1001000100 Unsigned 0..31 0..31
//B: B-type, PC = PC + SignExtend({Imm26, 2'b00})
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//000101 2's Comp Imm26
//BL: B-type, PC = PC + SignExtend({Imm26, 2'b00}), X30 = PC + 4 (load into pc - 4 for pipeline?)
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//100101 2's Comp Imm26
//BR: R-type, PC = Reg[Rd]
//OP Rm Shamt Rn Rd
//33222222222 21111 111111 00000 00000
//10987654321 09876 543210 98765 43210
//11010110000 0..31 000000 0..31 0..31
// MAIN:
1001000100_000000000001_11111_00000 // 0: ADDI X0, X31, #1 // X0 = 1
1001000100_000000000000_11111_00001 // 4: ADDI X1, X31, #0 // X1 = 0
1001000100_000000000000_11111_00010 // 8: ADDI X2, X31, #0 // X2 = 0
1001000100_000000000000_11111_00011 // 12: ADDI X3, X31, #0 // X3 = 0
100101_00000000000000000000000101 // 16: BL BL_FORWARDS (+5) // X30 = 20
// BL_BACKWARDS:
1001000100_000000110100_11111_00100 // 20: ADDI X4, X31, #52 // X4 = 52
11010110000_00000_000000_00000_00100 // 24: BR X4 (ALMOST_THERE)
1001000100_000000000000_11111_11111 // 28: ADDI X31, X31, #0 // Noop
// BR_ERROR (SHOULD NOT EVER EXECUTE):
1001000100_000000000001_11111_00010 // 32: ADDI X2, X31, #1 // X2 = 1 (Error flag)
// BL_FORWARDS:
1001000100_000000000000_11110_11101 // 36: ADDI X29, X30, #0 // X29 = 20
1001000100_000001000000_11111_00101 // 40: ADDI X5, X31, #64 // X5 = 64
100101_11111111111111111111111010 // 44: BL BL_BACKWARDS (-6) // X30 = 48
1001000100_000000000000_11111_11111 // 48: ADDI X31, X31, #0 // Noop
// ALMOST_THERE:
11010110000_00000_000000_00000_00101 // 52: BR X5 (END)
1001000100_000000000000_11111_11111 // 56: ADDI X31, X31, #0 // Noop
// BR_ERROR (SHOULD NOT EVER EXECUTE):
1001000100_000000000001_11111_00010 // 60: ADDI X2, X31, #1 // X2 = 1 (Error flag)
// END:
100101_00000000000000000000000010 // 64: BL FINAL (+2) // X30 = 68
1001000100_000000000000_11111_11111 // 68: ADDI X31, X31, #0 // Noop
// FINAL:
1001000100_000000000001_11111_00011 // 72: ADDI X3, X31, #1 // X3 = 1
000101_00000000000000000000000000 // 76: B HALT (0)
1001000100_000000000000_11111_11111 // 80: ADDI X31, X31, #0 // Noop
// BL/BR_ERROR (SHOULD NOT EVER EXECUTE):
1001000100_000000000001_11111_00010 // 84: ADDI X2, X31, #1 // X2 = 1 (Error flag)

View File

@ -0,0 +1,149 @@
// Test of data forwarding, flag timing, and branch delay slots on the pipelined CPU.
// Requires:
// ADDI, ADDS, SUBS, CBZ, B.LT, B, LDUR, STUR
// Expected results:
// X0 = 0
// X1 = 8
// X2 = 4 (on pipelined CPU), or 0 (single-cycle CPU).
// X3 = 5
// X4 = 7
// X5 = 2
// X6 = -2
// X7 = -2
// X8 = 0
// X9 = 1
// X10 = -4
// X14 = 5
// X15 = 8
// X16 = 9
// X17 = 1
// X18 = 99
// Mem[0] = 8
// Mem[8] = 5
//ADDI: I-type, Reg[Rd] = Reg[Rn] + {'0, Imm12}
//OP Imm12 Rn Rd
//3322222222 221111111111 00000 00000
//1098765432 109876543210 98765 43210
//1001000100 Unsigned 0..31 0..31
//B: B-type, PC = PC + SignExtend({Imm26, 2'b00})
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//000101 2's Comp Imm26
//CBZ: CB-type, if (R[Rt] == 0) PC = PC + SignExtend({Imm19, 2'b00})
//OP Imm19 Rt
//33222222 2222111111111100000 00000
//10987654 3210987654321098765 43210
//10110100 2's Comp Imm19 0..31
//SUBS: R-type, Reg[Rd] = Reg[Rn] - Reg[Rm]
//OP Rm Shamt Rn Rd
//33222222222 21111 111111 00000 00000
//10987654321 09876 543210 98765 43210
//11101011000 0..31 000000 0..31 0..31
//ADDS: R-type, Reg[Rd] = Reg[Rn] + Reg[Rm]
//OP Rm Shamt Rn Rd
//33222222222 21111 111111 00000 00000
//10987654321 09876 543210 98765 43210
//10101011000 0..31 000000 0..31 0..31
//B.LT: CB-type, if (flags meet condition) PC = PC + SignExtend({Imm19, 2'b00})
//OP Imm19 Cond
//33222222 2222111111111100000 00000
//10987654 3210987654321098765 43210
//01010100 2's Comp Imm19 01011
//LDUR: D-type, Reg[Rt] = Mem[Reg[Rn] + SignExtend(Imm9)]
//OP Imm9 00 Rn Rt
//33222222222 211111111 11 00000 00000
//10987654321 098765432 10 98765 43210
//11111000010 2's Comp 00 0..31 0..31
//STUR: D-type, Mem[Reg[Rn] + SignExtend(Imm9)] = Reg[Rt]
//OP Imm9 00 Rn Rt
//33222222222 211111111 11 00000 00000
//10987654321 098765432 10 98765 43210
//11111000000 2's Comp 00 0..31 0..31
// MAIN:
1001000100_000000000000_11111_00000 // ADDI X0, X31, #0 // X0 = 0
1001000100_000000000000_11111_00001 // ADDI X1, X31, #0 // X1 = 0
1001000100_000000000000_11111_00010 // ADDI X2, X31, #0 // X2 = 0, counter of branch delay slots.
// // Simple forwarding
1001000100_000000000101_11111_00011 // ADDI X3, X31, #5 // X3 = 5
1001000100_000000000010_00011_00100 // ADDI X4, X3, #2 // X4 = 7
11101011000_00011_000000_00100_00101 // SUBS X5, X4, X3 // X5 = 2
11101011000_00100_000000_00011_00110 // SUBS X6, X3, X4 // X6 = -2
11101011000_00100_000000_00011_00111 // SUBS X7, X3, X4 // X7 = -2
// // Forwarding and X31
1001000100_111111111111_11111_11111 // ADDI X31, X31, #4095 // Imm12 is zero-extended, so this tries to write 4095 to X31, but it should stay as 0
11101011000_11111_000000_00001_01000 // SUBS X8, X1, X31 // X8 = 0
11101011000_01000_000000_11111_01000 // SUBS X8, X31, X8 // X8 = 0
11101011000_11111_000000_01000_01000 // SUBS X8, X8, X31 // X8 = 0
11101011000_11111_000000_01000_01000 // SUBS X8, X8, X31 // X8 = 0
// // Forwarding in the face of multiple writes
1001000100_000000000010_11111_01001 // ADDI X9, X31, #2 // X9 = 2
1001000100_000000000001_11111_01001 // ADDI X9, X31, #1 // X9 = 1
11101011000_01001_000000_11111_01010 // SUBS X10, X31, X9 // X10 = -1
11101011000_01001_000000_01010_01010 // SUBS X10, X10, X9 // X10 = -2
11101011000_01001_000000_01010_01010 // SUBS X10, X10, X9 // X10 = -3
11101011000_01001_000000_01010_01010 // SUBS X10, X10, X9 // X10 = -4
// // Forwarding involving an instruction that doesn't write the register file
11111000000_000000001_00_00100_00011 // STUR X3, [X4, #1] // Mem[8] = 5
1001000100_000000000000_00011_01110 // ADDI X14, X3, #0 // X14 = 5
// // Forwarding and load/store instructions
1001000100_000000001000_11111_00001 // ADDI X1, X31, 8 // X1 = 8
11111000000_000000000_00_11111_00001 // STUR X1, [X31, #0] // Mem[0] = 8
11111000010_000000000_00_11111_01111 // LDUR X15, [X31, #0] // X15 = Mem[0] = 8
1001000100_000000000000_11111_11111 // ADDI X31, X31, 0 // Noop
1001000100_000000000001_01111_10000 // ADDI X16, X15, 1 // X16 = 9
// // Flags and the pipelined CPU (set flag and quickly or slowly branch).
10101011000_11111_000000_11111_11111 // ADDS X31, X31, X31 // Noop that clears the N, C and V flags (Z is set, since 0 + 0 = 0).
11101011000_00011_000000_11111_11111 // SUBS X31, X31, X3 // Yes, 0 < 5. Set flags
01010100_0000000000000000100_01011 // B.LT TAKEN1 // Take the branch (+4). pc=112
1001000100_000000000001_00010_00010 // ADDI X2, X2, #1 // X2 = 1 (increment delay slot counter)
// ERROR1:
000101_00000000000000000000000000 // B ERROR1 // Should never get here (0).
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop
// TAKEN1:
10101011000_11111_000000_11111_11111 // ADDS X31, X31, X31 // Noop that clears the N, C and V flags (Z is set, since 0 + 0 = 0).
11101011000_00011_000000_11111_11111 // SUBS X31, X31, X3 // Yes, 0 < 5. Set flags
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop - same as above but 1 cycle later.
01010100_0000000000000000100_01011 // B.LT TAKEN2 // Take the branch (+4).
1001000100_000000000001_00010_00010 // ADDI X2, X2, #1 // X2 = 2 (increment delay slot counter)
// ERROR2:
000101_00000000000000000000000000 // B ERROR2 // Should never get here (0).
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop
// TAKEN2:
10101011000_11111_000000_11111_11111 // ADDS X31, X31, X31 // Noop that clears the N, C and V flags (Z is set, since 0 + 0 = 0). pc = 156
11101011000_00011_000000_11111_11111 // SUBS X31, X31, X3 // Yes, 0 < 5. Set flags
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop - same as above but much longer.
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop
01010100_0000000000000000100_01011 // B.LT TAKEN3 // Take the branch (+4). pc = 180
1001000100_000000000001_00010_00010 // ADDI X2, X2, #1 // X2 = 3 (increment delay slot counter)
// ERROR3:
000101_00000000000000000000000000 // B ERROR3 // Should never get here (0).
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop
// TAKEN3:
1001000100_000000000001_11111_10001 // ADDI X17, X31, #1 // X17 = 1 pc = 196
// // Forwarding to conditional branch
1001000100_000000000010_11111_00000 // ADDI X0, X31, #2 // X0 = 2
10110100_0000000000000000101_00000 // CBZ X0, ERROR4 // Should not be taken (+5).
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop.
1001000100_000000000000_11111_00000 // ADDI X0, X31, #0 // X0 = 0
10110100_0000000000000000100_00000 // CBZ X0, SUCCESS // Should be taken (+4). pc = 216
1001000100_000000000001_00010_00010 // ADDI X2, X2, #1 // X2 = 4 (increment delay slot counter)
// ERROR4:
000101_00000000000000000000000000 // B ERROR4 // Loop forever (0).
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop.
// SUCCESS:
1001000100_000001100011_11111_10010 // ADDI X18, X31, #99 // Show that we did finish.
// HALT:
000101_00000000000000000000000000 // B HALT // Done (0).
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // Noop.

View File

@ -0,0 +1,125 @@
// Bubble-sort of 10 element array, Mem[0],Mem[8],..Mem[72]
// for (X0 = 9; X0 > 0; X0--) {
// for (X1 = 0; X1 < X0; X1++) {
// if (A[X1+1] < A[X1]) SWAP(A[X1+1],A[X1]);
// }
// }
// Requires:
// ADDI, ADDS, B, B.LT, CBZ, LDUR, STUR & SUBS instructions
// Expected results:
// X11 = 1
// X12 = 2
// X13 = 3
// X14 = 4
// X15 = 5
// X16 = 6
// X17 = 7
// X18 = 8
// X19 = 9
// X20 = 10
//ADDI: I-type, Reg[Rd] = Reg[Rn] + {'0, Imm12}
//OP Imm12 Rn Rd
//3322222222 221111111111 00000 00000
//1098765432 109876543210 98765 43210
//1001000100 Unsigned 0..31 0..31
//B: B-type, PC = PC + SignExtend({Imm26, 2'b00})
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//000101 2's Comp Imm26
//B.cond: CB-type, if (flags meet condition) PC = PC + SignExtend({Imm19, 2'b00})
//OP Imm19 Cond
//33222222 2222111111111100000 00000
//10987654 3210987654321098765 43210
//01010100 2's Comp Imm19 0..15
//CBZ: CB-type, if (R[Rt] == 0) PC = PC + SignExtend({Imm19, 2'b00})
//OP Imm19 Rt
//33222222 2222111111111100000 00000
//10987654 3210987654321098765 43210
//10110100 2's Comp Imm19 0..31
//LDUR: D-type, Reg[Rt] = Mem[Reg[Rn] + SignExtend(Imm9)]
//OP Imm9 00 Rn Rt
//33222222222 211111111 11 00000 00000
//10987654321 098765432 10 98765 43210
//11111000010 2's Comp 00 0..31 0..31
//STUR: D-type, Mem[Reg[Rn] + SignExtend(Imm9)] = Reg[Rt]
//OP Imm9 00 Rn Rt
//33222222222 211111111 11 00000 00000
//10987654321 098765432 10 98765 43210
//11111000000 2's Comp 00 0..31 0..31
//SUBS: R-type, Reg[Rd] = Reg[Rn] - Reg[Rm]
//OP Rm Shamt Rn Rd
//33222222222 21111 111111 00000 00000
//10987654321 09876 543210 98765 43210
//11101011000 0..31 000000 0..31 0..31
// STORE_VALS:
1001000100_000000001010_11111_00000 // ADDI X0, X31, #10
11111000000_000000000_00_11111_00000 // STUR X0, [X31, #0] // Mem[0] = 10
1001000100_000000000111_11111_00000 // ADDI X0, X31, #7
11111000000_000001000_00_11111_00000 // STUR X0, [X31, #8] // Mem[8] = 7
1001000100_000000000100_11111_00000 // ADDI X0, X31, #4
11111000000_000010000_00_11111_00000 // STUR X0, [X31, #16] // Mem[16] = 4
1001000100_000000001001_11111_00000 // ADDI X0, X31, #9
11111000000_000011000_00_11111_00000 // STUR X0, [X31, #24] // Mem[24] = 9
1001000100_000000000011_11111_00000 // ADDI X0, X31, #3
11111000000_000100000_00_11111_00000 // STUR X0, [X31, #32] // Mem[32] = 3
1001000100_000000001000_11111_00000 // ADDI X0, X31, #8
11111000000_000101000_00_11111_00000 // STUR X0, [X31, #40] // Mem[40] = 8
1001000100_000000000010_11111_00000 // ADDI X0, X31, #2
11111000000_000110000_00_11111_00000 // STUR X0, [X31, #48] // Mem[48] = 2
1001000100_000000000101_11111_00000 // ADDI X0, X31, #5
11111000000_000111000_00_11111_00000 // STUR X0, [X31, #56] // Mem[56] = 5
1001000100_000000000110_11111_00000 // ADDI X0, X31, #6
11111000000_001000000_00_11111_00000 // STUR X0, [X31, #64] // Mem[64] = 6
1001000100_000000000001_11111_00000 // ADDI X0, X31, #1
11111000000_001001000_00_11111_00000 // STUR X0, [X31, #72] // Mem[72] = 1
// MAIN:
1001000100_000000000001_11111_00101 // ADDI X5, X31, #1 // Need a constant 1 for decr
1001000100_000000001001_11111_00000 // ADDI X0, X31, #9 // for (X0 = 9; X0 > 0; X0--) {
// OUTER_LOOP:
1001000100_000000000000_11111_00001 // ADDI X1, X31, #0 // for (X1 = 0; X1 < X0; X1++) {
10101011000_00001_000000_00001_00100 // ADDS X4, X1, X1 // For addressing, X4=8*X1 (X1+X1 = 0 here because X1 was just zeroed)
// INNER_LOOP:
11111000010_000001000_00_00100_00011 // LDUR X3, [X4, #8] // get A[X1+1]
11111000010_000000000_00_00100_00010 // LDUR X2, [X4, #0] // get A[X1]
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11101011000_00011_000000_00010_11111 // SUBS X31, X2, X3 // Test X2 vs. X3
01010100_0000000000000000100_01011 // B.LT NO_SWAP // Don't swap if X2 < X3
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11111000000_000001000_00_00100_00010 // STUR X2, [X4, #8] // swap: A[X1+1] = old A[X1]
11111000000_000000000_00_00100_00011 // STUR X3, [X4, #0] // swap: A[X1] = old A[X1+1]
// NO_SWAP:
1001000100_000000000001_00001_00001 // ADDI X1, X1, #1 // X1++
1001000100_000000001000_00100_00100 // ADDI X4, X4, #8 // Keep X4=8*X1
11101011000_00000_000000_00001_11111 // SUBS X31, X1, X0 // Is X1 < X0?
01010100_1111111111111110101_01011 // B.LT INNER_LOOP // If so, continue inner loop
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
// DONE_INNER:
11101011000_00101_000000_00000_00000 // SUBS X0, X0, X5 // X0--
10110100_0000000000000000100_00000 // CBZ X0, DONE_OUTER // End outer loop when done
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
000101_11111111111111111111101110 // B OUTER_LOOP // Continue outer loop
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
// DONE_OUTER:
11111000010_000000000_00_11111_01011 // LDUR X11, [X31, #0] // Read back values to regs X11-X20
11111000010_000001000_00_11111_01100 // LDUR X12, [X31, #8]
11111000010_000010000_00_11111_01101 // LDUR X13, [X31, #16]
11111000010_000011000_00_11111_01110 // LDUR X14, [X31, #24]
11111000010_000100000_00_11111_01111 // LDUR X15, [X31, #32]
11111000010_000101000_00_11111_10000 // LDUR X16, [X31, #40]
11111000010_000110000_00_11111_10001 // LDUR X17, [X31, #48]
11111000010_000111000_00_11111_10010 // LDUR X18, [X31, #56]
11111000010_001000000_00_11111_10011 // LDUR X19, [X31, #64]
11111000010_001001000_00_11111_10100 // LDUR X20, [X31, #72]
// HALT:
000101_00000000000000000000000000 // B HALT // HALT
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP

View File

@ -0,0 +1,127 @@
// Test of a recursive Fibonacci function. Test "returns" the Nth number in the Fibonacci sequence.
//
//
//int fibonacci(int N) {
// if (N < 3) return 1;
// else return (fibonacci(N - 1) + fibonacci(N - 2));
//
//}
// Requires:
// ADDI, B, B.LT, BL, BR, LDUR, STUR, & SUBS instructions
// Expected results:
// X0 = 6 (N)
// X1 = 8 (Result of Fibonacci function with N = 6)
// X28 = 8
// X30 = 196
//ADDI: I-type, Reg[Rd] = Reg[Rn] + {'0, Imm12}
//OP Imm12 Rn Rd
//3322222222 221111111111 00000 00000
//1098765432 109876543210 98765 43210
//1001000100 Unsigned 0..31 0..31
//B: B-type, PC = PC + SignExtend({Imm26, 2'b00})
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//000101 2's Comp Imm26
//B.cond: CB-type, if (flags meet condition) PC = PC + SignExtend({Imm19, 2'b00})
//OP Imm19 Cond
//33222222 2222111111111100000 00000
//10987654 3210987654321098765 43210
//01010100 2's Comp Imm19 0..15
// Cond Name Meaning after SUBS FlagTest
// 01011 LT Signed < N!=V
//BL: B-type, PC = PC + SignExtend({Imm26, 2'b00}), X30 = PC + 4
//OP Imm26
//332222 22222211111111110000000000
//109876 54321098765432109876543210
//100101 2's Comp Imm26
//BR: R-type, PC = Reg[Rd]
//OP Rm Shamt Rn Rd
//33222222222 21111 111111 00000 00000
//10987654321 09876 543210 98765 43210
//11010110000 0..31 000000 0..31 0..31
//LDUR: D-type, Reg[Rt] = Mem[Reg[Rn] + SignExtend(Imm9)]
//OP Imm9 00 Rn Rt
//33222222222 211111111 11 00000 00000
//10987654321 098765432 10 98765 43210
//11111000010 2's Comp 00 0..31 0..31
//STUR: D-type, Mem[Reg[Rn] + SignExtend(Imm9)] = Reg[Rt]
//OP Imm9 00 Rn Rt
//33222222222 211111111 11 00000 00000
//10987654321 098765432 10 98765 43210
//11111000000 2's Comp 00 0..31 0..31
//SUBS: R-type, Reg[Rd] = Reg[Rn] - Reg[Rm]
//OP Rm Shamt Rn Rd
//33222222222 21111 111111 00000 00000
//10987654321 09876 543210 98765 43210
//11101011000 0..31 000000 0..31 0..31
//MAIN:
1001000100_000000000110_11111_00000 // ADDI X0, X31, #6 // X0 = N = 6
1001000100_000000000000_11111_00001 // ADDI X1, X31, #0 // X1 = 0 // RETURN REG
1001000100_000000010000_11111_01010 // ADDI X10, X31, #16 // X10 = 16 // for stack pointer decrementing with no SUBI
1001000100_000000000011_11111_01011 // ADDI X11, X31, #3 // X11 = 3 // for B.LT (N < 3)
1001000100_000000000001_11111_01100 // ADDI X12, X31, #1 // X12 = 1
1001000100_000000000010_11111_01101 // ADDI X13, X31, #2 // X13 = 2
1001000100_000000000000_11111_11100 // ADDI X28, X31, #0 // X28 = 0 // STACK POINTER
1001000100_000011000100_11111_11110 // ADDI X30, X31, #196 // X30 = END = 49*4 // RETURN ADDRESS
1001000100_000000001000_11100_11100 // ADDI X28, X28, #8 // Increase stack pointer by 8.
11111000000_000000000_00_11100_11110 // STUR X30, [X28, #0] // Store current return address on stack.
11111000000_111111000_00_11100_00000 // STUR X0, [X28, #-8] // Store current N on stack.
100101_00000000000000000000001000 // BL to FIBONACCI (+8)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11111000010_111111000_00_11100_00000 // LDUR X0, [X28, #-8] // Retrieve N from stack.
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11111000010_000000000_00_11100_11110 // LDUR X30, [X28, #0] // Retrieve return address from stack.
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11010110000_00000_000000_00000_11110 // BR X30 (RETURN)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
//FIBONACCI(N):
11101011000_01011_000000_00000_11111 // SUBS X31, X0, X11 // X31 = X0 - X11
01010100_0000000000000011010_01011 // B.LT to RESULT (+26)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
//FIBONACCI(N-2):
1001000100_000000010000_11100_11100 // ADDI X28, X28, #16 // Increase stack pointer by 16.
11111000000_000000000_00_11100_11110 // STUR X30, [X28, #0] // Store current return address on stack.
11111000000_111111000_00_11100_00000 // STUR X0, [X28, #-8] // Store current N on stack.
11101011000_01101_000000_00000_00000 // SUBS X0, X0, X13 // X0 = X0 - X13
100101_11111111111111111111111001 // BL to FIBONACCI (-7)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11111000010_111111000_00_11100_00000 // LDUR X0, [X28, #-8] // Retrieve N from stack.
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11111000010_000000000_00_11100_11110 // LDUR X30, [X28, #0] // Retrieve return address from stack.
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11101011000_01010_000000_11100_11100 // SUBS X28, X28, X10 // Decrease stack pointer by 16.
//FIBONACCI(N-1):
1001000100_000000010000_11100_11100 // ADDI X28, X28, #16 // Increase stack pointer by 16.
11111000000_000000000_00_11100_11110 // STUR X30, [X28, #0] // Store current return address on stack.
11111000000_111111000_00_11100_00000 // STUR X0, [X28, #-8] // Store current N on stack.
11101011000_01100_000000_00000_00000 // SUBS X0, X0, X12 // X0 = X0 - X12
100101_11111111111111111111101110 // BL to FIBONACCI (-18)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11111000010_111111000_00_11100_00000 // LDUR X0, [X28, #-8] // Retrieve N from stack.
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11111000010_000000000_00_11100_11110 // LDUR X30, [X28, #0] // Retrieve return address from stack.
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
11101011000_01010_000000_11100_11100 // SUBS X28, X28, X10 // Decrease stack pointer by 16.
11010110000_00000_000000_00000_11110 // BR X30 (RETURN)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
//RESULT:
1001000100_000000000001_00001_00001 // ADDI X1, X1, #1 // X1 += 1
11010110000_00000_000000_00000_11110 // BR X30 (RETURN)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP
//END:
000101_00000000000000000000000000 // B END (+0)
1001000100_000000000000_11111_11111 // ADDI X31, X31, #0 // NOOP

1
tools/sim/README.md Normal file
View File

@ -0,0 +1 @@

15
tools/sim/runs/run1.do Normal file
View File

@ -0,0 +1,15 @@
# Simulator run script: compile the HDL, load the CPU testbench, source the
# test1 wave script, and run the simulation.
# The relative paths below assume the script is launched from tools/sim/runs.

# Create the "work" design library.
vlib work
# Compile every SystemVerilog source under src/hdl.
vlog ../../../src/hdl/*.sv
# Load the testbench from the work library; +acc asks the optimizer to keep
# design objects accessible for debugging.
# NOTE(review): -t 1ns is coarser than the 10ps precision declared by the
# timescale 1ns/10ps in src/hdl/CPU_pipelined.sv -- confirm that sub-ns
# delays in the design are not being rounded away.
vsim -voptargs="+acc" -t 1ns -lib work CPU_pipelined_testbench
# Source the companion wave script (presumably populates the wave window).
do ../waves/test1.do
# Open the wave, structure and signals views.
view wave
view structure
view signals
# Run until the simulation stops on its own.
run -all
# End

15
tools/sim/runs/run10.do Normal file
View File

@ -0,0 +1,15 @@
# Simulator run script: compile the HDL, load the CPU testbench, source the
# test10 wave script, and run the simulation.
# The relative paths below assume the script is launched from tools/sim/runs.

# Create the "work" design library.
vlib work
# Compile every SystemVerilog source under src/hdl.
vlog ../../../src/hdl/*.sv
# Load the testbench from the work library; +acc asks the optimizer to keep
# design objects accessible for debugging.
# NOTE(review): -t 1ns is coarser than the 10ps precision declared by the
# timescale 1ns/10ps in src/hdl/CPU_pipelined.sv -- confirm that sub-ns
# delays in the design are not being rounded away.
vsim -voptargs="+acc" -t 1ns -lib work CPU_pipelined_testbench
# Source the companion wave script (presumably populates the wave window).
do ../waves/test10.do
# Open the wave, structure and signals views.
view wave
view structure
view signals
# Run until the simulation stops on its own.
run -all
# End

15
tools/sim/runs/run11.do Normal file
View File

@ -0,0 +1,15 @@
# Simulator run script: compile the HDL, load the CPU testbench, source the
# test11 wave script, and run the simulation.
# The relative paths below assume the script is launched from tools/sim/runs.

# Create the "work" design library.
vlib work
# Compile every SystemVerilog source under src/hdl.
vlog ../../../src/hdl/*.sv
# Load the testbench from the work library; +acc asks the optimizer to keep
# design objects accessible for debugging.
# NOTE(review): -t 1ns is coarser than the 10ps precision declared by the
# timescale 1ns/10ps in src/hdl/CPU_pipelined.sv -- confirm that sub-ns
# delays in the design are not being rounded away.
vsim -voptargs="+acc" -t 1ns -lib work CPU_pipelined_testbench
# Source the companion wave script (presumably populates the wave window).
do ../waves/test11.do
# Open the wave, structure and signals views.
view wave
view structure
view signals
# Run until the simulation stops on its own.
run -all
# End

15
tools/sim/runs/run12.do Normal file
View File

@ -0,0 +1,15 @@
# ModelSim/Questa run script: compile the HDL, load CPU_pipelined_testbench, apply wave layout test12.do, run.
# The relative paths below resolve only when the current directory is a direct child of tools/sim.
# Create the "work" library.
vlib work
# Compile every SystemVerilog source under src/hdl.
vlog ../../../src/hdl/*.sv
# Load the testbench with +acc (keeps signals visible for debugging) at 1 ns simulator resolution.
# NOTE(review): CPU_pipelined.sv declares `timescale 1ns/10ps; a 1 ns resolution is coarser than that precision, so sub-ns delays may be rounded - confirm this is intended.
vsim -voptargs="+acc" -t 1ns -lib work CPU_pipelined_testbench
# Populate the wave window from the saved layout.
do ../waves/test12.do
# Open the wave, structure and signals windows.
view wave
view structure
view signals
# Run until the simulation stops on its own.
run -all
# End

15
tools/sim/runs/run2.do Normal file
View File

@ -0,0 +1,15 @@
# ModelSim/Questa run script: compile the HDL, load CPU_pipelined_testbench, apply wave layout test2.do, run.
# The relative paths below resolve only when the current directory is a direct child of tools/sim.
# Create the "work" library.
vlib work
# Compile every SystemVerilog source under src/hdl.
vlog ../../../src/hdl/*.sv
# Load the testbench with +acc (keeps signals visible for debugging) at 1 ns simulator resolution.
# NOTE(review): CPU_pipelined.sv declares `timescale 1ns/10ps; a 1 ns resolution is coarser than that precision, so sub-ns delays may be rounded - confirm this is intended.
vsim -voptargs="+acc" -t 1ns -lib work CPU_pipelined_testbench
# Populate the wave window from the saved layout.
do ../waves/test2.do
# Open the wave, structure and signals windows.
view wave
view structure
view signals
# Run until the simulation stops on its own.
run -all
# End

15
tools/sim/runs/run3.do Normal file
View File

@ -0,0 +1,15 @@
# ModelSim/Questa run script: compile the HDL, load CPU_pipelined_testbench, apply wave layout test3.do, run.
# The relative paths below resolve only when the current directory is a direct child of tools/sim.
# Create the "work" library.
vlib work
# Compile every SystemVerilog source under src/hdl.
vlog ../../../src/hdl/*.sv
# Load the testbench with +acc (keeps signals visible for debugging) at 1 ns simulator resolution.
# NOTE(review): CPU_pipelined.sv declares `timescale 1ns/10ps; a 1 ns resolution is coarser than that precision, so sub-ns delays may be rounded - confirm this is intended.
vsim -voptargs="+acc" -t 1ns -lib work CPU_pipelined_testbench
# Populate the wave window from the saved layout.
do ../waves/test3.do
# Open the wave, structure and signals windows.
view wave
view structure
view signals
# Run until the simulation stops on its own.
run -all
# End

15
tools/sim/runs/run4.do Normal file
View File

@ -0,0 +1,15 @@
# ModelSim/Questa run script: compile the HDL, load CPU_pipelined_testbench, apply wave layout test4.do, run.
# The relative paths below resolve only when the current directory is a direct child of tools/sim.
# Create the "work" library.
vlib work
# Compile every SystemVerilog source under src/hdl.
vlog ../../../src/hdl/*.sv
# Load the testbench with +acc (keeps signals visible for debugging) at 1 ns simulator resolution.
# NOTE(review): CPU_pipelined.sv declares `timescale 1ns/10ps; a 1 ns resolution is coarser than that precision, so sub-ns delays may be rounded - confirm this is intended.
vsim -voptargs="+acc" -t 1ns -lib work CPU_pipelined_testbench
# Populate the wave window from the saved layout.
do ../waves/test4.do
# Open the wave, structure and signals windows.
view wave
view structure
view signals
# Run until the simulation stops on its own.
run -all
# End

15
tools/sim/runs/run5.do Normal file
View File

@ -0,0 +1,15 @@
# ModelSim/Questa run script: compile the HDL, load CPU_pipelined_testbench, apply wave layout test5.do, run.
# The relative paths below resolve only when the current directory is a direct child of tools/sim.
# Create the "work" library.
vlib work
# Compile every SystemVerilog source under src/hdl.
vlog ../../../src/hdl/*.sv
# Load the testbench with +acc (keeps signals visible for debugging) at 1 ns simulator resolution.
# NOTE(review): CPU_pipelined.sv declares `timescale 1ns/10ps; a 1 ns resolution is coarser than that precision, so sub-ns delays may be rounded - confirm this is intended.
vsim -voptargs="+acc" -t 1ns -lib work CPU_pipelined_testbench
# Populate the wave window from the saved layout.
do ../waves/test5.do
# Open the wave, structure and signals windows.
view wave
view structure
view signals
# Run until the simulation stops on its own.
run -all
# End

15
tools/sim/runs/run6.do Normal file
View File

@ -0,0 +1,15 @@
# ModelSim/Questa run script: compile the HDL, load CPU_pipelined_testbench, apply wave layout test6.do, run.
# The relative paths below resolve only when the current directory is a direct child of tools/sim.
# Create the "work" library.
vlib work
# Compile every SystemVerilog source under src/hdl.
vlog ../../../src/hdl/*.sv
# Load the testbench with +acc (keeps signals visible for debugging) at 1 ns simulator resolution.
# NOTE(review): CPU_pipelined.sv declares `timescale 1ns/10ps; a 1 ns resolution is coarser than that precision, so sub-ns delays may be rounded - confirm this is intended.
vsim -voptargs="+acc" -t 1ns -lib work CPU_pipelined_testbench
# Populate the wave window from the saved layout.
do ../waves/test6.do
# Open the wave, structure and signals windows.
view wave
view structure
view signals
# Run until the simulation stops on its own.
run -all
# End

View File

@ -0,0 +1,26 @@
# Saved wave-window layout for the mux4_64_tb testbench.
# Continue with the next command if one fails (e.g. a signal path no longer exists).
onerror {resume}
quietly WaveActivateNextPane {} 0
# Ports of the device under test: the four inputs i0-i3, the output and the select.
add wave -noupdate /mux4_64_tb/dut/i0
add wave -noupdate /mux4_64_tb/dut/i1
add wave -noupdate /mux4_64_tb/dut/i2
add wave -noupdate /mux4_64_tb/dut/i3
add wave -noupdate /mux4_64_tb/dut/out
add wave -noupdate /mux4_64_tb/dut/sel
TreeUpdate [SetDefaultTree]
# Saved cursor: Cursor 1 at 0 ps.
WaveRestoreCursors {{Cursor 1} {0 ps} 0}
quietly wave cursor active 0
# Column widths and display preferences of the wave pane.
configure wave -namecolwidth 150
configure wave -valuecolwidth 100
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 50
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Refresh the window (the adds above used -noupdate) and restore the saved visible time range.
update
WaveRestoreZoom {0 ps} {1 ns}

28
tools/sim/waves/test1.do Normal file
View File

@ -0,0 +1,28 @@
# Saved wave-window layout for CPU_pipelined_testbench: clock, reset, pcIF and register-file entries 0-4.
# Continue with the next command if one fails (e.g. a signal path no longer exists).
onerror {resume}
quietly WaveActivateNextPane {} 0
# Clock, reset and pcIF (shown unsigned; presumably the fetch-stage program counter - confirm).
add wave -noupdate /CPU_pipelined_testbench/dut/clk
add wave -noupdate /CPU_pipelined_testbench/dut/reset
add wave -noupdate -radix unsigned /CPU_pipelined_testbench/dut/pcIF
# Register-file dataBus entries 0-4 in decimal radix (presumably registers X0-X4 - confirm against the regfile).
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[0]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[1]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[2]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[3]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[4]}
TreeUpdate [SetDefaultTree]
# Saved cursor: Cursor 1 at 49729 ns.
WaveRestoreCursors {{Cursor 1} {49729 ns} 0}
quietly wave cursor active 1
# Column widths and display preferences of the wave pane.
configure wave -namecolwidth 113
configure wave -valuecolwidth 67
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 50
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Refresh the window (the adds above used -noupdate) and restore the saved visible time range.
update
WaveRestoreZoom {0 ns} {141312 ns}

41
tools/sim/waves/test10.do Normal file
View File

@ -0,0 +1,41 @@
# Saved wave-window layout for CPU_pipelined_testbench: clock, reset, pcIF, register-file entries 0-10 and 14-18, data memory mem[0] and mem[8].
# Continue with the next command if one fails (e.g. a signal path no longer exists).
onerror {resume}
quietly WaveActivateNextPane {} 0
# Clock, reset and pcIF (shown unsigned; presumably the fetch-stage program counter - confirm).
add wave -noupdate /CPU_pipelined_testbench/dut/clk
add wave -noupdate /CPU_pipelined_testbench/dut/reset
add wave -noupdate -radix unsigned /CPU_pipelined_testbench/dut/pcIF
# Register-file dataBus entries 0-10 and 14-18 in decimal radix (presumably registers X0-X10, X14-X18 - confirm against the regfile).
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[0]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[1]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[2]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[3]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[4]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[5]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[6]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[7]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[8]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[9]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[10]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[14]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[15]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[16]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[17]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[18]}
# Data-memory elements mem[0] and mem[8] in decimal radix.
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/dmem/mem[0]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/dmem/mem[8]}
TreeUpdate [SetDefaultTree]
# Saved cursor: Cursor 1 at 455410 ns.
WaveRestoreCursors {{Cursor 1} {455410 ns} 0}
quietly wave cursor active 1
# Column widths and display preferences of the wave pane.
configure wave -namecolwidth 113
configure wave -valuecolwidth 67
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 50
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Refresh the window (the adds above used -noupdate) and restore the saved visible time range.
update
WaveRestoreZoom {443254 ns} {584566 ns}

33
tools/sim/waves/test11.do Normal file
View File

@ -0,0 +1,33 @@
# Saved wave-window layout for CPU_pipelined_testbench: clock, reset, pcIF and register-file entries 11-20.
# Continue with the next command if one fails (e.g. a signal path no longer exists).
onerror {resume}
quietly WaveActivateNextPane {} 0
# Clock, reset and pcIF (shown unsigned; presumably the fetch-stage program counter - confirm).
add wave -noupdate /CPU_pipelined_testbench/dut/clk
add wave -noupdate /CPU_pipelined_testbench/dut/reset
add wave -noupdate -radix unsigned /CPU_pipelined_testbench/dut/pcIF
# Register-file dataBus entries 11-20 in decimal radix (presumably registers X11-X20 - confirm against the regfile).
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[11]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[12]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[13]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[14]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[15]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[16]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[17]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[18]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[19]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[20]}
TreeUpdate [SetDefaultTree]
# Saved cursor: Cursor 1 at 0 ns.
WaveRestoreCursors {{Cursor 1} {0 ns} 0}
quietly wave cursor active 1
# Column widths and display preferences of the wave pane.
configure wave -namecolwidth 113
configure wave -valuecolwidth 67
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 50
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Refresh the window (the adds above used -noupdate) and restore the saved visible time range.
update
WaveRestoreZoom {0 ns} {141312 ns}

27
tools/sim/waves/test12.do Normal file
View File

@ -0,0 +1,27 @@
# Saved wave-window layout for CPU_pipelined_testbench: clock, reset, pcIF and register-file entries 0, 1, 28 and 30.
# Continue with the next command if one fails (e.g. a signal path no longer exists).
onerror {resume}
quietly WaveActivateNextPane {} 0
# Clock, reset and pcIF (shown unsigned; presumably the fetch-stage program counter - confirm).
add wave -noupdate /CPU_pipelined_testbench/dut/clk
add wave -noupdate /CPU_pipelined_testbench/dut/reset
add wave -noupdate -radix unsigned /CPU_pipelined_testbench/dut/pcIF
# Register-file dataBus entries 0, 1, 28 and 30 in decimal radix (presumably registers X0, X1, X28, X30 - confirm against the regfile).
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[0]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[1]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[28]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[30]}
TreeUpdate [SetDefaultTree]
# Saved cursor: Cursor 1 at 0 ns.
WaveRestoreCursors {{Cursor 1} {0 ns} 0}
quietly wave cursor active 1
# Column widths and display preferences of the wave pane.
configure wave -namecolwidth 113
configure wave -valuecolwidth 67
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 50
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Refresh the window (the adds above used -noupdate) and restore the saved visible time range.
update
WaveRestoreZoom {0 ns} {141312 ns}

35
tools/sim/waves/test2.do Normal file
View File

@ -0,0 +1,35 @@
# Saved wave-window layout for CPU_pipelined_testbench: clock, reset, pcIF, register-file entries 0-7 and four *EX flag signals.
# Continue with the next command if one fails (e.g. a signal path no longer exists).
onerror {resume}
quietly WaveActivateNextPane {} 0
# Clock, reset and pcIF (shown unsigned; presumably the fetch-stage program counter - confirm).
add wave -noupdate /CPU_pipelined_testbench/dut/clk
add wave -noupdate /CPU_pipelined_testbench/dut/reset
add wave -noupdate -radix unsigned /CPU_pipelined_testbench/dut/pcIF
# Register-file dataBus entries 0-7 in decimal radix (presumably registers X0-X7 - confirm against the regfile).
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[0]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[1]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[2]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[3]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[4]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[5]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[6]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[7]}
# Negative, carry-out, overflow and zero signals with the EX suffix (presumably the execute-stage ALU flags - confirm in CPU_pipelined.sv).
add wave -noupdate /CPU_pipelined_testbench/dut/nnegativeEX
add wave -noupdate /CPU_pipelined_testbench/dut/ncarry_outEX
add wave -noupdate /CPU_pipelined_testbench/dut/noverflowEX
add wave -noupdate /CPU_pipelined_testbench/dut/nzeroEX
TreeUpdate [SetDefaultTree]
# Saved cursor: Cursor 1 at 0 ns.
WaveRestoreCursors {{Cursor 1} {0 ns} 0}
quietly wave cursor active 1
# Column widths and display preferences of the wave pane.
configure wave -namecolwidth 113
configure wave -valuecolwidth 67
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 50
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Refresh the window (the adds above used -noupdate) and restore the saved visible time range.
update
WaveRestoreZoom {0 ns} {141312 ns}

29
tools/sim/waves/test3.do Normal file
View File

@ -0,0 +1,29 @@
# Saved wave-window layout for CPU_pipelined_testbench: clock, reset, pcIF and register-file entries 0-5.
# Continue with the next command if one fails (e.g. a signal path no longer exists).
onerror {resume}
quietly WaveActivateNextPane {} 0
# Clock, reset and pcIF (shown unsigned; presumably the fetch-stage program counter - confirm).
add wave -noupdate /CPU_pipelined_testbench/dut/clk
add wave -noupdate /CPU_pipelined_testbench/dut/reset
add wave -noupdate -radix unsigned /CPU_pipelined_testbench/dut/pcIF
# Register-file dataBus entries 0-5 in decimal radix (presumably registers X0-X5 - confirm against the regfile).
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[0]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[1]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[2]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[3]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[4]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[5]}
TreeUpdate [SetDefaultTree]
# Saved cursor: Cursor 1 at 0 ns.
WaveRestoreCursors {{Cursor 1} {0 ns} 0}
quietly wave cursor active 1
# Column widths and display preferences of the wave pane.
configure wave -namecolwidth 113
configure wave -valuecolwidth 67
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 50
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Refresh the window (the adds above used -noupdate) and restore the saved visible time range.
update
WaveRestoreZoom {0 ns} {141312 ns}

34
tools/sim/waves/test4.do Normal file
View File

@ -0,0 +1,34 @@
# Saved wave-window layout for CPU_pipelined_testbench: clock, reset, pcIF, register-file entries 0-7, data memory mem[0], mem[8] and mem[16].
# Continue with the next command if one fails (e.g. a signal path no longer exists).
onerror {resume}
quietly WaveActivateNextPane {} 0
# Clock, reset and pcIF (shown unsigned; presumably the fetch-stage program counter - confirm).
add wave -noupdate /CPU_pipelined_testbench/dut/clk
add wave -noupdate /CPU_pipelined_testbench/dut/reset
add wave -noupdate -radix unsigned /CPU_pipelined_testbench/dut/pcIF
# Register-file dataBus entries 0-7 in decimal radix (presumably registers X0-X7 - confirm against the regfile).
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[0]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[1]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[2]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[3]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[4]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[5]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[6]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[7]}
# Data-memory elements mem[0], mem[8] and mem[16] in decimal radix.
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/dmem/mem[0]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/dmem/mem[8]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/dmem/mem[16]}
TreeUpdate [SetDefaultTree]
# Saved cursor: Cursor 1 at 346996 ns.
WaveRestoreCursors {{Cursor 1} {346996 ns} 0}
quietly wave cursor active 1
# Column widths and display preferences of the wave pane.
configure wave -namecolwidth 113
configure wave -valuecolwidth 67
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 50
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Refresh the window (the adds above used -noupdate) and restore the saved visible time range.
update
WaveRestoreZoom {218254 ns} {359566 ns}

25
tools/sim/waves/test5.do Normal file
View File

@ -0,0 +1,25 @@
# Saved wave-window layout for CPU_pipelined_testbench: clock, reset, pcIF and register-file entries 0 and 1.
# Continue with the next command if one fails (e.g. a signal path no longer exists).
onerror {resume}
quietly WaveActivateNextPane {} 0
# Clock, reset and pcIF (shown unsigned; presumably the fetch-stage program counter - confirm).
add wave -noupdate /CPU_pipelined_testbench/dut/clk
add wave -noupdate /CPU_pipelined_testbench/dut/reset
add wave -noupdate -radix unsigned /CPU_pipelined_testbench/dut/pcIF
# Register-file dataBus entries 0 and 1 in decimal radix (presumably registers X0 and X1 - confirm against the regfile).
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[0]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[1]}
TreeUpdate [SetDefaultTree]
# Saved cursor: Cursor 1 at 0 ns.
WaveRestoreCursors {{Cursor 1} {0 ns} 0}
quietly wave cursor active 1
# Column widths and display preferences of the wave pane.
configure wave -namecolwidth 113
configure wave -valuecolwidth 67
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 50
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Refresh the window (the adds above used -noupdate) and restore the saved visible time range.
# NOTE(review): this zoom range equals the one saved in test4.do and does not contain the 0 ns cursor above - possibly carried over; confirm.
update
WaveRestoreZoom {218254 ns} {359566 ns}

31
tools/sim/waves/test6.do Normal file
View File

@ -0,0 +1,31 @@
# Saved wave-window layout for CPU_pipelined_testbench: clock, reset, pcIF and register-file entries 0-5, 29 and 30.
# Continue with the next command if one fails (e.g. a signal path no longer exists).
onerror {resume}
quietly WaveActivateNextPane {} 0
# Clock, reset and pcIF (shown unsigned; presumably the fetch-stage program counter - confirm).
add wave -noupdate /CPU_pipelined_testbench/dut/clk
add wave -noupdate /CPU_pipelined_testbench/dut/reset
add wave -noupdate -radix unsigned /CPU_pipelined_testbench/dut/pcIF
# Register-file dataBus entries 0-5, 29 and 30 in decimal radix (presumably registers X0-X5, X29, X30 - confirm against the regfile).
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[0]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[1]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[2]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[3]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[4]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[5]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[29]}
add wave -noupdate -radix decimal {/CPU_pipelined_testbench/dut/u_regfile/dataBus[30]}
TreeUpdate [SetDefaultTree]
# Saved cursor: Cursor 1 at 0 ns.
WaveRestoreCursors {{Cursor 1} {0 ns} 0}
quietly wave cursor active 1
# Column widths and display preferences of the wave pane.
configure wave -namecolwidth 113
configure wave -valuecolwidth 67
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 50
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Refresh the window (the adds above used -noupdate) and restore the saved visible time range.
update
WaveRestoreZoom {0 ns} {141312 ns}