// Test of data forwarding, flag timing and branch delay slots
// (exercised through CBZ, B.LT and B).
// Requires:
// ADDI, ADDS, SUBS, CBZ, B.LT, B, LDUR, STUR
// Expected results:
// X0  = 0
// X1  = 8
// X2  = 4 (on pipelined CPU), or 0 (single-cycle CPU).
// X3  = 5
// X4  = 7
// X5  = 2
// X6  = -2
// X7  = -2
// X8  = 0
// X9  = 1
// X10 = -4
// X14 = 5
// X15 = 8
// X16 = 9
// X17 = 1
// X18 = 99
// Mem[0] = 8
// Mem[8] = 5

// ADDI: I-type, Reg[Rd] = Reg[Rn] + {'0, Imm12}
// OP         Imm12        Rn    Rd
// 3322222222 221111111111 00000 00000
// 1098765432 109876543210 98765 43210
// 1001000100 Unsigned     0..31 0..31

// B: B-type, PC = PC + SignExtend({Imm26, 2'b00})
// OP     Imm26
// 332222 22222211111111110000000000
// 109876 54321098765432109876543210
// 000101 2's Comp Imm26

// CBZ: CB-type, if (R[Rt] == 0) PC = PC + SignExtend({Imm19, 2'b00})
// OP       Imm19               Rt
// 33222222 2222111111111100000 00000
// 10987654 3210987654321098765 43210
// 10110100 2's Comp Imm19      0..31

// SUBS: R-type, Reg[Rd] = Reg[Rn] - Reg[Rm] (also sets the flags)
// OP          Rm    Shamt  Rn    Rd
// 33222222222 21111 111111 00000 00000
// 10987654321 09876 543210 98765 43210
// 11101011000 0..31 000000 0..31 0..31

// ADDS: R-type, Reg[Rd] = Reg[Rn] + Reg[Rm] (also sets the flags)
// OP          Rm    Shamt  Rn    Rd
// 33222222222 21111 111111 00000 00000
// 10987654321 09876 543210 98765 43210
// 10101011000 0..31 000000 0..31 0..31

// B.LT: CB-type, if (flags meet condition) PC = PC + SignExtend({Imm19, 2'b00})
// OP       Imm19               Cond
// 33222222 2222111111111100000 00000
// 10987654 3210987654321098765 43210
// 01010100 2's Comp Imm19      01011

// LDUR: D-type, Reg[Rt] = Mem[Reg[Rn] + SignExtend(Imm9)]
// OP          Imm9      00 Rn    Rt
// 33222222222 211111111 11 00000 00000
// 10987654321 098765432 10 98765 43210
// 11111000010 2's Comp  00 0..31 0..31

// STUR: D-type, Mem[Reg[Rn] + SignExtend(Imm9)] = Reg[Rt]
// OP          Imm9      00 Rn    Rt
// 33222222222 211111111 11 00000 00000
// 10987654321 098765432 10 98765 43210
// 11111000000 2's Comp  00 0..31 0..31

// One instruction word per line: a `//` comment runs to the end of the line,
// so an instruction word must never share a line with a preceding comment.

// MAIN:
1001000100_000000000000_11111_00000   // ADDI X0, X31, #0     // X0 = 0
1001000100_000000000000_11111_00001   // ADDI X1, X31, #0     // X1 = 0
1001000100_000000000000_11111_00010   // ADDI X2, X31, #0     // X2 = 0, counter of branch delay slots.

// Simple forwarding: each result is consumed by the instructions right after it.
1001000100_000000000101_11111_00011   // ADDI X3, X31, #5     // X3 = 5
1001000100_000000000010_00011_00100   // ADDI X4, X3, #2      // X4 = 7  (needs X3 from the previous instruction)
11101011000_00011_000000_00100_00101  // SUBS X5, X4, X3      // X5 = 2  (7 - 5)
11101011000_00100_000000_00011_00110  // SUBS X6, X3, X4      // X6 = -2 (5 - 7)
11101011000_00100_000000_00011_00111  // SUBS X7, X3, X4      // X7 = -2 (5 - 7)

// Forwarding and X31: a value "written" to X31 must never be forwarded.
1001000100_111111111111_11111_11111   // ADDI X31, X31, #4095 // Imm12 is zero-extended, so this tries to write 4095 (not -1) to X31; X31 must stay 0
11101011000_11111_000000_00001_01000  // SUBS X8, X1, X31     // X8 = 0  (X1 = 0, X31 = 0)
11101011000_01000_000000_11111_01000  // SUBS X8, X31, X8     // X8 = 0
11101011000_11111_000000_01000_01000  // SUBS X8, X8, X31     // X8 = 0
11101011000_11111_000000_01000_01000  // SUBS X8, X8, X31     // X8 = 0

// Forwarding in the face of multiple writes: the most recent write to X9 must win.
1001000100_000000000010_11111_01001   // ADDI X9, X31, #2     // X9 = 2
1001000100_000000000001_11111_01001   // ADDI X9, X31, #1     // X9 = 1
11101011000_01001_000000_11111_01010  // SUBS X10, X31, X9    // X10 = -1
11101011000_01001_000000_01010_01010  // SUBS X10, X10, X9    // X10 = -2
11101011000_01001_000000_01010_01010  // SUBS X10, X10, X9    // X10 = -3
11101011000_01001_000000_01010_01010  // SUBS X10, X10, X9    // X10 = -4

// Forwarding involving an instruction that doesn't write the register file:
// STUR's Rt field (X3) is a source, so nothing may be forwarded from it as if it were a destination.
11111000000_000000001_00_00100_00011  // STUR X3, [X4, #1]    // Mem[X4 + 1] = Mem[8] = 5
1001000100_000000000000_00011_01110   // ADDI X14, X3, #0     // X14 = 5

// Forwarding and load/store instructions
1001000100_000000001000_11111_00001   // ADDI X1, X31, #8     // X1 = 8
11111000000_000000000_00_11111_00001  // STUR X1, [X31, #0]   // Mem[0] = 8 (store data comes from the previous instruction)
11111000010_000000000_00_11111_01111  // LDUR X15, [X31, #0]  // X15 = Mem[0] = 8
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop (one instruction between the load and its use)
1001000100_000000000001_01111_10000   // ADDI X16, X15, #1    // X16 = 9

// Flags and the pipelined CPU (set flag and quickly or slowly branch).
// Case 1: B.LT immediately follows the flag-setting SUBS.
10101011000_11111_000000_11111_11111  // ADDS X31, X31, X31   // Noop that resets the flags (0 + 0 = 0: N=0, Z=1, C=0, V=0).
11101011000_00011_000000_11111_11111  // SUBS X31, X31, X3    // 0 - 5 is negative, so LT holds (0 < 5). Sets flags.
01010100_0000000000000000100_01011    // B.LT TAKEN1          // Take the branch (+4). pc=112
1001000100_000000000001_00010_00010   // ADDI X2, X2, #1      // X2 = 1 (increment delay slot counter)
// ERROR1:
000101_00000000000000000000000000     // B ERROR1             // Should never get here (0).
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop

// Case 2: one noop between the flag-setting SUBS and B.LT.
// TAKEN1:
10101011000_11111_000000_11111_11111  // ADDS X31, X31, X31   // Noop that resets the flags (0 + 0 = 0: N=0, Z=1, C=0, V=0).
11101011000_00011_000000_11111_11111  // SUBS X31, X31, X3    // 0 - 5 is negative, so LT holds (0 < 5). Sets flags.
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop - same as above but 1 cycle later.
01010100_0000000000000000100_01011    // B.LT TAKEN2          // Take the branch (+4).
1001000100_000000000001_00010_00010   // ADDI X2, X2, #1      // X2 = 2 (increment delay slot counter)
// ERROR2:
000101_00000000000000000000000000     // B ERROR2             // Should never get here (0).
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop

// Case 3: four noops between the flag-setting SUBS and B.LT.
// TAKEN2:
10101011000_11111_000000_11111_11111  // ADDS X31, X31, X31   // Noop that resets the flags (0 + 0 = 0: N=0, Z=1, C=0, V=0). pc = 156
11101011000_00011_000000_11111_11111  // SUBS X31, X31, X3    // 0 - 5 is negative, so LT holds (0 < 5). Sets flags.
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop - same as above but much longer.
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop
01010100_0000000000000000100_01011    // B.LT TAKEN3          // Take the branch (+4). pc = 180
1001000100_000000000001_00010_00010   // ADDI X2, X2, #1      // X2 = 3 (increment delay slot counter)
// ERROR3:
000101_00000000000000000000000000     // B ERROR3             // Should never get here (0).
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop
// TAKEN3:
1001000100_000000000001_11111_10001   // ADDI X17, X31, #1    // X17 = 1 pc = 196

// Forwarding to conditional branch: CBZ tests X0 written by the instruction just before it.
1001000100_000000000010_11111_00000   // ADDI X0, X31, #2     // X0 = 2
10110100_0000000000000000101_00000    // CBZ X0, ERROR4       // Should not be taken (+5): X0 is 2.
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop.
1001000100_000000000000_11111_00000   // ADDI X0, X31, #0     // X0 = 0
10110100_0000000000000000100_00000    // CBZ X0, SUCCESS      // Should be taken (+4): X0 is now 0. pc = 216
1001000100_000000000001_00010_00010   // ADDI X2, X2, #1      // X2 = 4 (increment delay slot counter)
// ERROR4:
000101_00000000000000000000000000     // B ERROR4             // Loop forever (0).
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop.
// SUCCESS:
1001000100_000001100011_11111_10010   // ADDI X18, X31, #99   // Show that we did finish.
// HALT:
000101_00000000000000000000000000     // B HALT               // Done (0).
1001000100_000000000000_11111_11111   // ADDI X31, X31, #0    // Noop.