In this part, you will be implementing the rest of the SM213 ISA, consisting of instructions that control the flow of execution. You may use the version you implemented yourself for Assignment 2 as a starting point.
The six instructions you must implement are branch, branch if equal, branch if greater, jump, get PC, and indirect jump. Their format and semantics are listed on the slides, as well as in the table below. Note that the indirect-jump offset is unsigned and so for indirect jump, pp ranges from 0 to 255. On the other hand, the branch pc-relative value is signed and so for branches, pp ranges from -128 to 127.
// This code contains the solution to CPSC 213 Assignment 2.
// Do not distribute this file or any portion of it to anyone.
// Do not remove this comment.
package arch.sm213.machine.student;
import arch.sm213.machine.AbstractSM213CPU;
import machine.AbstractMainMemory;
import machine.RegisterSet;
import util.UnsignedByte;
/**
* The Simple Machine CPU.
*
* Simulate the execution of a single cycle of the Simple Machine SM213 CPU.
*/
public class CPU extends AbstractSM213CPU {
/**
* Create a new CPU.
*
* @param name fully-qualified name of CPU implementation.
* @param memory main memory used by CPU.
*/
public CPU (String name, AbstractMainMemory memory) {
super (name, memory);
}
/**
* Fetch Stage of CPU Cycle.
* Fetch instruction at address stored in "pc" register from memory into instruction register
* and set "pc" to point to the next instruction to execute.
*
* Input register: pc.
* Output registers: pc, instruction, insOpCode, insOp0, insOp1, insOp2, insOpImm, insOpExt
* @see AbstractSM213CPU for pc, instruction, insOpCode, insOp0, insOp1, insOp2, insOpImm, insOpExt
*
* @throws MainMemory.InvalidAddressException when program counter contains an invalid memory address
*/
@Override protected void fetch() throws MainMemory.InvalidAddressException {
int pcVal = pc.get();
UnsignedByte[] ins = mem.read (pcVal, 2);
byte opCode = (byte) (ins[0].value() >>> 4);
insOpCode.set (opCode);
insOp0.set (ins[0].value() & 0x0f);
insOp1.set (ins[1].value() >>> 4);
insOp2.set (ins[1].value() & 0x0f);
insOpImm.set (ins[1].value());
pcVal += 2;
switch (opCode) {
case 0x0:
case 0xb:
long opExt = mem.readIntegerUnaligned (pcVal) & 0xffffffffL;
pcVal += 4;
insOpExt.set (opExt);
instruction.set (ins[0].value() << 40 | ins[1].value() << 32 | opExt);
break;
default:
insOpExt.set (0);
instruction.set (ins[0].value() << 40 | ins[1].value() << 32);
}
pc.set (pcVal);
}
/**
* Execution Stage of CPU Cycle.
* Execute instruction that was fetched by Fetch stage.
*
* Input state: pc, instruction, insOpCode, insOp0, insOp1, insOp2, insOpImm, insOpExt, reg, mem
* Ouput state: pc, reg, mem
* @see AbstractSM213CPU for pc, instruction, insOpCode, insOp0, insOp1, insOp2, insOpImm, insOpExt
* @see MainMemory for mem
* @see machine.AbstractCPU for reg
*
* @throws InvalidInstructionException when instruction format is invalid.
* @throws MachineHaltException when instruction is the HALT instruction.
* @throws RegisterSet.InvalidRegisterNumberException when instruction references an invalid register (i.e, not 0-7).
* @throws MainMemory.InvalidAddressException when instruction references an invalid memory address.
*/
@Override protected void execute () throws InvalidInstructionException, MachineHaltException, RegisterSet.InvalidRegisterNumberException, MainMemory.InvalidAddressException
{
switch (insOpCode.get()) {
case 0x0: // ld $v, d .............. 0d-- vvvv vvvv
reg.set (insOp0.get(), insOpExt.get());
break;
case 0x1: // ld o(rs), rd .......... 1psd (p = o / 4)
reg.set (insOp2.get(), mem.readInteger ((insOp0.get() << 2) + reg.get (insOp1.get())));
break;
case 0x2: // ld (rs, ri, 2), rd .... 2sid
reg.set (insOp2.get(), mem.readInteger (reg.get (insOp0.get()) + (reg.get (insOp1.get())<<2)));
break;
case 0x3: // st rs, o(rd) .......... 3spd (p = o / 4)
mem.writeInteger ((insOp1.get() << 2) + reg.get (insOp2.get()), reg.get (insOp0.get()));
break;
case 0x4: // st rs, (rd, ri, 4) .... 4sdi
mem.writeInteger (reg.get (insOp1.get()) + (reg.get (insOp2.get())<<2), reg.get (insOp0.get()));
break;
case 0x6: // ALU ................... 6-sd
switch (insOp0.get()) {
case 0x0: // mov rs, rd ........ 60sd
reg.set (insOp2.get(), reg.get (insOp1.get()));
break;
case 0x1: // add rs, rd ........ 61sd
reg.set (insOp2.get(), reg.get (insOp1.get()) + reg.get (insOp2.get()));
break;
case 0x2: // and rs, rd ........ 62sd
reg.set (insOp2.get(), reg.get (insOp1.get()) & reg.get (insOp2.get()));
break;
case 0x3: // inc rr ............ 63-r
reg.set (insOp2.get(), reg.get (insOp2.get()) + 1);
break;
case 0x4: // inca rr ........... 64-r
reg.set (insOp2.get(), reg.get (insOp2.get()) + 4);
break;
case 0x5: // dec rr ............ 65-r
reg.set (insOp2.get(), reg.get (insOp2.get()) - 1);
break;
case 0x6: // deca rr ........... 66-r
reg.set (insOp2.get(), reg.get (insOp2.get()) - 4);
break;
case 0x7: // not ............... 67-r
reg.set (insOp2.get(), ~reg.get (insOp2.get()));
break;
case 0xf: // gpc ............... 6fpr
reg.set(insOp2.get(), pc.get() + (insOp1.get() << 1));
break;
default:
throw new InvalidInstructionException();
}
break;
case 0x7: // sh? $i,rd ............. 7dii
if (insOpImm.get() > 0)
reg.set (insOp0.get(), reg.get (insOp0.get()) << insOpImm.get());
else
reg.set (insOp0.get(), reg.get (insOp0.get()) >> -insOpImm.get());
break;
case 0x8: // br a .................. 8-pp (a = pc + pp * 2)
pc.set(pc.get() + ((byte) insOpImm.get()) * 2);
break;
case 0x9: // beq rs, a ............. 9rpp (a = pc + pp * 2)
if (reg.get(insOp0.get()) != 0) break;
pc.set(pc.get() + ((byte) insOpImm.get()) * 2);
break;
case 0xa: // bg rs, a .............. arpp (a = pc + pp * 2)
if (reg.get(insOp0.get()) <= 0) break;
pc.set(pc.get() + ((byte) insOpImm.get()) * 2);
break;
case 0xb: // j i ................... b--- iiii iiii
pc.set((int) insOpExt.get());
break;
case 0xc: // j o(rr) ............... crpp (pp = o / 2)
pc.set(reg.get(insOp0.get()) + (insOpImm.getUnsigned() * 2));
break;
case 0xf: // halt or nop ............. f?--
if (insOp0.get() == 0)
// halt .......................... f0--
throw new MachineHaltException();
else if (insOp0.getUnsigned() == 0xf)
// nop ........................... ff--
break;
break;
default:
throw new InvalidInstructionException();
}
}
}
Run each of these snippets through the simulator. Carefully observe what happens as they execute so that you could explain this code to someone if they asked. These files are for your reference only.
Now use the files above as a reference to translate q2.c into commented assembly code, placing your code in a file named q2.s. Labels for global variables should be the name of the variable. Run your code in the simulator and examine every variable to be sure they have the correct value when execution halts.
# q2.s: counts in c how many indices i in [0, n) satisfy a[i] > b[i].
# Roughly:  for (i = 0; i < n; i++) if (a[i] > b[i]) c = c + 1;
# (presumably mirrors q2.c, which is not shown here; confirm against it)
# Register use: r0 = 0, r1 = &i, r2 = -n, r3 = i or i - n, r4/r5 = scratch.
# The ISA has no subtract, so x - y is computed as x + (~y + 1).
.pos 0x1000
# === Loop Initialization ===
ld $0, r0 # r0 = 0
ld $i, r1 # r1 = &i
st r0, 0(r1) # i = 0
ld $n, r2 # r2 = &n
ld (r2), r2 # r2 = n
not r2 # r2 = ~n
inc r2 # r2 = -n (two's complement negation; stays in r2 for the whole loop)
loop:
# === Loop Conditional Check ===
ld (r1), r3 # r3 = i
add r2, r3 # r3 = i - n
# if i >= n, goto end_loop; tested as two branches since only > 0 and == 0 exist
# i - n > 0
# i > n
bgt r3, end_loop
# i - n == 0
# i == n
beq r3, end_loop
# === Loop Body ===
ld (r1), r3 # r3 = i (reloaded: r3 was overwritten with i - n)
ld $a, r4 # r4 = &a
ld (r4, r3, 4), r4 # r4 = a[i]
ld $b, r5 # r5 = &b
ld (r5, r3, 4), r5 # r5 = b[i]
# === Conditional Check ===
not r5 # r5 = ~b[i]
inc r5 # r5 = -b[i]
add r5, r4 # r4 = a[i] - b[i]
# a[i] - b[i] > 0
# a[i] > b[i]
bgt r4, if
br end_if # a[i] <= b[i]: skip the increment
if:
# === Conditional Body ===
ld $c, r4 # r4 = &c
ld (r4), r5 # r5 = c
inc r5 # r5 = c + 1
st r5, (r4) # c = c+1
end_if:
# === Index Variable Incrementation ===
ld (r1), r3 # r3 = i
inc r3 # r3 = i + 1
st r3, (r1) # i = i + 1
br loop
end_loop:
halt
# === Global Variables ===
# With the values below, execution halts with i = 5 and c = 2
# (a[i] > b[i] only at i = 2 and i = 4).
.pos 0x2000
i: .long -1 # i (set to 0 by the loop initialization)
n: .long 5 # n = number of elements compared
a: .long 10 # a[0]
.long 20 # a[1]
.long 30 # a[2]
.long 40 # a[3]
.long 50 # a[4]
b: .long 11 # b[0]
.long 20 # b[1]
.long 28 # b[2]
.long 44 # b[3]
.long 48 # b[4]
c: .long 0 # c = running count of indices where a[i] > b[i]
Implement an assembly-language program that examines a list of student grades and finds the student with the median average grade; this student’s id should be placed in the variable m.
Your program must correspond to a C program where the input list of students, and the output student id are given as follows.
// One student record: six ints in declaration order (sizeof(struct Student) is 24).
struct Student {
int sid; // student id
int grade[4]; // the four grades that are averaged
int average; // this is computed by your program
};
int n; // number of students
int m; // you store the median student's id here
struct Student* s; // a dynamic array of n students
Your assembly file must format student records exactly as C would; i.e., a struct with 6 integers. You must use the following assembly declarations for this input and output data.
Note: this example is for an array with one student. You should obviously test your code on larger arrays. The location of each variable is also not predetermined. Your code should work no matter where in memory each of the variables is placed.
n: .long 1 # n: number of students (just one in this example)
m: .long 0 # m: output, put the median student's id here
s: .long base # s: pointer holding the address of the array's first record
base: .long 1234 # s[0].sid (student ID)
.long 80 # s[0].grade[0]
.long 60 # s[0].grade[1]
.long 78 # s[0].grade[2]
.long 90 # s[0].grade[3]
.long 0 # s[0].average (computed by the program)
Note that the code above uses the label base as the base of the array to simplify the assignment of values to s; you may assign this label to s, but your code must not use this label in its implementation. You must use s as the pointer variable that contains the base address of the array. The autograder assigns ("allocates") a different base address to s, so your code must be ready to handle that.
Put your solution in a file named q3.s.
This is a very challenging programming problem. It will stretch (and strengthen) your ability to manage a large number of programming details, as is required when writing assembly. To be successful with this you must be very disciplined to program and test incrementally.
Observe that the program breaks down into several subproblems. You should tackle them one at a time and thoroughly test each step before moving to the next.
Here’s a list of the key subproblems. You might want to further sub-divide things, but do not try to combine steps.
/* Bubble sort: put the n integers of a into ascending order, in place. */
void sort (int* a, int n) {
  int last = n - 1;              /* index of the last unsorted element */
  while (last > 0) {
    int k;
    /* One pass: carry the largest value in a[0..last] up to a[last]. */
    for (k = 0; k < last; k++) {
      int left  = a[k];
      int right = a[k + 1];
      if (left > right) {
        a[k]     = right;
        a[k + 1] = left;
      }
    }
    last--;
  }
}
Take this step by step and incorporate your work from Steps 3-5 above. For Bubble Sort, here are two subproblems.
Notice that it is not necessary to do these steps in the order listed above. Steps 1 and 2, for example, are completely independent from Steps 3-7. Step 7 is independent from the other steps. You could do Step 6 before Steps 3-5 and then incorporate the conditional swap once the other parts of Step 6 are working. Similarly, you can do Step 5 before Steps 3 or 4 and incorporate them into the procedure later. The key thing here is to work on each piece independently and then carefully combine steps to eventually produce the program. The program itself will be on the order of 70 to 100 assembly-language instructions. That sounds like a lot ... and it is. But if you think of this as seven steps each with 15 or so instructions (some will have fewer), it's not so bad. Or maybe it will be bad, but at least not impossible.
Be sure to comment every line of your code and to separate subproblems with comments and labels so that it is easy for you to see what each part of the code does without having to read the code.
You will have noticed that the variable s is a dynamic array of type struct Student and that sizeof(struct Student) is 24. And so you might find that you’d like to multiply or divide by 24 (i.e., to convert between an array index and a byte-offset), but you’ll recall that you can’t do this with a single instruction in our ISA. Then you’ll notice that x*24 = x*16 + x*8 and you’ll see that you can multiply by 24 (or divide) using 3 instructions (or 4 depending on how you count).
Keeping track of registers is going to be a challenge. Note that this program is too big for you to use a distinct register for every value in the program. You are going to have to re-use registers to store different things in different parts of the program.
This will add complexity, for example, when combining two steps or when adding a step to the rest of the program. For example, if your code for Step 4 uses register r0 and one of the loops you write in Step 6 also uses r0, you’re going to have to change one of them to use a different register (or use a procedure as described in the next section). So, for parts of the code that connect like this, some pre-planning will help.
Another useful strategy is to group sections of code (one or maybe a few steps) and treat them as independent stages with respect to registers. At the beginning of a stage you assume nothing about the current value of registers and you are free to use any registers you like within that stage. At the end of the stage, any register values that are needed by subsequent stages should be written to memory. You may want to create some additional variables to store these temporary values.
One way to divide code into stages is to use procedures. Step 5 suggests that you do this to encapsulate the code that conditionally swaps two adjacent elements of the list (i.e., Steps 3 and 4). You can use this approach to simplify register management by having the procedure save registers to memory before it starts and restore them from memory before it returns (e.g., using temporary variables). Any register the procedure saves in this way is a register it is free to use without interfering with the registers used in the two loops in Step 6 that call it.
This swap procedure needs one argument / parameter: i.e., the index of one of the array elements to swap. The index of the other element is this value plus or minus one, depending on how you do it. The caller should pass this value in a register; for example if the loop has the index in r4 then the procedure should just use r4 to get this value.
Use gpc and j to call the procedure and use the indirect jump to return. Do not worry about creating a stack.
All of this is optional, and you can do this for other parts of your program as well.
Finally, partial marks (if you need them ... probably not!) will be awarded based on the number of the steps listed above that you’ve written correctly. Submitting a big blob of assembly that doesn’t do any of the steps right won’t be worth anything. So, work incrementally. Test each step separately. Once you have a step working, save it in an auxiliary file just in case.
# q3.s: finds the student with the median average grade and stores that
# student's id in m. Three stages, each reloading what it needs from memory:
#   1. avg_loop   - compute s[k].average for every student
#   2. sort_outer - bubble-sort the student records by average, ascending
#   3. sort_done  - copy the sid of the middle record (index n/2) into m
# Each record is 24 bytes: sid at offset 0, grade[0..3] at 4..16, average at 20.
# Loop counters i and j live in memory; registers are scratch within a stage.
# Assumes n >= 1: with n = 0 the final stage would still read s[0].sid.
.pos 0x1000
# Jump to main program
br main # target is the very next instruction, so this changes nothing
main:
# === Stage 1: compute every student's average ===
# for (i = n; i > 0; i--)
# sum = s[i-1].grade[0] + ... + s[i-1].grade[3]
# s[i-1].average = sum / 4
ld $n, r0 # r0 = &n
ld (r0), r0 # r0 = n (number of students)
ld $i, r7 # r7 = &i
st r0, (r7) # i = n
avg_loop:
ld $i, r7 # r7 = &i
ld (r7), r7 # r7 = i
beq r7, avg_done # if i == 0, all averages computed
dec r7 # r7 = i - 1 (0-based student index)
# Compute byte offset = (i-1)*24 = (i-1)*16 + (i-1)*8
mov r7, r0 # r0 = i - 1
shl $4, r7 # r7 = (i-1) * 16
shl $3, r0 # r0 = (i-1) * 8
add r0, r7 # r7 = (i-1) * 24
# Get address of student s[i-1]
ld $s, r1 # r1 = &s
ld (r1), r1 # r1 = s (pointer to student array)
add r7, r1 # r1 = &s[i-1]
# Sum the 4 grades at offsets 4, 8, 12, 16
ld 4(r1), r2 # r2 = s[i-1].grade[0]
ld 8(r1), r3 # r3 = s[i-1].grade[1]
add r3, r2 # r2 = grade[0] + grade[1]
ld 12(r1), r3 # r3 = s[i-1].grade[2]
add r3, r2 # r2 += grade[2]
ld 16(r1), r3 # r3 = s[i-1].grade[3]
add r3, r2 # r2 = sum of all 4 grades
# Divide sum by 4 with a right shift; this equals truncating division
# as long as the sum is non-negative
shr $2, r2 # r2 = sum / 4
# Store computed average into the student struct
st r2, 20(r1) # s[i-1].average = r2
# Decrement i and repeat
ld $i, r7 # r7 = &i
ld (r7), r0 # r0 = i
dec r0 # r0 = i - 1
st r0, (r7) # i = i - 1
br avg_loop # continue loop
avg_done:
# === Stage 2: bubble-sort the records by average (ascending) ===
# for (i = n-1; i > 0; i--)
# for (j = 0; j < i; j++)
# if (s[j].average > s[j+1].average)
# swap(s[j], s[j+1])
ld $n, r0 # r0 = &n
ld (r0), r0 # r0 = n
dec r0 # r0 = n - 1
ld $i, r7 # r7 = &i
st r0, (r7) # i = n - 1
sort_outer:
ld $i, r7 # r7 = &i
ld (r7), r7 # r7 = i
bgt r7, sort_inner_init # if i > 0, proceed to inner loop
br sort_done # else sorting is complete
sort_inner_init:
ld $0, r0 # r0 = 0
ld $j, r7 # r7 = &j
st r0, (r7) # j = 0
sort_inner:
# Load j and i for loop bound check
ld $j, r7 # r7 = &j
ld (r7), r3 # r3 = j
ld $i, r7 # r7 = &i
ld (r7), r4 # r4 = i
# Check j < i by computing j - i
mov r3, r7 # r7 = j
not r4 # r4 = ~i
inc r4 # r4 = -i
add r4, r7 # r7 = j - i
bgt r7, sort_inner_end # if j > i, exit inner loop
beq r7, sort_inner_end # if j == i, exit inner loop
# Compute &s[j] = s + j * 24
ld $s, r1 # r1 = &s
ld (r1), r1 # r1 = s (base address)
mov r3, r5 # r5 = j
mov r3, r6 # r6 = j
shl $4, r5 # r5 = j * 16
shl $3, r6 # r6 = j * 8
add r6, r5 # r5 = j * 24
add r5, r1 # r1 = &s[j]
mov r1, r5 # r5 = &s[j] (save for swap)
# Compare s[j].average with s[j+1].average
ld 20(r5), r1 # r1 = s[j].average
ld 44(r5), r2 # r2 = s[j+1].average (offset 20+24=44)
# Compute difference: s[j].avg - s[j+1].avg
not r2 # r2 = ~s[j+1].average
inc r2 # r2 = -s[j+1].average
add r2, r1 # r1 = s[j].avg - s[j+1].avg
bgt r1, do_swap # if positive, s[j] has higher avg -> swap
br no_swap # otherwise skip swap
# Swap the two whole records field by field so each sid stays with its average.
# r5 = &s[j]; s[j+1] starts at r5 + 24
do_swap:
# Swap field 0: sid (offsets 0 and 24)
ld 0(r5), r1 # r1 = s[j].sid
ld 24(r5), r2 # r2 = s[j+1].sid
st r2, 0(r5) # s[j].sid = s[j+1].sid
st r1, 24(r5) # s[j+1].sid = old s[j].sid
# Swap field 1: grade[0] (offsets 4 and 28)
ld 4(r5), r1 # r1 = s[j].grade[0]
ld 28(r5), r2 # r2 = s[j+1].grade[0]
st r2, 4(r5) # s[j].grade[0] = s[j+1].grade[0]
st r1, 28(r5) # s[j+1].grade[0] = old s[j].grade[0]
# Swap field 2: grade[1] (offsets 8 and 32)
ld 8(r5), r1 # r1 = s[j].grade[1]
ld 32(r5), r2 # r2 = s[j+1].grade[1]
st r2, 8(r5) # s[j].grade[1] = s[j+1].grade[1]
st r1, 32(r5) # s[j+1].grade[1] = old s[j].grade[1]
# Swap field 3: grade[2] (offsets 12 and 36)
ld 12(r5), r1 # r1 = s[j].grade[2]
ld 36(r5), r2 # r2 = s[j+1].grade[2]
st r2, 12(r5) # s[j].grade[2] = s[j+1].grade[2]
st r1, 36(r5) # s[j+1].grade[2] = old s[j].grade[2]
# Swap field 4: grade[3] (offsets 16 and 40)
ld 16(r5), r1 # r1 = s[j].grade[3]
ld 40(r5), r2 # r2 = s[j+1].grade[3]
st r2, 16(r5) # s[j].grade[3] = s[j+1].grade[3]
st r1, 40(r5) # s[j+1].grade[3] = old s[j].grade[3]
# Swap field 5: average (offsets 20 and 44)
ld 20(r5), r1 # r1 = s[j].average
ld 44(r5), r2 # r2 = s[j+1].average
st r2, 20(r5) # s[j].average = s[j+1].average
st r1, 44(r5) # s[j+1].average = old s[j].average
no_swap:
# Increment j and continue inner loop
ld $j, r7 # r7 = &j
ld (r7), r0 # r0 = j
inc r0 # r0 = j + 1
st r0, (r7) # j = j + 1
br sort_inner # repeat inner loop
sort_inner_end:
# Decrement i and continue outer loop
ld $i, r7 # r7 = &i
ld (r7), r0 # r0 = i
dec r0 # r0 = i - 1
st r0, (r7) # i = i - 1
br sort_outer # repeat outer loop
sort_done:
# === Stage 3: store the median student's id in m ===
# For odd n, median is at index n/2 (0-based) in sorted array
ld $n, r0 # r0 = &n
ld (r0), r0 # r0 = n
shr $1, r0 # r0 = n / 2 (median index, 0-based)
# Compute &s[median] = s + median_index * 24
ld $s, r1 # r1 = &s
ld (r1), r1 # r1 = s (base address of array)
mov r0, r2 # r2 = median_index
shl $4, r0 # r0 = median_index * 16
shl $3, r2 # r2 = median_index * 8
add r2, r0 # r0 = median_index * 24
add r0, r1 # r1 = &s[median_index]
# Store median student's sid in m
ld (r1), r0 # r0 = s[median_index].sid
ld $m, r1 # r1 = &m
st r0, (r1) # m = median student's sid
halt # done
# === Data ===
# Sample input: the averages come out as 77, 90 and 55, so after sorting the
# middle record is student 1 and the program halts with m = 1.
.pos 0x2000
n: .long 3 # number of students
m: .long 0 # output: median student's sid goes here
s: .long base # pointer to student array
i: .long 0 # loop variable i
j: .long 0 # loop variable j
base: .long 1 # student 0: sid = 1
.long 80 # grade[0]
.long 60 # grade[1]
.long 78 # grade[2]
.long 90 # grade[3]
.long 0 # average (computed by program)
.long 2 # student 1: sid = 2
.long 90 # grade[0]
.long 95 # grade[1]
.long 88 # grade[2]
.long 87 # grade[3]
.long 0 # average (computed by program)
.long 3 # student 2: sid = 3
.long 50 # grade[0]
.long 60 # grade[1]
.long 70 # grade[2]
.long 40 # grade[3]
.long 0 # average (computed by program)