Structure of the assembly code
Assembly code for our processor is composed of .code and .data sections. The .data section contains the variable declarations. These variables are the operands of LDI instructions. A general declaration for a variable is:
count: 60
which means that you have a variable named count with initial value 60. The variables (except array variables) always occupy 16-bit space. All variables must start with a letter.
There are also array declarations:
my_array: .space 10
arrays are declared with the keyword .space. In the above example, we must reserve 10 consecutive 16-bit locations for the variable my_array.
.code section contains the instructions. The first instruction must always be mapped to the address 0x000 in memory. All labels of jump and jz instructions must be strings.
An example program in assembly code and its machine code translation
Assembly for
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
.data
count: 60
array: .space 10
char: 0xfe
.code
ldi 0 count
ld 0 0
ldi 1 array
ldi 2 char
ld 2 2
lpp st 1 2
inc 1
dec 0
jz loop
jmp lpp
loop sub 1 2 3
lp1 jmp lp1
The machine code of the above assembly program is listed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
000 1000 ldi 0 count
001 000f
002 2000 ld 0 0
003 1001 ldi 1 array
004 0010
005 1002 ldi 2 char
006 001b
007 2012 ld 2 2
008 3088 st 1 2
009 7609 inc 1
00a 7700 dec 0
00b 4001 jz loop
00c 5ffb jmp lpp
00d 71d1 sub 1 2 3
00e 5fff jmp lp1
00f 003c Variable count, with initial value 60, or 0x3c
010 0000 Ten empty locations of the variable array
011 0000
012 0000
013 0000
014 0000
015 0000
016 0000
017 0000
018 0000
019 0000
01a 0000
01b 00fe The variable char
Source code for a simple assembler
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 |
// To compile: gcc assembler.c -o assembler
// Usage:      assembler [source_file]
//             (source_file defaults to "name_of_program"; output goes to "RAM")
//
// Only minimal error checking is provided.
// Variable names cannot start with numeric characters, i.e. with 0-9.
// Hexadecimals are twos complement.
// The first address of the code section is zero, and the data section follows
// the code section in memory.
// Four tables are formed: jump table, ldi table, label table and variable table.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Fixed capacities of the assembler's buffers and tables.
enum {
    LINE_LENGTH   = 100,   // size of the buffer one source line is read into
    PROGRAM_WORDS = 1000,  // size of the assembled memory image, in words
    MAX_LABELS    = 50,    // there can be 50 labels at most in a program
    MAX_VARIABLES = 50,    // there can be 50 variables at most in a program
    MAX_JUMPS     = 100,   // there can be at most 100 jump instructions
    MAX_LDIS      = 100    // at most 100 ldi instructions may name a symbol
};

// Token separators. The plain set is used for mnemonics and operands; the
// set that also contains ',' is used when stepping to the next token.
static const char DELIMS[] = "\n\t\r ";
static const char DELIMS_COMMA[] = ",\n\t\r ";

// One entry of the label, variable, jump or ldi table: a symbol name and the
// memory location associated with it.
struct label_or_variable {
    int location;
    char *name;
};

// Prints an error message (optionally followed by ": detail") and terminates.
static void fail(const char *message, const char *detail)
{
    if (detail != NULL)
        fprintf(stderr, "assembler: %s: %s\n", message, detail);
    else
        fprintf(stderr, "assembler: %s\n", message);
    exit(EXIT_FAILURE);
}

// Returns a heap-allocated copy of text. The allocation is sized from the
// string length (not from the size of a pointer), so names of any length fit.
static char *copy_string(const char *text)
{
    char *copy = malloc(strlen(text) + 1);
    if (copy == NULL)
        fail("out of memory", NULL);
    strcpy(copy, text);
    return copy;
}

// Appends (location, copy of name) to table, terminating if the table is full.
static void add_symbol(struct label_or_variable *table, int *count, int capacity,
                       int location, const char *name, const char *overflow_message)
{
    if (*count >= capacity)
        fail(overflow_message, name);
    table[*count].location = location;
    table[*count].name = copy_string(name);
    (*count)++;
}

// Returns the index of the first entry called name, or count if there is none.
static int find_symbol(const struct label_or_variable *table, int count, const char *name)
{
    int index = 0;
    while (index < count && strcmp(name, table[index].name) != 0)
        index++;
    return index;
}

// Fetches the next operand of the current line, terminating if it is missing.
static char *next_operand(const char *mnemonic)
{
    char *operand = strtok(NULL, DELIMS);
    if (operand == NULL)
        fail("missing operand for", mnemonic);
    return operand;
}

// Converts the first character of a register operand to its number and places
// it at bit position shift. Multiplication is used instead of << so that an
// unexpected character below '0' does not left-shift a negative value.
static int register_field(const char *operand, int shift)
{
    return (operand[0] - '0') * (1 << shift);
}

// Writes word into the memory image, terminating if address is out of range.
static void store_word(int *program, int address, int word)
{
    if (address < 0 || address >= PROGRAM_WORDS)
        fail("program does not fit in memory", NULL);
    program[address] = word;
}

// Reads lines from fp until one whose first token equals section has been
// consumed. Returns 1 if such a line was found, 0 at end of file.
// Blank lines (no token at all) are skipped.
static int skip_to_section(FILE *fp, const char *section)
{
    char line[LINE_LENGTH];
    while (fgets(line, sizeof line, fp) != NULL) {
        const char *token = strtok(line, DELIMS);
        if (token != NULL && strcmp(token, section) == 0)
            return 1;
    }
    return 0;
}

// Returns nonzero if mnemonic is one of the instructions whose encoding is
// still to be added.
static int is_unimplemented(const char *mnemonic)
{
    static const char *const pending[] = {
        "st", "jz", "sub", "and", "or", "xor", "mov", "dec"
    };
    for (size_t k = 0; k < sizeof pending / sizeof pending[0]; k++)
        if (strcmp(mnemonic, pending[k]) == 0)
            return 1;
    return 0;
}

// Converts a hexadecimal string to integer.
// Characters that are not hexadecimal digits are skipped.
int hex2int(const char *hex)
{
    int result = 0;
    for (; *hex != '\0'; hex++) {
        if ('0' <= *hex && *hex <= '9')
            result = result * 16 + (*hex - '0');
        else if ('a' <= *hex && *hex <= 'f')
            result = result * 16 + (*hex - 'a' + 10);
        else if ('A' <= *hex && *hex <= 'F')
            result = result * 16 + (*hex - 'A' + 10);
    }
    return result;
}

// Assembles the source file into a memory image, prints the four tables and
// writes the image to the file "RAM" (a "v2.0 raw" header followed by one
// four-digit hexadecimal word per line).
// Returns 0 on success; terminates with EXIT_FAILURE on any detected error.
int main(int argc, char *argv[])
{
    const char *source_path = (argc > 1) ? argv[1] : "name_of_program";
    char line[LINE_LENGTH];

    // Zero-initialised so that reserved (.space) words and operands that are
    // never resolved are written out as 0000 instead of indeterminate values.
    int program[PROGRAM_WORDS] = {0};
    int counter = 0;  // holds the address of the machine code instruction

    // A label is a symbol which marks a location within the code section. In the
    // example program, the strings "lpp", "loop" and "lp1" are labels.
    // Labels are used by jump, jz and ldi instructions.
    struct label_or_variable labeltable[MAX_LABELS];
    int nooflabels = 0;  // number of labels encountered during assembly

    // A variable is a symbol which marks a location within the data section. In
    // the example program, the strings "count", "array" and "char" are variables.
    // Variables are used by ldi instructions.
    struct label_or_variable variabletable[MAX_VARIABLES];
    int noofvariables = 0;  // number of variables found in the .data section

    // Jump instructions cannot be assembled readily because the value of the
    // label may not be known yet when the jump is encountered (the label may be
    // defined below the jump). Hence the location of each jump is recorded.
    struct label_or_variable jumptable[MAX_JUMPS];
    int noofjumps = 0;  // number of jump instructions encountered during assembly

    // Variables and labels are used by ldi instructions. Variables are allocated
    // after the code section, so their addresses are not known while the ldi is
    // assembled; a label may also be defined later. Hence the location of the
    // operand word of each such ldi is recorded and patched afterwards.
    struct label_or_variable lditable[MAX_LDIS];
    int noofldis = 0;

    int i, j;

    FILE *fp = fopen(source_path, "r");
    if (fp == NULL) {
        perror(source_path);
        return EXIT_FAILURE;
    }

    skip_to_section(fp, ".code");  // skip till .code section

    //======================== FIRST PASS ========================
    while (fgets(line, sizeof line, fp) != NULL) {
        char *token = strtok(line, DELIMS);  // instruction mnemonic or label

        while (token != NULL) {
            if (strcmp(token, "ldi") == 0) {
                //--------------- LDI: two words, opcode+register then operand
                char *reg = next_operand(token);    // register that ldi loads
                char *value = next_operand(token);  // data to be loaded

                store_word(program, counter, 0x1000 + hex2int(reg));
                counter++;  // move to the second 16 bits of the instruction

                if (value[0] == '0' && value[1] == 'x') {
                    // twos complement hexadecimal operand
                    store_word(program, counter, hex2int(value + 2) & 0xffff);
                } else if (value[0] == '-' || (value[0] >= '0' && value[0] <= '9')) {
                    // decimal operand
                    store_word(program, counter, atoi(value) & 0xffff);
                } else {
                    // A label or a variable: the operand word cannot be
                    // generated yet, so remember where it is and what it names.
                    store_word(program, counter, 0);
                    add_symbol(lditable, &noofldis, MAX_LDIS, counter, value,
                               "too many ldi instructions using symbols");
                }
                counter++;  // skip to the next memory location
            } else if (strcmp(token, "ld") == 0) {
                //--------------- LD: destination register, source register
                char *dst = next_operand(token);
                char *src = next_operand(token);
                int fields = register_field(dst, 0) | register_field(src, 3);
                store_word(program, counter, 0x2000 + (fields & 0x00ff));
                counter++;
            } else if (strcmp(token, "jmp") == 0) {
                //--------------- JUMP: offset is filled in by the second pass
                char *target = next_operand(token);  // the label string
                add_symbol(jumptable, &noofjumps, MAX_JUMPS, counter, target,
                           "too many jump instructions");
                store_word(program, counter, 0x5000);  // just the opcode for now
                counter++;
            } else if (strcmp(token, "add") == 0) {
                //--------------- ADD: three register operands
                char *a = next_operand(token);
                char *b = next_operand(token);
                char *c = next_operand(token);
                int fields = register_field(a, 0) | register_field(b, 3)
                           | register_field(c, 6);
                // The 0x00ff mask keeps only the low two bits of the third
                // register field.
                store_word(program, counter, 0x7000 + (fields & 0x00ff));
                counter++;
            } else if (strcmp(token, "not") == 0) {
                //--------------- NOT: two register operands
                char *a = next_operand(token);
                char *b = next_operand(token);
                int fields = register_field(a, 0) | register_field(b, 3);
                store_word(program, counter, 0x7500 + (fields & 0x00ff));
                counter++;
            } else if (strcmp(token, "inc") == 0) {
                //--------------- INC: one register, used in both fields
                // NOTE(review): the worked example in the accompanying text
                // lists inc as 0x76xx; confirm the opcode against the ISA.
                char *a = next_operand(token);
                int fields = register_field(a, 0) | register_field(a, 3);
                store_word(program, counter, 0x7700 + (fields & 0x00ff));
                counter++;
            } else if (is_unimplemented(token)) {
                // st, jz, sub, and, or, xor, mov, dec: to be added.
                // Nothing is emitted and the operands are not consumed here, so
                // they are picked up as the following tokens of this line and
                // end up in the label branch below.
            } else {
                //--------------- Not an instruction but a label: record it.
                add_symbol(labeltable, &nooflabels, MAX_LABELS, counter, token,
                           "too many labels");
            }

            // If what was read before is an instruction, this will be NULL;
            // if it was a label, this will be an opcode.
            token = strtok(NULL, DELIMS_COMMA);
        }
    }

    //======================== SECOND PASS ========================
    // Supply the address fields of the jump instructions by matching jumptable
    // against labeltable. The field is the 12-bit offset relative to the
    // instruction that follows the jump.
    for (i = 0; i < noofjumps; i++) {
        j = find_symbol(labeltable, nooflabels, jumptable[i].name);
        if (j == nooflabels)
            fail("undefined jump label", jumptable[i].name);
        program[jumptable[i].location] +=
            (labeltable[j].location - jumptable[i].location - 1) & 0x0fff;
    }

    // Search for the start of the .data segment; if there is none, that is ok.
    rewind(fp);
    skip_to_section(fp, ".data");

    // Process the .data segment and generate the variabletable[] array.
    int dataarea = 0;  // number of data words allocated so far
    while (fgets(line, sizeof line, fp) != NULL) {
        char *token = strtok(line, DELIMS);
        if (token == NULL)
            continue;  // blank line
        if (strcmp(token, ".code") == 0)
            break;  // go till the .code segment

        size_t length = strlen(token);
        if (token[length - 1] != ':')
            continue;  // not a variable declaration
        token[length - 1] = '\0';  // drop the ':' to obtain the variable name

        int address = counter + dataarea;
        add_symbol(variabletable, &noofvariables, MAX_VARIABLES, address, token,
                   "too many variables");

        int initial = 0;  // value stored when no numeric initialiser is given
        char *init = strtok(NULL, DELIMS_COMMA);
        if (init == NULL) {
            // no initialiser: the word stays 0
        } else if (strcmp(init, ".space") == 0) {
            char *size = strtok(NULL, DELIMS);
            if (size == NULL)
                fail("missing size after .space for", token);
            // NOTE(review): together with the common dataarea++ below this
            // reserves size+1 words for the array - confirm this is intended.
            dataarea += atoi(size);
        } else if (init[0] == '0' && init[1] == 'x') {
            initial = hex2int(init + 2) & 0xffff;
        } else if (init[0] == '-' || ('0' <= init[0] && init[0] <= '9')) {
            initial = atoi(init) & 0xffff;
        }
        store_word(program, address, initial);

        dataarea++;
        if (dataarea < 0 || counter + dataarea > PROGRAM_WORDS)
            fail("data section does not fit in memory", token);
    }

    // Supply the address fields for the ldi instructions from the variable table.
    for (i = 0; i < noofldis; i++) {
        j = find_symbol(variabletable, noofvariables, lditable[i].name);
        if (j < noofvariables)
            program[lditable[i].location] = variabletable[j].location;
    }

    // Supply the address fields for the ldi instructions from the label table.
    // This runs after the variable lookup, so a label wins over a variable of
    // the same name.
    for (i = 0; i < noofldis; i++) {
        j = find_symbol(labeltable, nooflabels, lditable[i].name);
        if (j < nooflabels) {
            program[lditable[i].location] = (labeltable[j].location) & 0x0fff;
            printf("%d %d %d\n", i, j, (labeltable[j].location));
        }
    }

    // Display the resulting tables.
    printf("LABEL TABLE\n");
    for (i = 0; i < nooflabels; i++)
        printf("%d %s\n", labeltable[i].location, labeltable[i].name);
    printf("\n");

    printf("JUMP TABLE\n");
    for (i = 0; i < noofjumps; i++)
        printf("%d %s\n", jumptable[i].location, jumptable[i].name);
    printf("\n");

    printf("VARIABLE TABLE\n");
    for (i = 0; i < noofvariables; i++)
        printf("%d %s\n", variabletable[i].location, variabletable[i].name);
    printf("\n");

    printf("LDI INSTRUCTIONS\n");
    for (i = 0; i < noofldis; i++)
        printf("%d %s\n", lditable[i].location, lditable[i].name);
    printf("\n");

    fclose(fp);

    // Write the memory image: code words first, then the data words.
    FILE *out = fopen("RAM", "w");
    if (out == NULL) {
        perror("RAM");
        return EXIT_FAILURE;
    }
    fprintf(out, "v2.0 raw\n");
    for (i = 0; i < counter + dataarea; i++)
        fprintf(out, "%04x\n", (unsigned)program[i]);
    if (fclose(out) != 0) {  // fclose flushes; a failure means the image is incomplete
        perror("RAM");
        return EXIT_FAILURE;
    }

    return 0;
}