Structure of the assembly code
Assembly code for our processor is composed of .code and .data sections. .code section will contain the variable declarations. These variables will be the operands for LDI instructions. A general declaration for a variable is:
count: 60
which means that you have a variable named count with initial value 60. The variables (except array variables) always occupy 16-bit space. All variables must start with a letter.
There are also array declarations:
my_array: .space 10
arrays are declared with the keyword .space. In the above example, we must reserve 10 consecutive 16-bit locations for the variable my_array.
.code section contains the instructions. The first instruction must always be mapped to the address 0x000 in memory. All labels of jump and jz instructions must be strings.
An example program in assembly code and its machine code translation
Assembly for
.data count: 60 array: .space 10 char: 0xfe .code ldi 0 count ld 0 0 ldi 1 array ldi 2 char ld 2 2 lpp st 1 2 inc 1 dec 0 jz loop jmp lpp loop sub 1 2 3 lp1 jmp lp1
The machine code of the above assembly program is listed below.
000 1000 ldi 0 count 001 000f 002 2000 ld 0 0 003 1001 ldi 1 array 004 0010 005 1002 ldi 2 char 006 001b 007 2012 ld 2 2 008 3088 st 1 2 009 7609 inc 1 00a 7700 dec 0 00b 4001 jz loop 00c 5ffb jmp lpp 00d 71d1 sub 1 2 3 00e 5fff jmp lp1 00f 003c Variable count, with initial value 60, or 0x3c 010 0000 Ten empty locations of the variable array 011 0000 012 0000 013 0000 014 0000 015 0000 016 0000 017 0000 018 0000 019 0000 01a 0000 01b 00fe The variable char
Source code for a simple assembler
// to compile, gcc assembler.c -o assembler // No error check is provided. // Variable names cannot start with numeric characters, ie, with 0-9. // hexadecimals are twos complement. // first address of the code section is zero, and the data section follows the code section in memory. // four tables are formed: jump table, ldi table, label table and variable table. #include <stdio.h> #include <stdlib.h> #include <string.h> //Converts a hexadecimal string to integer. int hex2int( char* hex) { int result=0; while ((*hex)!='\0') { if (('0'<=(*hex))&&((*hex)<='9')) result = result*16 + (*hex) -'0'; else if (('a'<=(*hex))&&((*hex)<='f')) result = result*16 + (*hex) -'a'+10; else if (('A'<=(*hex))&&((*hex)<='F')) result = result*16 + (*hex) -'A'+10; hex++; } return(result); } main() { FILE *fp; char line[100]; char *token = NULL; char *op1, *op2, *op3, *label; char ch; int chch; int program[1000]; int counter=0; //holds the address of the machine code instruction struct label_or_variable { int location; char *name; }; // A label is a symbol which mark a location within the code section. In the example // program above, the strings "lpp", "loop" and "lp1" are labels. // In reptile, labels are used by jump, jz and ldi instructions. struct label_or_variable labeltable[50]; //there can be 50 labels at most in our programs int nooflabels = 0; //number of labels encountered during assembly. // A variable is a symbol which mark a location within the data section. In the example // program above, the strings "", "" and "" are variables. // In reptile, variables are used by ldi instructions. struct label_or_variable variabletable[50]; // The list of variables in .data section and their locations. int noofvariables = 0; //number of jumps encountered during assembly. // Jump instructions cannot be assembled readily because we may not know the value of // the label when we encountered a jump instruction. This happens if the label used by // that jump instruction appear below that jump instruction. This is the situation // with the label "loop" in the example program above. Hence, the location of jump // instructions must be stored. struct label_or_variable jumptable[100]; //There can be at most 100 jumps int noofjumps=0; //number of jump instructions encountered during assembly. //Variables and labels are used by ldi instructions. //The memory for the variables are traditionally allocated at the end of the code section. //Hence their addresses are not known when we assemble a ldi instruction. Also, the value of //a label may not be known when we encounter a ldi instruction which uses that label. //Hence, the location of the ldi instructions must be kept, and these instructions must be //modified when we discover the address of the label or variable that it uses. struct label_or_variable lditable[100]; int noofldis=0; fp = fopen("name_of_program","r"); if (fp != NULL) { while(fgets(line,sizeof line,fp)!= NULL) //skip till .code section { token=strtok(line,"\n\t\r "); if (strcmp(token,".code")==0 ) break; } while(fgets(line,sizeof line,fp)!= NULL) { token=strtok(line,"\n\t\r "); //get the instruction mnemonic or label //======================================== FIRST PASS ====================================================== while (token) { if (strcmp(token,"ldi")==0) //---------------LDI INSTRUCTION-------------------- { op1 = strtok(NULL,"\n\t\r "); //get the 1st operand of ldi, which is the register that ldi loads op2 = strtok(NULL,"\n\t\r "); //get the 2nd operand of ldi, which is the data that is to be loaded program[counter]=0x1000+hex2int(op1); //generate the first 16-bit of the ldi instruction counter++; //move to the second 16-bit of the ldi instruction if ((op2[0]=='0')&&(op2[1]=='x')) //if the 2nd operand is twos complement hexadecimal program[counter]=hex2int(op2+2)&0xffff; //convert it to integer and form the second 16-bit else if (( (op2[0])=='-') || ((op2[0]>='0')&&(op2[0]<='9'))) //if the 2nd operand is decimal program[counter]=atoi(op2)&0xffff; //convert it to integer and form the second 16-bit else //if the second operand is not decimal or hexadecimal, it is a laber or a variable. { //in this case, the 2nd 16-bits of the ldi instruction cannot be generated. lditable[noofldis].location = counter; //record the location of this 2nd 16-bit op1=(char*)malloc(sizeof(op2)); //and the name of the label/variable that it must contain strcpy(op1,op2); //in the lditable array. lditable[noofldis].name = op1; noofldis++; } counter++; //skip to the next memory location } else if (strcmp(token,"ld")==0) //------------LD INSTRUCTION--------------------- { op1 = strtok(NULL,"\n\t\r "); //get the 1st operand of ld, which is the destination register op2 = strtok(NULL,"\n\t\r "); //get the 2nd operand of ld, which is the source register ch = (op1[0]-48)| ((op2[0]-48) << 3); //form bits 11-0 of machine code. 48 is ASCII value of '0' program[counter]=0x2000+((ch)&0x00ff); //form the instruction and write it to memory counter++; //skip to the next empty location in memory } else if (strcmp(token,"st")==0) //-------------ST INSTRUCTION-------------------- { //to be added } else if (strcmp(token,"jz")==0) //------------- CONDITIONAL JUMP ------------------ { //to be added } else if (strcmp(token,"jmp")==0) //-------------- JUMP ----------------------------- { op1 = strtok(NULL,"\n\t\r "); //read the label string jumptable[noofjumps].location = counter; //write the jz instruction's location into the jumptable op2=(char*)malloc(sizeof(op1)); //allocate space for the label strcpy(op2,op1); //copy the label into the allocated space jumptable[noofjumps].name=op2; //point to the label from the jumptable noofjumps++; //skip to the next empty location in jumptable program[counter]=0x5000; //write the incomplete instruction (just opcode) to memory counter++; //skip to the next empty location in memory. } else if (strcmp(token,"add")==0) //----------------- ADD ------------------------------- { op1 = strtok(NULL,"\n\t\r "); op2 = strtok(NULL,"\n\t\r "); op3 = strtok(NULL,"\n\t\r "); chch = (op1[0]-48)| ((op2[0]-48)<<3)|((op3[0]-48)<<6); program[counter]=0x7000+((chch)&0x00ff); counter++; } else if (strcmp(token,"sub")==0) { //to be added } else if (strcmp(token,"and")==0) { //to be added } else if (strcmp(token,"or")==0) { //to be added } else if (strcmp(token,"xor")==0) { //to be added } else if (strcmp(token,"not")==0) { op1 = strtok(NULL,"\n\t\r "); op2 = strtok(NULL,"\n\t\r "); ch = (op1[0]-48)| ((op2[0]-48)<<3); program[counter]=0x7500+((ch)&0x00ff); counter++; } else if (strcmp(token,"mov")==0) { //to be added } else if (strcmp(token,"inc")==0) { op1 = strtok(NULL,"\n\t\r "); ch = (op1[0]-48)| ((op1[0]-48)<<3); program[counter]=0x7700+((ch)&0x00ff); counter++; } else if (strcmp(token,"dec")==0) { //to be added } else //------WHAT IS ENCOUNTERED IS NOT AN INSTRUCTION BUT A LABEL. UPDATE THE LABEL TABLE-------- { labeltable[nooflabels].location = counter; //read the label and update labeltable. op1=(char*)malloc(sizeof(token)); strcpy(op1,token); labeltable[nooflabels].name=op1; nooflabels++; } token = strtok(NULL,",\n\t\r "); // if what is read before is an instruction, this will be NULL //if what is read before is an label, this will be an opcode. } } //================================= SECOND PASS ============================== //supply the address fields of the jump and jz instructions by matching jumptable and labeltable int i,j; for (i=0; i<noofjumps;i++) //for all jump/jz instructions encountered { j=0; while ((j<nooflabels)&&( strcmp(jumptable[i].name , labeltable[j].name ) != 0 )) //if the label for this jump/jz does not match with the j++; // jth label in the labeltable, check the next label.. program[jumptable[i].location] +=(labeltable[j].location-jumptable[i].location-1)&0x0fff; //copy the jump address into memory. } //search for the start of the .data segment rewind(fp); while(fgets(line,sizeof line,fp)!= NULL) //skip till .data, if no .data, also ok. { token=strtok(line,"\n\t\r "); if (strcmp(token,".data")==0 ) break; } // process the .data segment and generate the variabletable[] array. int dataarea=0; while(fgets(line,sizeof line,fp)!= NULL) { token=strtok(line,"\n\t\r "); if (strcmp(token,".code")==0 ) //go till the .code segment break; else if (token[strlen(token)-1]==':') { token[strlen(token)-1]='\0'; //will not cause memory leak, as we do not do malloc variabletable[noofvariables].location=counter+dataarea; op1=(char*)malloc(sizeof(token)); strcpy(op1,token); variabletable[noofvariables].name=op1; token = strtok(NULL,",\n\t\r "); if (token==NULL) program[counter+dataarea]=0; else if (strcmp(token, ".space")==0) { token=strtok(NULL,"\n\t\r "); dataarea+=atoi(token); } else if((token[0]=='0')&&(token[1]=='x')) program[counter+dataarea]=hex2int(token+2)&0xffff; else if (( (token[0])=='-') || ('0'<=(token[0])&&(token[0]<='9')) ) program[counter+dataarea]=atoi(token)&0xffff; noofvariables++; dataarea++; } } // supply the address fields for the ldi instructions from the variable table for( i=0; i<noofldis;i++) { j=0; while ((j<noofvariables)&&( strcmp( lditable[i].name , variabletable[j].name)!=0 )) j++; if (j<noofvariables) program[lditable[i].location] = variabletable[j].location; } // supply the address fields for the ldi instructions from the label table for( i=0; i<noofldis;i++) { j=0; while ((j<nooflabels)&&( strcmp( lditable[i].name , labeltable[j].name)!=0 )) j++; if (j<nooflabels){ program[lditable[i].location] = (labeltable[j].location)&0x0fff; printf("%d %d %d\n", i, j, (labeltable[j].location)); } } //display the resulting tables printf("LABEL TABLE\n"); for (i=0;i<nooflabels;i++) printf("%d %s\n", labeltable[i].location, labeltable[i].name); printf("\n"); printf("JUMP TABLE\n"); for (i=0;i<noofjumps;i++) printf("%d %s\n", jumptable[i].location, jumptable[i].name); printf("\n"); printf("VARIABLE TABLE\n"); for (i=0;i<noofvariables;i++) printf("%d %s\n", variabletable[i].location, variabletable[i].name); printf("\n"); printf("LDI INSTRUCTIONS\n"); for (i=0;i<noofldis;i++) printf("%d %s\n", lditable[i].location, lditable[i].name); printf("\n"); fclose(fp); fp = fopen("RAM","w"); fprintf(fp,"v2.0 raw\n"); for (i=0;i<counter+dataarea;i++) fprintf(fp,"%04x\n",program[i]); } }