Skip to content

Bird: The Assembler

// to compile, gcc assembler.c -o assembler
// No error check is provided.
// Variable names cannot start with numeric characters, ie, with 0-9.
// hexadecimals are twos complement.
// first address of the code section is zero, and the data section follows the code section in memory.
// four tables are formed: jump table, ldi table, label table and variable table.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


//Converts a hexadecimal string to integer.
int hex2int( char* hex)
{
    int result=0;

    while ((*hex)!='\0')
    {
        if (('0'<=(*hex))&&((*hex)<='9'))
            result = result*16 + (*hex) -'0';
        else if (('a'<=(*hex))&&((*hex)<='f'))
            result = result*16 + (*hex) -'a'+10;
        else if (('A'<=(*hex))&&((*hex)<='F'))
            result = result*16 + (*hex) -'A'+10;
        hex++;
    }
    return(result);
}


main()
{
    FILE *fp;
    char line[100];
    char *token = NULL;
    char *op1, *op2, *op3, *label;
    char ch;
    int  chch;

    int program[1000];
    int counter=0;  //holds the address of the machine code instruction


    struct label_or_variable
    {
        int location;
        char *name;
    };

// A label is a symbol which mark a location within the code section. In the example
// program above, the strings "lpp", "loop" and "lp1" are labels.
// In reptile, labels are used by jump, jz and ldi instructions.
    struct label_or_variable labeltable[50]; //there can be 50 labels at most in our programs
    int nooflabels = 0;                       //number of labels encountered during assembly.

 // A variable is a symbol which mark a location within the data section. In the example
// program above, the strings "", "" and "" are variables.
// In reptile, variables are used by ldi instructions.
    struct label_or_variable variabletable[50]; // The list of variables in .data section and their locations.
    int noofvariables = 0;    //number of jumps encountered during assembly.

// Jump instructions cannot be assembled readily because we may not know the value of
// the label when we encountered a jump instruction. This happens if the label used by
// that jump instruction appear below that jump instruction. This is the situation
// with the label "loop" in the example program above. Hence, the location of jump
// instructions must be stored.
    struct label_or_variable jumptable[100]; //There can be at most 100 jumps
    int noofjumps=0;                        //number of jump instructions encountered during assembly.


//Variables and labels are used by ldi instructions.
//The memory for the variables are traditionally allocated at the end of the code section.
//Hence their addresses are not known when we assemble a ldi instruction. Also, the value of
//a label may not be known when we encounter a ldi instruction which uses that label.
//Hence, the location of the ldi instructions must be kept, and these instructions must be
//modified when we discover the address of the label or variable that it uses.
    struct label_or_variable lditable[100];
    int noofldis=0;



    fp = fopen("code.txt","r");

    if (fp != NULL)
    {
        while(fgets(line,sizeof line,fp)!= NULL)  //skip till .code section
        {
            token=strtok(line,"\n\t\r ");
            if (strcmp(token,".code")==0 )
                break;
        }
        while(fgets(line,sizeof line,fp)!= NULL)
        {
            token=strtok(line,"\n\t\r ");  //get the instruction mnemonic or label

//========================================   FIRST PASS  ======================================================
            while (token)
            {
                if (strcmp(token,"ldi")==0)        //---------------LDI INSTRUCTION--------------------
                {
                    op1 = strtok(NULL,"\n\t\r ");                                //get the 1st operand of ldi, which is the register that ldi loads
                    op2 = strtok(NULL,"\n\t\r ");                                //get the 2nd operand of ldi, which is the data that is to be loaded
                    program[counter]=0x1000+hex2int(op1);                        //generate the first 16-bit of the ldi instruction
                    counter++;                                                   //move to the second 16-bit of the ldi instruction
                    if ((op2[0]=='0')&&(op2[1]=='x'))                            //if the 2nd operand is twos complement hexadecimal
                        program[counter]=hex2int(op2+2)&0xffff;              //convert it to integer and form the second 16-bit
                    else if ((  (op2[0])=='-') || ((op2[0]>='0')&&(op2[0]<='9')))       //if the 2nd operand is decimal
                        program[counter]=atoi(op2)&0xffff;                         //convert it to integer and form the second 16-bit
                    else                                                           //if the second operand is not decimal or hexadecimal, it is a laber or a variable.
                    {                                                               //in this case, the 2nd 16-bits of the ldi instruction cannot be generated.
                        lditable[noofldis].location = counter;                 //record the location of this 2nd 16-bit
                        op1=(char*)malloc(sizeof(op2));                         //and the name of the label/variable that it must contain
                        strcpy(op1,op2);                                        //in the lditable array.
                        lditable[noofldis].name = op1;
                        noofldis++;
                    }
                    counter++;                                                     //skip to the next memory location
                }

                else if (strcmp(token,"ld")==0)      //------------LD INSTRUCTION---------------------
                {
                    op1 = strtok(NULL,"\n\t\r ");                //get the 1st operand of ld, which is the destination register
                    op2 = strtok(NULL,"\n\t\r ");                //get the 2nd operand of ld, which is the source register
                    ch = (op1[0]-48)| ((op2[0]-48) << 3);        //form bits 11-0 of machine code. 48 is ASCII value of '0'
                    program[counter]=0x2000+((ch)&0x00ff);       //form the instruction and write it to memory
                    counter++;                                   //skip to the next empty location in memory
                }
                else if (strcmp(token,"st")==0) //-------------ST INSTRUCTION--------------------
                {
                    op1 = strtok(NULL,"\n\t\r ");                //get the 1st operand of ld, which is the destination register
                    op2 = strtok(NULL,"\n\t\r ");                //get the 2nd operand of ld, which is the source register
                    chch = (op1[0]-48)<<3| ((op2[0]-48) << 6);        //form bits 11-0 of machine code. 48 is ASCII value of '0'
                    program[counter]=0x3000+((chch)&0x01ff);       //form the instruction and write it to memory
                    counter++;                                   //skip to the next empty location in memory
                    
                }
                else if (strcmp(token,"jz")==0) //------------- CONDITIONAL JUMP ------------------
                {
                    op1 = strtok(NULL,"\n\t\r ");            //read the label string
                    jumptable[noofjumps].location = counter;    //write the jz instruction's location into the jumptable
                    op2=(char*)malloc(sizeof(op1));         //allocate space for the label
                    strcpy(op2,op1);                //copy the label into the allocated space
                    jumptable[noofjumps].name=op2;            //point to the label from the jumptable
                    noofjumps++;                    //skip to the next empty location in jumptable
                    program[counter]=0x4000;            //write the incomplete instruction (just opcode) to memory
                    counter++;
                    
                }
                else if (strcmp(token,"jmp")==0)  //-------------- JUMP -----------------------------
                {
                    op1 = strtok(NULL,"\n\t\r ");            //read the label string
                    jumptable[noofjumps].location = counter;    //write the jz instruction's location into the jumptable
                    op2=(char*)malloc(sizeof(op1));         //allocate space for the label
                    strcpy(op2,op1);                //copy the label into the allocated space
                    jumptable[noofjumps].name=op2;            //point to the label from the jumptable
                    noofjumps++;                    //skip to the next empty location in jumptable
                    program[counter]=0x5000;            //write the incomplete instruction (just opcode) to memory
                    counter++;                    //skip to the next empty location in memory.
                }
                else if (strcmp(token,"add")==0) //----------------- ADD -------------------------------
                {
                    op1 = strtok(NULL,"\n\t\r ");
                    op2 = strtok(NULL,"\n\t\r ");
                    op3 = strtok(NULL,"\n\t\r ");
                    chch = (op1[0]-48)| ((op2[0]-48)<<6)|((op3[0]-48)<<3);
                    program[counter]=0x7000+((chch)&0x01ff);
                    counter++;
                }
                else if (strcmp(token,"sub")==0)
                {
                     //to be added
                }
                else if (strcmp(token,"and")==0)
                {
                    //to be added
                }
                else if (strcmp(token,"or")==0)
                {
                      //to be added
                }
                else if (strcmp(token,"xor")==0)
                {
                    //to be added
                }
                else if (strcmp(token,"not")==0)
                {
                    op1 = strtok(NULL,"\n\t\r ");
                    op2 = strtok(NULL,"\n\t\r ");
                    ch = (op1[0]-48)| ((op2[0]-48)<<3);
                    program[counter]=0x7E00+((ch)&0x00ff);
                    counter++;
                }
                else if (strcmp(token,"mov")==0)
                {
                    //to be added
                }
                else if (strcmp(token,"inc")==0)
                {
                    op1 = strtok(NULL,"\n\t\r ");
                    ch = (op1[0]-48)| ((op1[0]-48)<<3);
                    program[counter]=0x7E80+((ch)&0x00ff);
                    counter++;
                }
                else if (strcmp(token,"dec")==0)
                {
                     //to be added
                }
                else if (strcmp(token,"push")==0)
                {
                      //to be added
                }
                else if (strcmp(token,"pop")==0)
                {
                      //to be added
                }
                else if (strcmp(token,"call")==0)  //-------------- CALL -----------------------------
                {
                    //to be added
                }
                else if (strcmp(token,"ret")==0)
                {
                    //to be added
                }
                else //------WHAT IS ENCOUNTERED IS NOT AN INSTRUCTION BUT A LABEL. UPDATE THE LABEL TABLE--------
                {
                    labeltable[nooflabels].location = counter;  //read the label and update labeltable.
                    op1=(char*)malloc(sizeof(token));
                    strcpy(op1,token);
                    labeltable[nooflabels].name=op1;
                    nooflabels++;
                }
                token = strtok(NULL,",\n\t\r ");  // if what is read before is an instruction, this will be NULL
                                                  //if what is read before is an label, this will be an opcode.
            }
        }


//================================= SECOND PASS ==============================

//supply the address fields of the jump and jz instructions by matching jumptable and labeltable
        int i,j;
        for (i=0; i<noofjumps;i++)   //for all jump/jz instructions encountered
        {
            j=0;
            while ((j<nooflabels)&&( strcmp(jumptable[i].name , labeltable[j].name ) != 0 ))  //if the label for this jump/jz does not match with the
                j++;                                            // jth label in the labeltable, check the next label..
            program[jumptable[i].location] +=(labeltable[j].location-jumptable[i].location-1)&0x0fff;       //copy the jump address into memory.
        }


//search for the start of the .data segment
        rewind(fp);
        while(fgets(line,sizeof line,fp)!= NULL)  //skip till .data, if no .data, also ok.
        {
            token=strtok(line,"\n\t\r ");
            if (strcmp(token,".data")==0 )
                break;

        }

// process the .data segment and generate the variabletable[] array.
        int dataarea=0;
         while(fgets(line,sizeof line,fp)!= NULL)
        {
            token=strtok(line,"\n\t\r ");
            if (strcmp(token,".code")==0 )  //go till the .code segment
                break;
            else if (token[strlen(token)-1]==':')
            {
                token[strlen(token)-1]='\0';  //will not cause memory leak, as we do not do malloc
                variabletable[noofvariables].location=counter+dataarea;
                op1=(char*)malloc(sizeof(token));
                strcpy(op1,token);
                variabletable[noofvariables].name=op1;
                token = strtok(NULL,",\n\t\r ");
                if (token==NULL)
                    program[counter+dataarea]=0;
                else if (strcmp(token, ".space")==0)
                {
                    token=strtok(NULL,"\n\t\r ");
                    dataarea+=atoi(token);
                }
                else if((token[0]=='0')&&(token[1]=='x'))
                    program[counter+dataarea]=hex2int(token+2)&0xffff;
                else if ((  (token[0])=='-') || ('0'<=(token[0])&&(token[0]<='9'))  )
                    program[counter+dataarea]=atoi(token)&0xffff;
                noofvariables++;
                dataarea++;
            }
        }


// supply the address fields for the ldi instructions from the variable table
        for( i=0; i<noofldis;i++)
        {
            j=0;
            while ((j<noofvariables)&&( strcmp( lditable[i].name , variabletable[j].name)!=0 ))
                j++;
            if (j<noofvariables)
                program[lditable[i].location] = variabletable[j].location;
        }


// supply the address fields for the ldi instructions from the label table
        for( i=0; i<noofldis;i++)
        {
            j=0;
            while ((j<nooflabels)&&( strcmp( lditable[i].name , labeltable[j].name)!=0 ))
                j++;
            if (j<nooflabels){
                program[lditable[i].location] = (labeltable[j].location)&0x0fff;
                printf("%d %d %d\n", i, j, (labeltable[j].location));
            }
        }


//display the resulting tables
        printf("LABEL TABLE\n");
        for (i=0;i<nooflabels;i++)
            printf("%d %s\n", labeltable[i].location, labeltable[i].name);
        printf("\n");
        printf("JUMP TABLE\n");
        for (i=0;i<noofjumps;i++)
            printf("%d %s\n", jumptable[i].location, jumptable[i].name);
        printf("\n");
        printf("VARIABLE TABLE\n");
        for (i=0;i<noofvariables;i++)
            printf("%d %s\n", variabletable[i].location, variabletable[i].name);
        printf("\n");
        printf("LDI INSTRUCTIONS\n");
        for (i=0;i<noofldis;i++)
            printf("%d %s\n", lditable[i].location, lditable[i].name);
        printf("\n");
        fclose(fp);
        fp = fopen("RAM","w");
        fprintf(fp,"v2.0 raw\n");
        for (i=0;i<counter+dataarea;i++)
            fprintf(fp,"%04x\n",program[i]);
        fclose(fp);
        fp = fopen("ram.dat","w");
        for (i=0;i<counter+dataarea;i++)
            fprintf(fp, "%04x\n", program[i]);
    }
}