90 lines
2.5 KiB
C
90 lines
2.5 KiB
C
//
|
||
// Created by bruno on 1.2.2025.
|
||
//
|
||
|
||
#ifndef RISCB_ASSEMBLER_H
|
||
#define RISCB_ASSEMBLER_H
|
||
|
||
#include <stdio.h>
|
||
#include <stdint.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
#include <ctype.h>
|
||
#include "../cpu/core.h"
|
||
|
||
//
|
||
// Definitions used by the CPU and the assembler
|
||
//
|
||
|
||
// Line-length limit for assembly source text.
// NOTE(review): presumably the size of the per-line buffer (terminator included) - confirm at the use sites.
#define MAX_LINE_LENGTH 256
|
||
// Number of slots in the `labels` table declared in this header.
#define MAX_LABELS 1024
|
||
|
||
//
|
||
// Label table entry
|
||
//
|
||
// A single label: its name paired with the address it resolves to.
typedef struct {
    // Label identifier, stored inline in a fixed 64-byte buffer.
    // NOTE(review): presumably a NUL-terminated C string, i.e. at most
    // 63 usable characters - confirm against the code that fills it.
    char name[64];
    // address (in the output machine code)
    uint32_t address;
} Label;
|
||
|
||
// Label table storage with room for MAX_LABELS entries.
// Only declared here; the definition lives in another translation unit.
extern Label labels[MAX_LABELS];
|
||
// Companion counter for the `labels` table (declared here, defined elsewhere).
// NOTE(review): presumably the number of entries currently in use - confirm.
extern int labelCount;
|
||
|
||
//
|
||
// Helper functions for string manipulation
|
||
//
|
||
// s: writable string; nothing is returned, so the result is delivered through s.
// NOTE(review): presumably strips leading and trailing whitespace in place - confirm against the implementation.
void trim(char *s);
|
||
|
||
// string: writable string to convert to upper case.
// Nothing is returned; presumably the conversion happens in place in the caller's buffer - confirm.
void toUpperCase(char *string);
|
||
|
||
// string: writable string to convert to lower case.
// Nothing is returned; presumably the conversion happens in place in the caller's buffer - confirm.
void toLowerCase(char *string);
|
||
|
||
// Look up a label by name; returns -1 if not found.
|
||
// name: label to search for (not modified).
// The result is a signed int so that -1 can signal "not found".
// NOTE(review): the success value is presumably the label's address (or its
// table index) - confirm; a uint32_t address above INT_MAX would not fit in int.
int lookupLabel(const char *name);
|
||
|
||
// Add a label to the table
|
||
// name, address: the label/address pair to record.
// NOTE(review): the meaning of the int result (status code vs. index) and the
// behaviour for a full table or a duplicate name are not visible in this header - confirm.
int addLabel(const char *name, uint32_t address);
|
||
|
||
//
|
||
// Parse a register string (e.g., "R0", "R1", etc.) and return its number.
|
||
// Returns -1 on error.
|
||
// token: register text such as "R0" (not modified).
// The int result carries either the register number or the -1 error sentinel.
int parseRegister(const char *token);
|
||
|
||
// Parse an immediate value (supports decimal and 0x... hexadecimal)
|
||
// token: immediate text in decimal or 0x-prefixed hexadecimal (not modified).
// The result type is uint8_t, so only 0..255 can be returned and no value is
// left over to signal a parse error.
// NOTE(review): how malformed or out-of-range input is handled is not visible here - confirm.
uint8_t parseImmediate(const char *token);
|
||
|
||
//
|
||
// Map an instruction mnemonic (string) to its opcode value and expected operand types.
|
||
// For simplicity, we will return the opcode value and then in our parser we’ll decide how many operands to expect.
|
||
// (In a full assembler you might use a more sophisticated data structure.)
|
||
//
|
||
// mnemonic: instruction name to map to an opcode value.
// The parameter is a non-const char *, unlike the other lookups in this header.
// NOTE(review): presumably so the mnemonic can be case-normalised in place - confirm.
// The value returned for an unknown mnemonic is also not visible here.
int getOpcode(char *mnemonic);
|
||
|
||
//
|
||
// In this simple assembler, some instructions share a mnemonic, and we must choose the correct opcode
|
||
// based on the type of the operand (register vs. immediate vs. memory).
|
||
// The following helper functions decide that, given two operands (as strings).
|
||
//
|
||
// For example, "MOV Rn, 42" should choose MOV_IMM_RN, while "MOV Rn, Rm" should choose MOV_RN_RM.
|
||
// We assume that memory addresses are written in square brackets, e.g. "[123]".
|
||
//
|
||
// dest, src: the two operand strings of a MOV instruction (not modified).
// Returns the opcode selected for that operand combination.
// NOTE(review): the result for an unsupported combination is not visible here - confirm.
int resolveMOV(const char *dest, const char *src);
|
||
|
||
// baseOpcode: opcode associated with the mnemonic before operand-based selection
// (NOTE(review): presumably the register-operand form - confirm).
// src: the source operand string (not modified).
// Returns the opcode to emit for this operand type.
int resolveALU(int baseOpcode, const char *src);
|
||
|
||
//
|
||
// The first pass scans the assembly source file to record all labels and their addresses.
|
||
// The address is simply the offset into the output machine code buffer.
|
||
// For this example, every instruction is assumed to have a fixed length (opcode plus operand bytes).
|
||
//
|
||
// source: NOTE(review): the declaration does not show whether this is a file
// path or the assembly text itself - confirm.
// Returns nothing; results are presumably left in the `labels` table declared in this header.
void firstPass(const char *source);
|
||
|
||
//
|
||
// The second pass actually translates the assembly instructions to machine code.
|
||
// The machine code is written into the provided buffer. (It must be large enough.)
|
||
//
|
||
// input: assembly source to translate (not modified).
// cpu:   NOTE(review): the preceding comment speaks of a "provided buffer", but the
//        signature takes a CPU * - presumably the machine code is written into the
//        CPU's memory; confirm and update that comment.
// erase: NOTE(review): presumably clears previously loaded code/state first - confirm.
// Returns a uint32_t, presumably the number of bytes emitted (or the end address) - confirm.
// `bool` needs <stdbool.h> before C23; this header does not include it directly,
// so it presumably arrives via "../cpu/core.h" - confirm.
uint32_t completePass(const char *input, CPU *cpu, bool erase);
|
||
|
||
|
||
#endif //RISCB_ASSEMBLER_H
|