Files
RISC-B/assembler/assembler.h
2025-02-16 17:28:10 +01:00

90 lines
2.5 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//
// Created by bruno on 1.2.2025.
//
#ifndef RISCB_ASSEMBLER_H
#define RISCB_ASSEMBLER_H
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "../cpu/core.h"
//
// Definitions used by the CPU and the assembler
//
// NOTE(review): presumably the maximum length of one line of assembly source
// (i.e. the size of a line buffer) — confirm against the parser.
#define MAX_LINE_LENGTH 256
// Capacity of the global `labels` table declared further down in this header.
#define MAX_LABELS 1024
//
// One entry of the label table: a label's name paired with its address.
//
typedef struct {
    char     name[64];  // label text, stored inline in a 64-byte buffer
    uint32_t address;   // address (in the output machine code)
} Label;
// Global label table with room for MAX_LABELS entries. Only declared here
// (extern); the storage is defined in another translation unit.
extern Label labels[MAX_LABELS];
// NOTE(review): presumably the number of entries of `labels` currently in use — confirm.
extern int labelCount;
//
// Helper functions for string manipulation
//
// NOTE(review): takes a mutable buffer, so presumably strips surrounding
// whitespace from `s` in place — confirm against the implementation.
void trim(char *s);
// NOTE(review): both take a mutable buffer; presumably they convert `string`
// to upper case / lower case in place — confirm against the implementation.
void toUpperCase(char *string);
void toLowerCase(char *string);
// Look up a label by name; returns -1 if not found.
// NOTE(review): what a successful (non-negative) return value represents —
// an index into the label table or the label's address — is not visible in
// this header; confirm and document.
int lookupLabel(const char *name);
// Add a label with the given name and address to the table.
// NOTE(review): the meaning of the int return value, and what happens when the
// table is full or the name already exists, is not visible here — TODO document.
int addLabel(const char *name, uint32_t address);
//
// Parse a register string (e.g., "R0", "R1", etc.) and return its number.
// Returns -1 on error.
int parseRegister(const char *token);
// Parse an immediate value (supports decimal and 0x... hexadecimal).
// NOTE(review): the result type is uint8_t, so only 0..255 can be returned and
// no value is left over to signal a parse error; how out-of-range or malformed
// tokens are handled is not visible here — confirm.
uint8_t parseImmediate(const char *token);
//
// Map an instruction mnemonic (string) to its opcode value and expected operand types.
// For simplicity, we will return the opcode value and then in our parser we'll decide how many operands to expect.
// (In a full assembler you might use a more sophisticated data structure.)
//
// NOTE(review): `mnemonic` is not const, so the implementation may modify the
// string (e.g. case normalization) — confirm. The value returned for an
// unknown mnemonic is not visible in this header.
//
int getOpcode(char *mnemonic);
//
// In this simple assembler, some instructions share a mnemonic, and we must choose the correct opcode
// based on the type of the operand (register vs. immediate vs. memory).
// The following helper functions decide that, given the operands (as strings).
//
// For example, "MOV Rn, 42" should choose MOV_IMM_RN, while "MOV Rn, Rm" should choose MOV_RN_RM.
// We assume that memory addresses are written in square brackets, e.g. "[123]".
//
// NOTE(review): presumably returns the selected MOV opcode; the value returned
// for an unsupported dest/src combination is not visible here — confirm.
int resolveMOV(const char *dest, const char *src);
// NOTE(review): presumably selects the variant of `baseOpcode` that matches the
// kind of `src` operand (register, immediate or memory), as described above for
// mnemonics that share several opcodes — confirm, including the error value.
int resolveALU(int baseOpcode, const char *src);
//
// The first pass scans the assembly source file to record all labels and their addresses.
// The address is simply the offset into the output machine code buffer.
// For this example, every instruction is assumed to have a fixed length (opcode plus operand bytes).
//
// NOTE(review): returns nothing, so the labels are presumably recorded in the
// global `labels` / `labelCount` table — confirm. Whether `source` is the
// assembly text itself or a file path is not visible in this header.
void firstPass(const char *source);
//
// The second pass actually translates the assembly instructions to machine code.
// The machine code is written into the provided buffer. (It must be large enough.)
//
// NOTE(review): the signature takes a CPU * rather than a raw buffer, so the
// "provided buffer" is presumably memory owned by `cpu` — confirm. The meaning
// of `erase` and of the uint32_t return value is not visible here — TODO document.
// NOTE(review): `bool` needs <stdbool.h> (before C23); this header does not include
// it directly, so it presumably arrives via "../cpu/core.h" — confirm.
uint32_t completePass(const char *input, CPU *cpu, bool erase);
#endif //RISCB_ASSEMBLER_H