Add highlighting, fixed-length instructions, and more

This commit is contained in:
2025-02-09 21:15:50 +01:00
parent 45653b6372
commit e133c2df3a
17 changed files with 753 additions and 338 deletions

View File

@@ -290,7 +290,7 @@ const char *readLine(const char *source, char *buffer, size_t maxLen) {
// The address is simply the offset into the output machine code buffer.
// For this example, every instruction is assumed to have a fixed length (opcode plus operand bytes).
//
int firstPass(const char *source) {
void firstPass(const char *source) {
char line[MAX_LINE_LENGTH];
int addr = 0;
labelCount = 0;
@@ -316,152 +316,170 @@ int firstPass(const char *source) {
continue;
strcpy(line, rest);
}
// Parse the mnemonic and operands.
char mnemonic[32], operand1[64], operand2[64], operand3[64];
operand1[0] = '\0';
operand2[0] = '\0';
int tokenCount = sscanf(line, "%31s %63[^, ] %63[^, ] %63s",
mnemonic, operand1, operand2, operand3);
// Use the mapper to get a base opcode.
int baseOpcode = getOpcode(mnemonic);
if (baseOpcode == -1) {
printf("Unknown instruction: %s\n", mnemonic);
continue;
}
int size = 0; // Instruction size in bytes.
if (baseOpcode == -2) {
// MOV instruction requires further resolution.
int resolvedOpcode = resolveMOV(operand1, operand2);
if (resolvedOpcode == MOV_RN_IMM || resolvedOpcode == MOV_RN_RM) {
size = 3; // opcode (1) + reg (1) + immediate or register (1)
} else if (resolvedOpcode == MOV_RN_ADDR || resolvedOpcode == MOV_ADDR_RN) {
size = 6; // opcode (1) + one operand as register (1) and one 32-bit address (4) [+ padding if needed]
} else {
size = 3; // fallback
}
} else if (baseOpcode < 0) {
// Ambiguous instructions that use resolveALU.
// For JMP and jump-bit instructions, the jump target is in operand1.
if (baseOpcode == -11) {
// JMP: if operand1 starts with '+' or '-', it's relative.
if (operand1[0] == '+' || operand1[0] == '-') {
// resolve as JMP_REL.
int resolvedOpcode = resolveALU(baseOpcode, operand1);
size = 2; // opcode (1) + 1-byte relative offset (1)
} else {
int resolvedOpcode = resolveALU(baseOpcode, operand1);
size = 5; // opcode (1) + 32-bit absolute address (4)
}
} else if (baseOpcode == -14 || baseOpcode == -15) {
// JMPBS or JMPBC (jump if bit set/clear)
int resolvedOpcode = resolveALU(baseOpcode, operand1);
if (operand1[0] == 'R' || operand1[0] == 'r')
size = 7; // opcode (1) + register (1) + bit (1) + 32-bit jump address (4)
else
size = 10; // opcode (1) + 32-bit memory address (4) + bit (1) + 32-bit jump address (4)
} else {
// For arithmetic ALU instructions and INC/DEC,
// use operand2 to resolve.
int resolvedOpcode = resolveALU(baseOpcode, operand2);
switch (resolvedOpcode) {
case ADD_RN_RM:
case SUB_RN_RM:
case MUL_RN_RM:
case DIV_RN_RM:
case MOD_RN_RM:
case AND_RN_RM:
case OR_RN_RM:
case XOR_RN_RM:
case ADD_RN_IMM:
case SUB_RN_IMM:
case MUL_RN_IMM:
case DIV_RN_IMM:
case MOD_RN_IMM:
case AND_RN_IMM:
case OR_RN_IMM:
case XOR_RN_IMM:
size = 3; // opcode (1) + register (1) + reg/immediate (1)
break;
case INC_RN:
case DEC_RN:
size = 2; // opcode (1) + register (1)
break;
case INC_ADDR:
case DEC_ADDR:
size = 5; // opcode (1) + 32-bit address (4)
break;
default:
size = 3;
break;
}
}
} else {
// Non-ambiguous instructions that have positive opcodes.
// Use the mapping value (baseOpcode) directly.
switch (baseOpcode) {
case NOP:
case BRK:
case HLT:
size = 1;
break;
case SWAP:
case CMP:
size = 3;
break;
case SWAPN:
case NEG_RN:
case NOT_RN:
size = 2;
break;
case SHL_RN_IMM:
case SHR_RN_IMM:
case SAR_RN_IMM:
size = 3;
break;
case JE:
case JNE:
case JG:
case JL:
case JGE:
case JLE:
case CALL:
size = 5; // opcode (1) + 32-bit address (4)\n break;
case RET:
size = 1;
break;
default:
size = 3;
break;
}
}
addr += size;
}
return addr;
// // Parse the mnemonic and operands.
// char mnemonic[32], operand1[64], operand2[64], operand3[64];
// operand1[0] = '\0';
// operand2[0] = '\0';
// int tokenCount = sscanf(line, "%31s %63[^ ] %63[^ ] %63s",
// mnemonic, operand1, operand2, operand3);
//
//
// // Use the mapper to get a base opcode.
// int baseOpcode = getOpcode(mnemonic);
// if (baseOpcode == -1) {
// printf("Unknown instruction: %s\n", mnemonic);
// continue;
// }
//
// int size = CPU_INSTRUCTION_SIZE; // Instruction size in bytes.
// if (baseOpcode == -2) {
// // MOV instruction requires further resolution.
// int resolvedOpcode = resolveMOV(operand1, operand2);
// if (resolvedOpcode == MOV_RN_IMM || resolvedOpcode == MOV_RN_RM) {
// size = 3; // opcode (1) + reg (1) + immediate or register (1)
// } else if (resolvedOpcode == MOV_RN_ADDR || resolvedOpcode == MOV_ADDR_RN) {
// size = 6; // opcode (1) + one operand as register (1) and one 32-bit address (4) [+ padding if needed]
// } else {
// size = 3; // fallback
// }
// } else if (baseOpcode < 0) {
// // Ambiguous instructions that use resolveALU.
// // For JMP and jump-bit instructions, the jump target is in operand1.
// if (baseOpcode == -11) {
// // JMP: if operand1 starts with '+' or '-', it's relative.
// if (operand1[0] == '+' || operand1[0] == '-') {
// // resolve as JMP_REL.
// int resolvedOpcode = resolveALU(baseOpcode, operand1);
// size = 2; // opcode (1) + 1-byte relative offset (1)
// } else {
// int resolvedOpcode = resolveALU(baseOpcode, operand1);
// size = 5; // opcode (1) + 32-bit absolute address (4)
// }
// } else if (baseOpcode == -14 || baseOpcode == -15) {
// // JMPBS or JMPBC (jump if bit set/clear)
// int resolvedOpcode = resolveALU(baseOpcode, operand1);
// if (operand1[0] == 'R' || operand1[0] == 'r')
// size = 7; // opcode (1) + register (1) + bit (1) + 32-bit jump address (4)
// else
// size = 10; // opcode (1) + 32-bit memory address (4) + bit (1) + 32-bit jump address (4)
// } else {
// // For arithmetic ALU instructions and INC/DEC,
// // use operand2 to resolve.
// int resolvedOpcode = resolveALU(baseOpcode, operand2);
// switch (resolvedOpcode) {
// case ADD_RN_RM:
// case SUB_RN_RM:
// case MUL_RN_RM:
// case DIV_RN_RM:
// case MOD_RN_RM:
// case AND_RN_RM:
// case OR_RN_RM:
// case XOR_RN_RM:
// case ADD_RN_IMM:
// case SUB_RN_IMM:
// case MUL_RN_IMM:
// case DIV_RN_IMM:
// case MOD_RN_IMM:
// case AND_RN_IMM:
// case OR_RN_IMM:
// case XOR_RN_IMM:
// size = 3; // opcode (1) + register (1) + reg/immediate (1)
// break;
// case INC_RN:
// case DEC_RN:
// size = 2; // opcode (1) + register (1)
// break;
// case INC_ADDR:
// case DEC_ADDR:
// size = 5; // opcode (1) + 32-bit address (4)
// break;
// default:
// size = 3;
// break;
// }
// }
// } else {
// // Non-ambiguous instructions that have positive opcodes.
// // Use the mapping value (baseOpcode) directly.
// switch (baseOpcode) {
// case NOP:
// case BRK:
// case HLT:
// size = 1;
// break;
// case SWAP:
// case CMP:
// size = 3;
// break;
// case SWAPN:
// case NEG_RN:
// case NOT_RN:
// size = 2;
// break;
// case SHL_RN_IMM:
// case SHR_RN_IMM:
// case SAR_RN_IMM:
// size = 3;
// break;
// case JE:
// case JNE:
// case JG:
// case JL:
// case JGE:
// case JLE:
// case CALL:
// size = 5; // opcode (1) + 32-bit address (4)\n break;
// case RET:
// size = 1;
// break;
// default:
// size = 3;
// break;
// }
// }
// addr += size;
// }
// return addr;
}
//
// The second pass actually translates the assembly instructions to machine code.
// The machine code is written into the provided buffer. (It must be large enough.)
//
int secondPass(const char *source, uint8_t *code) {
int completePass(const char *source, CPU *cpu, bool erase) {
if (erase) {
memset(cpu->memory, 0, sizeof(cpu->memory));
memset(cpu->regs, 0, sizeof(cpu->regs));
memset(cpu->stack, 0, sizeof(cpu->stack));
memset(cpu->addrToLineMapper, 0, sizeof(cpu->addrToLineMapper));
cpu->pc = 0;
cpu->stack_ptr = 0;
cpu->flags = 0;
cpu->cycle = 0;
}
// First pass: determine label addresses.
firstPass(source);
char line[MAX_LINE_LENGTH];
int addr = 0;
uint32_t addr = 0;
const char *ptr = source;
uint32_t lineIndex = 0;
while (*ptr) {
ptr = readLine(ptr, line, sizeof(line));
trim(line);
if (line[0] == '\0' || line[0] == ';' || line[0] == '#')
if (line[0] == '\0' || line[0] == ';' || line[0] == '#') {
lineIndex++;
continue;
}
// Remove any label definitions (up to the colon).
char *colon = strchr(line, ':');
if (colon != NULL) {
*colon = ' '; // Replace the colon so the rest of the line can be parsed.
lineIndex++;
continue;
}
if (strlen(line) == 0)
@@ -470,11 +488,12 @@ int secondPass(const char *source, uint8_t *code) {
// Parse mnemonic and up to three operands.
char mnemonic[32], operand1[64], operand2[64], operand3[64];
mnemonic[0] = operand1[0] = operand2[0] = operand3[0] = '\0';
int tokenCount = sscanf(line, "%31s %63[^, ] %63[^, ] %63s",
int tokenCount = sscanf(line, "%31s %63[^ ] %63[^ ] %63s",
mnemonic, operand1, operand2, operand3);
// (Optionally, you might trim each operand individually here.)
uint32_t oldAddr = addr;
// Map the mnemonic to a base opcode.
int baseOpcode = getOpcode(mnemonic);
if (baseOpcode == -1) {
@@ -489,17 +508,17 @@ int secondPass(const char *source, uint8_t *code) {
return 1;
}
int resolvedOpcode = resolveMOV(operand1, operand2);
code[addr++] = resolvedOpcode;
cpu->memory[addr++] = resolvedOpcode;
if (resolvedOpcode == MOV_RN_IMM) {
int reg = parseRegister(operand1);
uint8_t imm = parseImmediate(operand2);
code[addr++] = reg;
code[addr++] = imm;
cpu->memory[addr++] = reg;
cpu->memory[addr++] = imm;
} else if (resolvedOpcode == MOV_RN_RM) {
int regDest = parseRegister(operand1);
int regSrc = parseRegister(operand2);
code[addr++] = regDest;
code[addr++] = regSrc;
cpu->memory[addr++] = regDest;
cpu->memory[addr++] = regSrc;
} else if (resolvedOpcode == MOV_RN_ADDR) {
int reg = parseRegister(operand1);
// Assume source is written as "[address]": remove the brackets.
@@ -507,11 +526,10 @@ int secondPass(const char *source, uint8_t *code) {
strncpy(addrStr, operand2 + 1, strlen(operand2) - 2);
addrStr[strlen(operand2) - 2] = '\0';
uint32_t memAddr = (uint32_t) strtoul(addrStr, NULL, 0);
code[addr++] = reg;
code[addr++] = (memAddr >> 24) & 0xFF;
code[addr++] = (memAddr >> 16) & 0xFF;
code[addr++] = (memAddr >> 8) & 0xFF;
code[addr++] = memAddr & 0xFF;
cpu->memory[addr++] = reg;
cpu->memory[addr++] = memAddr & 0xFF;
cpu->memory[addr++] = (memAddr >> 8) & 0xFF;
cpu->memory[addr++] = (memAddr >> 16) & 0xFF;
} else if (resolvedOpcode == MOV_ADDR_RN) {
// Destination is memory (written as "[address]").
char addrStr[32];
@@ -519,11 +537,10 @@ int secondPass(const char *source, uint8_t *code) {
addrStr[strlen(operand1) - 2] = '\0';
uint32_t memAddr = (uint32_t) strtoul(addrStr, NULL, 0);
int reg = parseRegister(operand2);
code[addr++] = (memAddr >> 24) & 0xFF;
code[addr++] = (memAddr >> 16) & 0xFF;
code[addr++] = (memAddr >> 8) & 0xFF;
code[addr++] = memAddr & 0xFF;
code[addr++] = reg;
cpu->memory[addr++] = memAddr & 0xFF;
cpu->memory[addr++] = (memAddr >> 8) & 0xFF;
cpu->memory[addr++] = (memAddr >> 16) & 0xFF;
cpu->memory[addr++] = reg;
}
}
// --- INC and DEC (baseOpcode == -12 or -13) ---
@@ -534,20 +551,19 @@ int secondPass(const char *source, uint8_t *code) {
return 1;
}
int resolvedOpcode = resolveALU(baseOpcode, operand1);
code[addr++] = resolvedOpcode;
cpu->memory[addr++] = resolvedOpcode;
if (operand1[0] == 'R' || operand1[0] == 'r') {
int reg = parseRegister(operand1);
code[addr++] = reg;
cpu->memory[addr++] = reg;
} else {
// Assume memory reference written as "[address]".
char addrStr[32];
strncpy(addrStr, operand1 + 1, strlen(operand1) - 2);
addrStr[strlen(operand1) - 2] = '\0';
uint32_t memAddr = (uint32_t) strtoul(addrStr, NULL, 0);
code[addr++] = (memAddr >> 24) & 0xFF;
code[addr++] = (memAddr >> 16) & 0xFF;
code[addr++] = (memAddr >> 8) & 0xFF;
code[addr++] = memAddr & 0xFF;
cpu->memory[addr++] = memAddr & 0xFF;
cpu->memory[addr++] = (memAddr >> 8) & 0xFF;
cpu->memory[addr++] = (memAddr >> 16) & 0xFF;
}
}
// --- Other Ambiguous ALU Instructions (ADD, SUB, MUL, etc.) ---
@@ -559,15 +575,15 @@ int secondPass(const char *source, uint8_t *code) {
return 1;
}
int resolvedOpcode = resolveALU(baseOpcode, operand2);
code[addr++] = resolvedOpcode;
cpu->memory[addr++] = resolvedOpcode;
int regDest = parseRegister(operand1);
code[addr++] = regDest;
cpu->memory[addr++] = regDest;
if (operand2[0] == 'R' || operand2[0] == 'r') {
int regSrc = parseRegister(operand2);
code[addr++] = regSrc;
cpu->memory[addr++] = regSrc;
} else {
uint8_t imm = parseImmediate(operand2);
code[addr++] = imm;
cpu->memory[addr++] = imm;
}
}
// --- JMP Instruction (baseOpcode == -11) ---
@@ -577,18 +593,17 @@ int secondPass(const char *source, uint8_t *code) {
return 1;
}
int resolvedOpcode = resolveALU(baseOpcode, operand1);
code[addr++] = resolvedOpcode;
cpu->memory[addr++] = resolvedOpcode;
if (operand1[0] == '+' || operand1[0] == '-') {
// Relative jump: one-byte offset.
uint8_t offset = parseImmediate(operand1);
code[addr++] = offset;
cpu->memory[addr++] = offset;
} else {
// Absolute jump: use label lookup for 32-bit address.
uint32_t jumpAddr = (uint32_t) lookupLabel(operand1);
code[addr++] = (jumpAddr >> 24) & 0xFF;
code[addr++] = (jumpAddr >> 16) & 0xFF;
code[addr++] = (jumpAddr >> 8) & 0xFF;
code[addr++] = jumpAddr & 0xFF;
cpu->memory[addr++] = jumpAddr & 0xFF;
cpu->memory[addr++] = (jumpAddr >> 8) & 0xFF;
cpu->memory[addr++] = (jumpAddr >> 16) & 0xFF;
}
}
// --- Jump Bit Set/Clear Instructions (JMPBS, JMPBC) ---
@@ -598,33 +613,31 @@ int secondPass(const char *source, uint8_t *code) {
return 1;
}
int resolvedOpcode = resolveALU(baseOpcode, operand1);
code[addr++] = resolvedOpcode;
cpu->memory[addr++] = resolvedOpcode;
// Encode the source operand (register or memory).
if (operand1[0] == 'R' || operand1[0] == 'r') {
int reg = parseRegister(operand1);
code[addr++] = reg;
cpu->memory[addr++] = reg;
} else {
char addrStr[32];
strncpy(addrStr, operand1 + 1, strlen(operand1) - 2);
addrStr[strlen(operand1) - 2] = '\0';
uint32_t memAddr = (uint32_t) strtoul(addrStr, NULL, 0);
code[addr++] = (memAddr >> 24) & 0xFF;
code[addr++] = (memAddr >> 16) & 0xFF;
code[addr++] = (memAddr >> 8) & 0xFF;
code[addr++] = memAddr & 0xFF;
cpu->memory[addr++] = memAddr & 0xFF;
cpu->memory[addr++] = (memAddr >> 8) & 0xFF;
cpu->memory[addr++] = (memAddr >> 16) & 0xFF;
}
// Encode the bit number (a one-byte immediate).
uint8_t bitVal = parseImmediate(operand2);
code[addr++] = bitVal;
cpu->memory[addr++] = bitVal;
// Encode the jump target (label -> 32-bit address).
uint32_t jumpAddr = (uint32_t) lookupLabel(operand3);
code[addr++] = (jumpAddr >> 24) & 0xFF;
code[addr++] = (jumpAddr >> 16) & 0xFF;
code[addr++] = (jumpAddr >> 8) & 0xFF;
code[addr++] = jumpAddr & 0xFF;
cpu->memory[addr++] = jumpAddr & 0xFF;
cpu->memory[addr++] = (jumpAddr >> 8) & 0xFF;
cpu->memory[addr++] = (jumpAddr >> 16) & 0xFF;
}
// --- Non-ambiguous Instructions ---
else if (baseOpcode > 0) {
else if (baseOpcode >= 0) {
switch (baseOpcode) {
case CMP:
case SWAP: {
@@ -632,11 +645,11 @@ int secondPass(const char *source, uint8_t *code) {
fprintf(stderr, "Error: %s requires two operands.\n", mnemonic);
return 1;
}
code[addr++] = baseOpcode;
cpu->memory[addr++] = baseOpcode;
int r1 = parseRegister(operand1);
int r2 = parseRegister(operand2);
code[addr++] = r1;
code[addr++] = r2;
cpu->memory[addr++] = r1;
cpu->memory[addr++] = r2;
break;
}
case SWAPN:
@@ -646,9 +659,9 @@ int secondPass(const char *source, uint8_t *code) {
fprintf(stderr, "Error: %s requires one operand.\n", mnemonic);
return 1;
}
code[addr++] = baseOpcode;
cpu->memory[addr++] = baseOpcode;
int reg = parseRegister(operand1);
code[addr++] = reg;
cpu->memory[addr++] = reg;
break;
}
case SHL_RN_IMM:
@@ -658,11 +671,11 @@ int secondPass(const char *source, uint8_t *code) {
fprintf(stderr, "Error: %s requires two operands.\n", mnemonic);
return 1;
}
code[addr++] = baseOpcode;
cpu->memory[addr++] = baseOpcode;
int reg = parseRegister(operand1);
code[addr++] = reg;
cpu->memory[addr++] = reg;
uint8_t imm = parseImmediate(operand2);
code[addr++] = imm;
cpu->memory[addr++] = imm;
break;
}
case JE:
@@ -676,7 +689,7 @@ int secondPass(const char *source, uint8_t *code) {
fprintf(stderr, "Error: %s requires one operand.\n", mnemonic);
return 1;
}
code[addr++] = baseOpcode;
cpu->memory[addr++] = baseOpcode;
// If the operand isn't purely numeric, treat it as a label.
if (!isdigit(operand1[0])) {
int labelAddr = lookupLabel(operand1);
@@ -684,16 +697,14 @@ int secondPass(const char *source, uint8_t *code) {
fprintf(stderr, "Error: undefined label '%s'\n", operand1);
return 1;
}
code[addr++] = (labelAddr >> 24) & 0xFF;
code[addr++] = (labelAddr >> 16) & 0xFF;
code[addr++] = (labelAddr >> 8) & 0xFF;
code[addr++] = labelAddr & 0xFF;
cpu->memory[addr++] = labelAddr & 0xFF;
cpu->memory[addr++] = (labelAddr >> 8) & 0xFF;
cpu->memory[addr++] = (labelAddr >> 16) & 0xFF;
} else {
uint32_t immAddr = (uint32_t) strtoul(operand1, NULL, 0);
code[addr++] = (immAddr >> 24) & 0xFF;
code[addr++] = (immAddr >> 16) & 0xFF;
code[addr++] = (immAddr >> 8) & 0xFF;
code[addr++] = immAddr & 0xFF;
cpu->memory[addr++] = immAddr & 0xFF;
cpu->memory[addr++] = (immAddr >> 8) & 0xFF;
cpu->memory[addr++] = (immAddr >> 16) & 0xFF;
}
break;
}
@@ -701,7 +712,7 @@ int secondPass(const char *source, uint8_t *code) {
case BRK:
case HLT:
case NOP: {
code[addr++] = baseOpcode;
cpu->memory[addr++] = baseOpcode;
break;
}
default: {
@@ -713,17 +724,14 @@ int secondPass(const char *source, uint8_t *code) {
fprintf(stderr, "Error: Unknown instruction '%s'\n", mnemonic);
return 1;
}
const uint32_t remainingBytes = CPU_INSTRUCTION_SIZE - (addr - oldAddr);
if (remainingBytes > CPU_INSTRUCTION_SIZE) {
printf("HELP, INSTRUCTION SIZE SMALLER THAN INSTRUCTION");
}
cpu->addrToLineMapper[(addr - (addr % CPU_INSTRUCTION_SIZE)) / CPU_INSTRUCTION_SIZE] = lineIndex;
addr += remainingBytes;
lineIndex++;
}
return addr;
}
void completePass(const char *input, CPU *cpu, bool erase) {
// First pass: determine label addresses.
if (erase) {
init_cpu(cpu);
}
firstPass(input);
secondPass(input, cpu->memory);
}

View File

@@ -77,15 +77,13 @@ int resolveALU(int baseOpcode, const char *src);
// The address is simply the offset into the output machine code buffer.
// For this example, every instruction is assumed to have a fixed length (opcode plus operand bytes).
//
int firstPass(const char *source);
void firstPass(const char *source);
//
// The second pass actually translates the assembly instructions to machine code.
// The machine code is written into the provided buffer. (It must be large enough.)
//
int secondPass(const char *source, uint8_t *code);
void completePass(const char *input, CPU *cpu, bool erase);
int completePass(const char *input, CPU *cpu, bool erase);
#endif //RISCB_ASSEMBLER_H