Something is almost working

This commit is contained in:
2025-02-08 23:23:21 +01:00
parent aaf3dfb40c
commit 45653b6372
15 changed files with 969 additions and 326 deletions

View File

@@ -32,10 +32,10 @@ int lookupLabel(const char *name) {
}
// Add a label to the table
void addLabel(const char *name, int address) {
int addLabel(const char *name, int address) {
if (labelCount >= MAX_LABELS) {
fprintf(stderr, "Too many labels!\n");
exit(1);
return 1;
}
strncpy(labels[labelCount].name, name, sizeof(labels[labelCount].name));
labels[labelCount].address = address;
@@ -90,6 +90,16 @@ void toUpperCase(char *string) {
if (*string > 0x60 && *string < 0x7b) {
(*string) -= 0x20;
}
string++;
}
}
// Lower-case a NUL-terminated string in place.
// Only the characters 'A' through 'Z' are changed (by adding 0x20, the
// distance between the upper- and lower-case letters in ASCII); every other
// character is left as it is.
void toLowerCase(char *string) {
    for (char *cursor = string; *cursor != '\0'; ++cursor) {
        const char ch = *cursor;
        if (ch < 'A' || ch > 'Z') {
            continue; // Not an upper-case letter: nothing to do.
        }
        *cursor = (char) (ch + 0x20);
    }
}
@@ -104,6 +114,8 @@ int getOpcode(char *mnemonic) {
return NOP;
else if (strcmp(mnemonic, "BRK") == 0)
return BRK;
else if (strcmp(mnemonic, "HLT") == 0)
return HLT;
else if (strcmp(mnemonic, "MOV") == 0)
return -2; // Special case: we must decide between MOV_RN_IMM, MOV_RN_RM, MOV_RN_ADDR, MOV_ADDR_RN
else if (strcmp(mnemonic, "SWAP") == 0)
@@ -281,6 +293,7 @@ const char *readLine(const char *source, char *buffer, size_t maxLen) {
int firstPass(const char *source) {
char line[MAX_LINE_LENGTH];
int addr = 0;
labelCount = 0;
const char *ptr = source;
while (*ptr) {
@@ -305,10 +318,12 @@ int firstPass(const char *source) {
}
// Parse the mnemonic and operands.
char mnemonic[32], operand1[64], operand2[64];
char mnemonic[32], operand1[64], operand2[64], operand3[64];
operand1[0] = '\0';
operand2[0] = '\0';
sscanf(line, "%31s %63[^,], %63s", mnemonic, operand1, operand2);
int tokenCount = sscanf(line, "%31s %63[^, ] %63[^, ] %63s",
mnemonic, operand1, operand2, operand3);
// Use the mapper to get a base opcode.
int baseOpcode = getOpcode(mnemonic);
@@ -390,6 +405,7 @@ int firstPass(const char *source) {
switch (baseOpcode) {
case NOP:
case BRK:
case HLT:
size = 1;
break;
case SWAP:
@@ -439,57 +455,57 @@ int secondPass(const char *source, uint8_t *code) {
while (*ptr) {
ptr = readLine(ptr, line, sizeof(line));
trim(line);
if (line[0] == '\0' || line[0] == ';' || line[0] == '#')
continue;
// Process labels: replace colon with a space.
// Remove any label definitions (up to the colon).
char *colon = strchr(line, ':');
if (colon != NULL) {
*colon = ' ';
*colon = ' '; // Replace the colon so the rest of the line can be parsed.
continue;
}
if (strlen(line) == 0)
continue;
// Parse the mnemonic and operands.
char mnemonic[32], operand1[64], operand2[64];
operand1[0] = '\0';
operand2[0] = '\0';
sscanf(line, "%31s %63[^,], %63s", mnemonic, operand1, operand2);
// Parse mnemonic and up to three operands.
char mnemonic[32], operand1[64], operand2[64], operand3[64];
mnemonic[0] = operand1[0] = operand2[0] = operand3[0] = '\0';
int tokenCount = sscanf(line, "%31s %63[^, ] %63[^, ] %63s",
mnemonic, operand1, operand2, operand3);
// Use the mapper to get the base opcode.
// (Optionally, you might trim each operand individually here.)
// Map the mnemonic to a base opcode.
int baseOpcode = getOpcode(mnemonic);
if (baseOpcode == -1) {
fprintf(stderr, "Unknown instruction: %s\n", mnemonic);
exit(1);
return 1;
}
// --- MOV Instruction ---
if (baseOpcode == -2) { // MOV is ambiguous.
char *dest = strtok(NULL, " ,");
char *src = strtok(NULL, " ,");
if (!dest || !src) {
// --- MOV Instruction (baseOpcode == -2) ---
if (baseOpcode == -2) {
if (strlen(operand1) == 0 || strlen(operand2) == 0) {
fprintf(stderr, "Error: MOV requires two operands.\n");
exit(1);
return 1;
}
int resolvedOpcode = resolveMOV(dest, src);
int resolvedOpcode = resolveMOV(operand1, operand2);
code[addr++] = resolvedOpcode;
if (resolvedOpcode == MOV_RN_IMM) {
int reg = parseRegister(dest);
uint8_t imm = parseImmediate(src);
int reg = parseRegister(operand1);
uint8_t imm = parseImmediate(operand2);
code[addr++] = reg;
code[addr++] = imm;
} else if (resolvedOpcode == MOV_RN_RM) {
int regDest = parseRegister(dest);
int regSrc = parseRegister(src);
int regDest = parseRegister(operand1);
int regSrc = parseRegister(operand2);
code[addr++] = regDest;
code[addr++] = regSrc;
} else if (resolvedOpcode == MOV_RN_ADDR) {
int reg = parseRegister(dest);
// Remove brackets from src, assuming format "[address]"
int reg = parseRegister(operand1);
// Assume source is written as "[address]": remove the brackets.
char addrStr[32];
strncpy(addrStr, src + 1, strlen(src) - 2);
addrStr[strlen(src) - 2] = '\0';
strncpy(addrStr, operand2 + 1, strlen(operand2) - 2);
addrStr[strlen(operand2) - 2] = '\0';
uint32_t memAddr = (uint32_t) strtoul(addrStr, NULL, 0);
code[addr++] = reg;
code[addr++] = (memAddr >> 24) & 0xFF;
@@ -497,12 +513,12 @@ int secondPass(const char *source, uint8_t *code) {
code[addr++] = (memAddr >> 8) & 0xFF;
code[addr++] = memAddr & 0xFF;
} else if (resolvedOpcode == MOV_ADDR_RN) {
// dest is memory reference.
// Destination is memory (written as "[address]").
char addrStr[32];
strncpy(addrStr, dest + 1, strlen(dest) - 2);
addrStr[strlen(dest) - 2] = '\0';
strncpy(addrStr, operand1 + 1, strlen(operand1) - 2);
addrStr[strlen(operand1) - 2] = '\0';
uint32_t memAddr = (uint32_t) strtoul(addrStr, NULL, 0);
int reg = parseRegister(src);
int reg = parseRegister(operand2);
code[addr++] = (memAddr >> 24) & 0xFF;
code[addr++] = (memAddr >> 16) & 0xFF;
code[addr++] = (memAddr >> 8) & 0xFF;
@@ -510,136 +526,145 @@ int secondPass(const char *source, uint8_t *code) {
code[addr++] = reg;
}
}
// --- ALU Instructions (Arithmetic, INC/DEC, etc.) ---
else if (baseOpcode < 0 && baseOpcode != -2 && baseOpcode != -11 && baseOpcode != -14 && baseOpcode != -15) {
// For arithmetic and INC/DEC instructions, use operand2.
char *dest = strtok(NULL, " ,");
char *src = strtok(NULL, " ,");
if (!dest || !src) {
fprintf(stderr, "Error: %s requires two operands.\n", mnemonic);
exit(1);
// --- INC and DEC (baseOpcode == -12 or -13) ---
// These instructions require only a single operand.
else if (baseOpcode == -12 || baseOpcode == -13) {
if (strlen(operand1) == 0) {
fprintf(stderr, "Error: %s requires one operand.\n", mnemonic);
return 1;
}
int resolvedOpcode = resolveALU(baseOpcode, src);
int resolvedOpcode = resolveALU(baseOpcode, operand1);
code[addr++] = resolvedOpcode;
int regDest = parseRegister(dest);
if (operand1[0] == 'R' || operand1[0] == 'r') {
int reg = parseRegister(operand1);
code[addr++] = reg;
} else {
// Assume memory reference written as "[address]".
char addrStr[32];
strncpy(addrStr, operand1 + 1, strlen(operand1) - 2);
addrStr[strlen(operand1) - 2] = '\0';
uint32_t memAddr = (uint32_t) strtoul(addrStr, NULL, 0);
code[addr++] = (memAddr >> 24) & 0xFF;
code[addr++] = (memAddr >> 16) & 0xFF;
code[addr++] = (memAddr >> 8) & 0xFF;
code[addr++] = memAddr & 0xFF;
}
}
// --- Other Ambiguous ALU Instructions (ADD, SUB, MUL, etc.) ---
// These require two operands (destination and source).
else if (baseOpcode < 0 && baseOpcode != -2 && baseOpcode != -11 &&
baseOpcode != -14 && baseOpcode != -15 && baseOpcode != -12 && baseOpcode != -13) {
if (strlen(operand1) == 0 || strlen(operand2) == 0) {
fprintf(stderr, "Error: %s requires two operands.\n", mnemonic);
return 1;
}
int resolvedOpcode = resolveALU(baseOpcode, operand2);
code[addr++] = resolvedOpcode;
int regDest = parseRegister(operand1);
code[addr++] = regDest;
if (src[0] == 'R' || src[0] == 'r') {
int regSrc = parseRegister(src);
if (operand2[0] == 'R' || operand2[0] == 'r') {
int regSrc = parseRegister(operand2);
code[addr++] = regSrc;
} else {
uint8_t imm = parseImmediate(src);
uint8_t imm = parseImmediate(operand2);
code[addr++] = imm;
}
}
// --- Jump Instructions ---
else if (baseOpcode == -11) { // JMP (ambiguous)
// For JMP, the operand is the jump target.
char *operand = strtok(NULL, " ,");
if (!operand) {
fprintf(stderr, "Error: JMP requires an operand.\n");
exit(1);
// --- JMP Instruction (baseOpcode == -11) ---
else if (baseOpcode == -11) {
if (strlen(operand1) == 0) {
fprintf(stderr, "Error: JMP requires one operand.\n");
return 1;
}
int resolvedOpcode = resolveALU(baseOpcode, operand);
int resolvedOpcode = resolveALU(baseOpcode, operand1);
code[addr++] = resolvedOpcode;
if (operand[0] == '+' || operand[0] == '-') {
// Relative jump: 1-byte offset.
uint8_t offset = parseImmediate(operand);
if (operand1[0] == '+' || operand1[0] == '-') {
// Relative jump: one-byte offset.
uint8_t offset = parseImmediate(operand1);
code[addr++] = offset;
} else {
// Absolute jump: 32-bit address.
uint32_t jumpAddr = (uint32_t) lookupLabel(operand);
// Absolute jump: use label lookup for 32-bit address.
uint32_t jumpAddr = (uint32_t) lookupLabel(operand1);
code[addr++] = (jumpAddr >> 24) & 0xFF;
code[addr++] = (jumpAddr >> 16) & 0xFF;
code[addr++] = (jumpAddr >> 8) & 0xFF;
code[addr++] = jumpAddr & 0xFF;
}
}
// --- Jump Bit Set/Clear Instructions ---
// --- Jump Bit Set/Clear Instructions (JMPBS, JMPBC) ---
else if (baseOpcode == -14 || baseOpcode == -15) {
// For JMPBS (jump if bit set) or JMPBC (jump if bit clear), the operand specifies the register/memory
// from which to test the bit, followed by the bit value and the jump target.
char *srcOperand = strtok(NULL, " ,"); // register or memory reference
char *bitToken = strtok(NULL, " ,");
char *target = strtok(NULL, " ,");
if (!srcOperand || !bitToken || !target) {
if (strlen(operand1) == 0 || strlen(operand2) == 0 || strlen(operand3) == 0) {
fprintf(stderr, "Error: %s requires three operands.\n", mnemonic);
exit(1);
return 1;
}
int resolvedOpcode = resolveALU(baseOpcode, srcOperand);
int resolvedOpcode = resolveALU(baseOpcode, operand1);
code[addr++] = resolvedOpcode;
// Encode the source operand.
if (srcOperand[0] == 'R' || srcOperand[0] == 'r') {
int reg = parseRegister(srcOperand);
// Encode the source operand (register or memory).
if (operand1[0] == 'R' || operand1[0] == 'r') {
int reg = parseRegister(operand1);
code[addr++] = reg;
} else {
// Memory reference: encode 32-bit address.
char addrStr[32];
strncpy(addrStr, srcOperand + 1, strlen(srcOperand) - 2);
addrStr[strlen(srcOperand) - 2] = '\0';
strncpy(addrStr, operand1 + 1, strlen(operand1) - 2);
addrStr[strlen(operand1) - 2] = '\0';
uint32_t memAddr = (uint32_t) strtoul(addrStr, NULL, 0);
code[addr++] = (memAddr >> 24) & 0xFF;
code[addr++] = (memAddr >> 16) & 0xFF;
code[addr++] = (memAddr >> 8) & 0xFF;
code[addr++] = memAddr & 0xFF;
}
// Encode the bit number (assumed to be a one-byte immediate).
uint8_t bitVal = parseImmediate(bitToken);
// Encode the bit number (a one-byte immediate).
uint8_t bitVal = parseImmediate(operand2);
code[addr++] = bitVal;
// Encode the jump target as a 32-bit address.
uint32_t jumpAddr = (uint32_t) lookupLabel(target);
// Encode the jump target (label -> 32-bit address).
uint32_t jumpAddr = (uint32_t) lookupLabel(operand3);
code[addr++] = (jumpAddr >> 24) & 0xFF;
code[addr++] = (jumpAddr >> 16) & 0xFF;
code[addr++] = (jumpAddr >> 8) & 0xFF;
code[addr++] = jumpAddr & 0xFF;
}
// --- Other Instructions (CMP, SWAP, NEG, NOT, SHL, SHR, SAR, JE, JNE, JG, JL, JGE, JLE, CALL, RET) ---
// --- Non-ambiguous Instructions ---
else if (baseOpcode > 0) {
// For instructions that are not ambiguous, simply encode the opcode and its operands.
switch (baseOpcode) {
case CMP:
case SWAP: { // Two register operands.
char *op1 = strtok(NULL, " ,");
char *op2 = strtok(NULL, " ,");
if (!op1 || !op2) {
case SWAP: {
if (strlen(operand1) == 0 || strlen(operand2) == 0) {
fprintf(stderr, "Error: %s requires two operands.\n", mnemonic);
exit(1);
return 1;
}
code[addr++] = baseOpcode;
int r1 = parseRegister(op1);
int r2 = parseRegister(op2);
int r1 = parseRegister(operand1);
int r2 = parseRegister(operand2);
code[addr++] = r1;
code[addr++] = r2;
}
break;
}
case SWAPN:
case NEG_RN:
case NOT_RN: { // Single register operand.
char *op = strtok(NULL, " ,");
if (!op) {
case NOT_RN: {
if (strlen(operand1) == 0) {
fprintf(stderr, "Error: %s requires one operand.\n", mnemonic);
exit(1);
return 1;
}
code[addr++] = baseOpcode;
int reg = parseRegister(op);
int reg = parseRegister(operand1);
code[addr++] = reg;
}
break;
}
case SHL_RN_IMM:
case SHR_RN_IMM:
case SAR_RN_IMM: { // Shift: register and immediate operand.
char *regToken = strtok(NULL, " ,");
char *immToken = strtok(NULL, " ,");
if (!regToken || !immToken) {
case SAR_RN_IMM: {
if (strlen(operand1) == 0 || strlen(operand2) == 0) {
fprintf(stderr, "Error: %s requires two operands.\n", mnemonic);
exit(1);
return 1;
}
code[addr++] = baseOpcode;
int reg = parseRegister(regToken);
int reg = parseRegister(operand1);
code[addr++] = reg;
uint8_t imm = parseImmediate(immToken);
uint8_t imm = parseImmediate(operand2);
code[addr++] = imm;
}
break;
}
case JE:
case JNE:
case JG:
@@ -647,54 +672,58 @@ int secondPass(const char *source, uint8_t *code) {
case JGE:
case JLE:
case CALL: {
// One operand: jump target (label or immediate 32-bit address).
char *operand = strtok(NULL, " ,");
if (!operand) {
fprintf(stderr, "Error: %s requires an operand.\n", mnemonic);
exit(1);
if (strlen(operand1) == 0) {
fprintf(stderr, "Error: %s requires one operand.\n", mnemonic);
return 1;
}
code[addr++] = baseOpcode;
if (!isdigit(operand[0])) {
int labelAddr = lookupLabel(operand);
// If the operand doesn't start with a digit, treat it as a label.
if (!isdigit(operand1[0])) {
int labelAddr = lookupLabel(operand1);
if (labelAddr < 0) {
fprintf(stderr, "Error: undefined label '%s'\n", operand);
exit(1);
fprintf(stderr, "Error: undefined label '%s'\n", operand1);
return 1;
}
code[addr++] = (labelAddr >> 24) & 0xFF;
code[addr++] = (labelAddr >> 16) & 0xFF;
code[addr++] = (labelAddr >> 8) & 0xFF;
code[addr++] = labelAddr & 0xFF;
} else {
uint32_t immAddr = (uint32_t) strtoul(operand, NULL, 0);
uint32_t immAddr = (uint32_t) strtoul(operand1, NULL, 0);
code[addr++] = (immAddr >> 24) & 0xFF;
code[addr++] = (immAddr >> 16) & 0xFF;
code[addr++] = (immAddr >> 8) & 0xFF;
code[addr++] = immAddr & 0xFF;
}
}
break;
}
case RET:
case BRK:
case NOP:
case HLT:
case NOP: {
code[addr++] = baseOpcode;
break;
default:
}
default: {
fprintf(stderr, "Error: Unhandled opcode %d\n", baseOpcode);
exit(1);
return 1;
}
}
} else {
fprintf(stderr, "Error: Unknown instruction '%s'\n", mnemonic);
exit(1);
return 1;
}
}
return addr;
}
// Run both assembler passes over `input`, writing the output of secondPass
// into cpu->memory.
//   input - assembly source text handed to firstPass and secondPass.
//   cpu   - CPU whose memory buffer receives the assembled bytes.
//   erase - when true, cpu->memory is zeroed (MEM_SIZE bytes) and init_cpu is
//           called before the code is emitted.
// The return values of firstPass and secondPass are not inspected here, so a
// failure reported by either pass is not propagated to the caller.
void completePass(const char *input, CPU *cpu, bool erase) {
// First pass: determine label addresses.
firstPass(input);
if (erase) {
// Clear the whole memory buffer before calling init_cpu on the CPU.
memset(cpu->memory, 0, MEM_SIZE);
init_cpu(cpu);
}
// NOTE(review): firstPass is called a second time here. This text comes from a
// diff view, so one of the two calls is presumably the removed line and the
// other the added one -- confirm which call the committed file keeps.
firstPass(input);
// Second pass: emit the encoded instructions into the CPU's memory.
secondPass(input, cpu->memory);
}