Browse Source

finish fvm-as

Mistivia 1 week ago
parent
commit
312716a295
9 changed files with 145 additions and 40 deletions
  1. 1 1
      Makefile
  2. 9 0
      src/as_analyzer.c
  3. 1 0
      src/as_analyzer.h
  4. 94 18
      src/as_codegen.c
  5. 2 0
      src/as_codegen.h
  6. 30 6
      src/as_main.c
  7. 6 13
      src/as_op.c
  8. 1 1
      src/as_op.h
  9. 1 1
      src/as_tokenizer.c

+ 1 - 1
Makefile

@@ -11,7 +11,7 @@ obj = $(src:.c=.o)
 tests=$(shell find tests/ -name '*.c')
 tests_bin=$(tests:.c=.bin)
 
-all: $(target) # fvm-as 
+all: $(target) fvm-as 
 
 fvm-as: $(obj) src/as_main.c 
 	$(cc) $(cflags) $(ldflags) $^ -o $@

+ 9 - 0
src/as_analyzer.c

@@ -53,6 +53,15 @@ void sym_table_add(struct allocator * alct, struct sym_table* tbl, const char* n
     tbl->size += 1;
 }
 
+// Linear search of the symbol table for an entry named exactly `name`.
+// Returns the offset stored in the first matching entry, or -1 when no
+// entry matches.
+int sym_table_lookup(sym_table* tbl, const char* name) {
+    int idx = 0;
+    while (idx < tbl->size) {
+        if (!strcmp(tbl->buf[idx].name, name)) {
+            return tbl->buf[idx].offset;
+        }
+        idx++;
+    }
+    return -1;
+}
+
 struct sym_table analyze_prog(struct allocator * alct, struct prog * prog) {
     process_section_label(alct, prog);
     struct stmt * * stmts = prog->stmts->stmts;

+ 1 - 0
src/as_analyzer.h

@@ -19,6 +19,7 @@ typedef struct sym_table sym_table;
 
 sym_table new_sym_table(allocator* alct);
 void sym_table_add(allocator* alct, sym_table* tbl, const char* name, int pos);
+int sym_table_lookup(sym_table* tbl, const char* name);
 
 sym_table analyze_prog(allocator* alct, prog* prog);
 

+ 94 - 18
src/as_codegen.c

@@ -1,12 +1,38 @@
 #include "as_codegen.h"
+
+#include <string.h>
+
 #include "as_analyzer.h"
 #include "as_op.h"
 #include "fvm.h"
 #include "utils.h"
 
-bytearray *new_bytearray(allocator *alct);
-void bytearray_emit8(bytearray *self, int8_t data);
-void bytearray_emit64(bytearray *self, int64_t data);
+// Create an empty byte array whose storage comes from `alct`.
+// Both the struct and its initial 16-byte buffer are obtained with
+// allocate(); the allocator pointer is stored in the array.
+bytearray *new_bytearray(allocator *alct) {
+    bytearray* self = allocate(alct, sizeof(bytearray));
+    self->alct = alct;
+    self->cap = 16;
+    self->len = 0;
+    self->buf = allocate(alct, self->cap);
+    return self;
+}
+
+// Append one byte to `arr`.
+// When the buffer is full, a buffer of twice the capacity is requested from
+// the array's allocator and the existing bytes are copied into it; the old
+// buffer is not released here.
+void bytearray_emit8(bytearray *arr, int8_t data) {
+    if (arr->len == arr->cap) {
+        size_t grown = 2 * arr->cap;
+        char* fresh = allocate(arr->alct, grown);
+        memcpy(fresh, arr->buf, arr->cap);
+        arr->buf = fresh;
+        arr->cap = grown;
+    }
+    arr->buf[arr->len++] = data;
+}
+
+// Append the 8 bytes of `data` to `self`, one byte at a time, in the order
+// they are laid out in memory (i.e. the host's byte order).
+void bytearray_emit64(bytearray *self, int64_t data) {
+    int8_t bytes[sizeof data];
+    memcpy(bytes, &data, sizeof data);
+    for (size_t k = 0; k < sizeof bytes; k++) {
+        bytearray_emit8(self, bytes[k]);
+    }
+}
 
 
 int8_t op_bytecode(op op) {
@@ -43,6 +69,12 @@ int8_t op_bytecode(op op) {
     if (op == OP_XOR) return (int8_t)FVM_OP_XOR;
     if (op == OP_INVERT) return (int8_t)FVM_OP_INVERT;
 
+    if (op == OP_JNZ) return (int8_t)FVM_OP_JNZ;
+    if (op == OP_JZ) return (int8_t)FVM_OP_JZ;
+    if (op == OP_JMP) return (int8_t)FVM_OP_JMP;
+    if (op == OP_CALL) return (int8_t)FVM_OP_CALL;
+    if (op == OP_SYSCALL) return (int8_t)FVM_OP_SYSCALL;
+
     if (op == OP_GT) return (int8_t)FVM_OP_GT;
     if (op == OP_LT) return (int8_t)FVM_OP_LT;
     if (op == OP_GE) return (int8_t)FVM_OP_GE;
@@ -86,22 +118,66 @@ result codegen(allocator* alct, prog* prog, sym_table tbl) {
             int8_t code = op_bytecode(op);
             bytearray_emit8(output, code);
             offset += 1;
-        } else if (op_size(op) == 9) {
-            if (op == OP_IMM) {
-                if (instr->tag_name != NULL || instr->arg == NULL) {
-                    return err(safe_sprintf(alct, "line %d: invalid instruction format. (imm)\n", instr->lineno));
-                }
-                bytearray_emit8(output, FVM_OP_IMM);
-                bytearray_emit64(output, instr->arg->ival);
-                offset += 9;
-            } else if (op == OP_REL) {
-                if (instr->tag_name == NULL || instr->arg != NULL) {
-                    return err(safe_sprintf(alct, "line %d: invalid instruction format. (rel)\n", instr->lineno));
-                }
-                bytearray_emit8(output, FVM_OP_IMM);
+        } else if (op == OP_IMM) {
+            if (instr->tag_name != NULL || instr->arg == NULL) {
+                return err(safe_sprintf(alct, "line %d: invalid instruction format. (imm)\n", instr->lineno));
             }
-        } else if (op_size(op) == 10) {
-
+            bytearray_emit8(output, FVM_OP_IMM);
+            bytearray_emit64(output, instr->arg->ival);
+            offset += 9;
+        } else if (op == OP_REL) {
+            if (instr->tag_name == NULL || instr->arg != NULL) {
+                return err(safe_sprintf(alct, "line %d: invalid instruction format. (rel)\n", instr->lineno));
+            }
+            bytearray_emit8(output, FVM_OP_IMM);
+            int target_offset = sym_table_lookup(&tbl, instr->tag_name);
+            if (target_offset == -1) {
+                return err(safe_sprintf(alct, "line %d: unknown tag: %s", instr->lineno, instr->tag_name));
+            }
+            bytearray_emit64(output, target_offset - (offset + 9));
+            bytearray_emit8(output, FVM_OP_PC);
+            bytearray_emit8(output, FVM_OP_ADD);
+            offset += 11;
+        } else if (op == OP_LDARG) {
+            if (instr->tag_name != NULL || instr->arg == NULL) {
+                return err(safe_sprintf(alct, "line %d: invalid instruction format. (ldarg)\n", instr->lineno));
+            }
+            bytearray_emit8(output, FVM_OP_IMM);
+            bytearray_emit64(output, 8 * (instr->arg->ival + 2));
+            bytearray_emit8(output, FVM_OP_BP);
+            bytearray_emit8(output, FVM_OP_ADD);
+            bytearray_emit8(output, FVM_OP_LD);
+            offset += 12;
+        } else if (op == OP_STARG) {
+            if (instr->tag_name != NULL || instr->arg == NULL) {
+                return err(safe_sprintf(alct, "line %d: invalid instruction format. (starg)\n", instr->lineno));
+            }
+            bytearray_emit8(output, FVM_OP_IMM);
+            bytearray_emit64(output, 8 * (instr->arg->ival + 2));
+            bytearray_emit8(output, FVM_OP_BP);
+            bytearray_emit8(output, FVM_OP_ADD);
+            bytearray_emit8(output, FVM_OP_ST);
+            offset += 12;
+        } else if (op == OP_LDVAR) {
+            if (instr->tag_name != NULL || instr->arg == NULL) {
+                return err(safe_sprintf(alct, "line %d: invalid instruction format. (ldvar)\n", instr->lineno));
+            }
+            bytearray_emit8(output, FVM_OP_IMM);
+            bytearray_emit64(output, 8 * (-instr->arg->ival - 1));
+            bytearray_emit8(output, FVM_OP_BP);
+            bytearray_emit8(output, FVM_OP_ADD);
+            bytearray_emit8(output, FVM_OP_LD);
+            offset += 12;
+        } else if (op == OP_STVAR) {
+            if (instr->tag_name != NULL || instr->arg == NULL) {
+                return err(safe_sprintf(alct, "line %d: invalid instruction format. (stvar)\n", instr->lineno));
+            }
+            bytearray_emit8(output, FVM_OP_IMM);
+            bytearray_emit64(output, 8 * (-instr->arg->ival - 2));
+            bytearray_emit8(output, FVM_OP_BP);
+            bytearray_emit8(output, FVM_OP_ADD);
+            bytearray_emit8(output, FVM_OP_ST);
+            offset += 12;
         }
     }
     return ok(output);

+ 2 - 0
src/as_codegen.h

@@ -8,12 +8,14 @@ struct bytearray {
     size_t cap;
     size_t len;
     char* buf;
+    allocator *alct;
 };
 typedef struct bytearray bytearray;
 
 void bytearray_emit8(bytearray *self, int8_t data);
 void bytearray_emit64(bytearray *self, int64_t data);
 
+// return: result<bytearray*>
 struct result codegen(struct allocator *alct, struct prog *prog, struct sym_table tbl);
 
 #endif // FVM_AS_CODEGEN_H_

+ 30 - 6
src/as_main.c

@@ -1,18 +1,42 @@
 #include <stdio.h>
 
 #include "as_tokenizer.h"
+#include "as_analyzer.h"
+#include "as_parser.h"
+#include "as_codegen.h"
 #include "utils.h"
 
-int main(int argc, char** argv) {
-    if (argc != 2) {
-        fprintf(stderr, "usage: fvm-as <inputfile>\n");
-        return 1;
+// Assemble the source file argv[1] and write the generated bytes to argv[2].
+// Steps: new_token_stream -> parse_prog -> analyze_prog -> codegen -> fwrite.
+// Returns ok(NULL) on success, or err(<message>) for a usage error or a file
+// open/write failure. The parse and codegen results are passed through
+// unwrap().
+result main_impl(int argc, char** argv) {
+    if (argc != 3) {
+        return err("usage: fvm-as <input_file> <output_file>");
     }
     struct allocator * alct = new_allocator();
     FILE *fp = fopen(argv[1], "r");
-    struct token_stream * ts = new_token_stream(alct, fp);
+    if (fp == NULL) {
+        // Never hand a NULL stream to the tokenizer.
+        delete_allocator(alct);
+        return err("open input file failed.");
+    }
+    token_stream * ts = new_token_stream(alct, fp);
+    prog* prog = unwrap(parse_prog(alct, ts));
+    sym_table symtbl = analyze_prog(alct, prog);
+    bytearray* output = unwrap(codegen(alct, prog, symtbl));
+    fclose(fp);
+
+    fp = fopen(argv[2], "wb");
+    if (fp == NULL) {
+        delete_allocator(alct);
+        return err("open output file failed.");
+    }
+    // fwrite returns size_t; keep it unsigned so the comparison with
+    // output->len is not a signed/unsigned mix.
+    size_t written = fwrite(output->buf, 1, output->len, fp);
+    int failed = (written != output->len);
+    // fclose flushes buffered data, so a failed close is a failed write too.
+    if (fclose(fp) != 0) {
+        failed = 1;
+    }
 
     delete_allocator(alct);
-    return 0;
+    if (failed) {
+        return err("write output file failed.");
+    }
+    return ok(NULL);
 }
 
+// Program entry point: run main_impl and map its result to an exit status.
+// A non-NULL errmsg is printed to stderr and yields -1; otherwise 0.
+int main(int argc, char** argv) {
+    result outcome = main_impl(argc, argv);
+    if (outcome.errmsg == NULL) {
+        return 0;
+    }
+    fprintf(stderr, "%s\n", outcome.errmsg);
+    return -1;
+}

+ 6 - 13
src/as_op.c

@@ -94,7 +94,7 @@ struct op_table_entry op_table [] = {
 };
 
 int op_size(enum op op) {
-    if (op == OP_IMM || op == OP_REL) {
+    if (op == OP_IMM) {
         return 8 + 1;
     }
     if (op == OP_CALL
@@ -104,23 +104,16 @@ int op_size(enum op op) {
             || op == OP_SYSCALL) {
         return 8 + 1 + 1;
     }
-    if (op == OP_LDARG || op == OP_STARG) {
-        return 9 + 9 + 1 + 9 + 4;
-    }
-    if (op == OP_LDVAR || op == OP_STVAR) {
-        return 9 + 9 + 2 + 9 + 4;
+    if (op == OP_REL) {
+        return 11;
     }
-    return 1;
-}
-
-int is_pseudo_op(enum op op) {
     if (op == OP_LDARG || op == OP_STARG) {
-        return 1;
+        return 12;
     }
     if (op == OP_LDVAR || op == OP_STVAR) {
-        return 1;
+        return 12;
     }
-    return 0;
+    return 1;
 }
 
 enum op str2op(const char* str) {

+ 1 - 1
src/as_op.h

@@ -23,7 +23,7 @@ enum op {
 typedef enum op op;
 
 enum op str2op(const char *str);
-int isOp(const char *str);
+int is_op(const char *str);
 int op_size(op op);
 int is_pseudo_op(op op);
 

+ 1 - 1
src/as_tokenizer.c

@@ -124,7 +124,7 @@ result next_token_impl(allocator* alct, input_stream* s) {
                 }
             }
             sval[i] = '\0';
-            if (isOp(sval)) {
+            if (is_op(sval)) {
                 *t = (struct token){.type = TK_OP, .sval = sval, .line = line, .col = col};
                 return ok(t);
             }