diff options
| author | Mistivia <i@mistivia.com> | 2025-03-26 19:11:10 +0800 |
|---|---|---|
| committer | Mistivia <i@mistivia.com> | 2025-03-26 19:11:10 +0800 |
| commit | 97d4462ac24b726d9313ec52ca0f11711ead553b (patch) | |
| tree | 1bba7f6d4f2690d673b810bda4ec34523034bcaa | |
| parent | 312716a295626f2b60b41777728c7f220fee843d (diff) | |
| -rw-r--r-- | Makefile | 5 | ||||
| -rw-r--r-- | src/as_analyzer.c | 82 | ||||
| -rw-r--r-- | src/as_analyzer.h | 26 | ||||
| -rw-r--r-- | src/as_codegen.c | 185 | ||||
| -rw-r--r-- | src/as_codegen.h | 21 | ||||
| -rw-r--r-- | src/as_main.c | 42 | ||||
| -rw-r--r-- | src/as_op.c | 130 | ||||
| -rw-r--r-- | src/as_op.h | 31 | ||||
| -rw-r--r-- | src/as_parser.c | 149 | ||||
| -rw-r--r-- | src/as_parser.h | 62 | ||||
| -rw-r--r-- | src/as_tokenizer.c | 193 | ||||
| -rw-r--r-- | src/as_tokenizer.h | 52 | ||||
| -rw-r--r-- | tests/test_as_analyzer.c | 45 | ||||
| -rw-r--r-- | tests/test_as_parser.c | 47 | ||||
| -rw-r--r-- | tests/test_as_tokenizer.c | 65 |
15 files changed, 1 insertions, 1134 deletions
@@ -11,10 +11,7 @@ obj = $(src:.c=.o) tests=$(shell find tests/ -name '*.c') tests_bin=$(tests:.c=.bin) -all: $(target) fvm-as - -fvm-as: $(obj) src/as_main.c - $(cc) $(cflags) $(ldflags) $^ -o $@ +all: $(target) full: all $(tests_bin) diff --git a/src/as_analyzer.c b/src/as_analyzer.c deleted file mode 100644 index 5a75794..0000000 --- a/src/as_analyzer.c +++ /dev/null @@ -1,82 +0,0 @@ -#include "as_analyzer.h" - -#include <stddef.h> -#include <string.h> - -const char * compose_section_label(struct allocator * alct, const char * section, const char * name) { - size_t section_len = strlen(section); - size_t name_len = strlen(name); - size_t sz = section_len + name_len; - char * buf = allocate(alct, sz + 1); - memcpy(buf, section, section_len); - memcpy(buf + section_len, name, name_len); - buf[sz] = '\0'; - return buf; -} - -void process_section_label(struct allocator * alct, struct prog * prog) { - const char * section = ""; - struct stmt ** stmts = prog->stmts->stmts; - for (size_t i = 0; ; i++) { - if (stmts[i] == NULL) break; - if (stmts[i]->label == NULL) continue; - const char* name = stmts[i]->label->name; - if (name[0] == '.') { - stmts[i]->label->name = compose_section_label(alct, section, name); - } else { - section = name; - continue; - } - } -} - -size_t instr_size(struct instr * instr) { - return op_size(instr->op); -} - -struct sym_table new_sym_table(struct allocator * alct) { - struct sym_table tbl; - tbl.cap = 16; - tbl.size = 0; - tbl.buf = allocate(alct, sizeof(struct sym_table_entry) * 16); - return tbl; -} - -void sym_table_add(struct allocator * alct, struct sym_table* tbl, const char* name, int pos) { - if (tbl->cap == tbl->size) { - void *old_buf = tbl->buf; - tbl->buf = allocate(alct, sizeof(struct sym_table_entry) * tbl->cap * 2); - memcpy(tbl->buf, old_buf, sizeof(struct sym_table_entry) * tbl->cap); - tbl->cap = tbl->cap * 2; - } - tbl->buf[tbl->size] = (struct sym_table_entry){.name = name, .offset = pos,}; - tbl->size += 1; -} - -int sym_table_lookup(sym_table* tbl, const char* name) { - for (int i = 0; i < tbl->size; i++) { - if (strcmp(name, tbl->buf[i].name) == 0) { - return tbl->buf[i].offset; - } - } - return -1; -} - -struct sym_table analyze_prog(struct allocator * alct, struct prog * prog) { - process_section_label(alct, prog); - struct stmt * * stmts = prog->stmts->stmts; - struct sym_table tbl = new_sym_table(alct); - size_t cur_pos = 0; - for (int i = 0; ; i++) { - if (stmts[i] == NULL) break; - struct stmt * stmt = stmts[i]; - if (stmt->label) { - sym_table_add(alct, &tbl, stmt->label->name, cur_pos); - } - if (stmt->instr) { - cur_pos += instr_size(stmt->instr); - } - } - return tbl; -} - diff --git a/src/as_analyzer.h b/src/as_analyzer.h deleted file mode 100644 index a1f781e..0000000 --- a/src/as_analyzer.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef FVM_AS_ANALYZER_H_ -#define FVM_AS_ANALYZER_H_ - -#include "as_parser.h" -#include "utils.h" - -struct sym_table_entry { - const char * name; - size_t offset; -}; -typedef struct sym_table_entry sym_table_entry; - -struct sym_table { - int size; - int cap; - struct sym_table_entry *buf; -}; -typedef struct sym_table sym_table; - -sym_table new_sym_table(allocator* alct); -void sym_table_add(allocator* alct, sym_table* tbl, const char* name, int pos); -int sym_table_lookup(sym_table* tbl, const char* name); - -sym_table analyze_prog(allocator* alct, prog* prog); - -#endif // FVM_AS_ANALYZER_H_ diff --git a/src/as_codegen.c b/src/as_codegen.c deleted file mode 100644 index 678733f..0000000 --- a/src/as_codegen.c +++ /dev/null @@ -1,185 +0,0 @@ -#include "as_codegen.h" - -#include <string.h> - -#include "as_analyzer.h" -#include "as_op.h" -#include "fvm.h" -#include "utils.h" - -bytearray *new_bytearray(allocator *alct) { - bytearray* arr = allocate(alct, sizeof(bytearray)); - arr->len = 0; - arr->cap = 16; - arr->buf = allocate(alct, 16); - arr->alct = alct; - return arr; -} - -void bytearray_emit8(bytearray *arr, int8_t data) { - if (arr->len == arr->cap) { - void* oldbuf = arr->buf; - arr->buf = allocate(arr->alct, 2 * arr->cap); - memcpy(arr->buf, oldbuf, arr->cap); - arr->cap = arr->cap * 2; - } - arr->buf[arr->len] = data; - arr->len++; -} - -void bytearray_emit64(bytearray *self, int64_t data) { - int8_t* ptr = (int8_t*)&data; - for (int i = 0; i < 8; i++) { - bytearray_emit8(self, ptr[i]); - } -} - - -int8_t op_bytecode(op op) { - if (op == OP_SSP) return (int8_t)FVM_OP_SSP; - if (op == OP_SP) return (int8_t)FVM_OP_SP; - if (op == OP_SBP) return (int8_t)FVM_OP_SBP; - if (op == OP_BP) return (int8_t)FVM_OP_BP; - if (op == OP_PC) return (int8_t)FVM_OP_PC; - if (op == OP_RV) return (int8_t)FVM_OP_RV; - if (op == OP_SRV) return (int8_t)FVM_OP_SRV; - - if (op == OP_DUP) return (int8_t)FVM_OP_DUP; - if (op == OP_POP) return (int8_t)FVM_OP_POP; - if (op == OP_SWAP) return (int8_t)FVM_OP_SWAP; - if (op == OP_OVER) return (int8_t)FVM_OP_OVER; - if (op == OP_ROT) return (int8_t)FVM_OP_ROT; - - if (op == OP_ADD) return (int8_t)FVM_OP_ADD; - if (op == OP_SUB) return (int8_t)FVM_OP_SUB; - if (op == OP_DIV) return (int8_t)FVM_OP_DIV; - if (op == OP_MUL) return (int8_t)FVM_OP_MUL; - if (op == OP_MOD) return (int8_t)FVM_OP_MOD; - - if (op == OP_SHR) return (int8_t)FVM_OP_SHR; - if (op == OP_SHL) return (int8_t)FVM_OP_SHL; - if (op == OP_SAR) return (int8_t)FVM_OP_SAR; - - if (op == OP_AND) return (int8_t)FVM_OP_AND; - if (op == OP_OR) return (int8_t)FVM_OP_OR; - if (op == OP_NOT) return (int8_t)FVM_OP_NOT; - - if (op == OP_BITAND) return (int8_t)FVM_OP_BITAND; - if (op == OP_BITOR) return (int8_t)FVM_OP_BITOR; - if (op == OP_XOR) return (int8_t)FVM_OP_XOR; - if (op == OP_INVERT) return (int8_t)FVM_OP_INVERT; - - if (op == OP_JNZ) return (int8_t)FVM_OP_JNZ; - if (op == OP_JZ) return (int8_t)FVM_OP_JZ; - if (op == OP_JMP) return (int8_t)FVM_OP_JMP; - if (op == OP_CALL) return (int8_t)FVM_OP_CALL; - if (op == OP_SYSCALL) return (int8_t)FVM_OP_SYSCALL; - - if (op == OP_GT) return (int8_t)FVM_OP_GT; - if (op == OP_LT) return (int8_t)FVM_OP_LT; - if (op == OP_GE) return (int8_t)FVM_OP_GE; - if (op == OP_LE) return (int8_t)FVM_OP_LE; - if (op == OP_EQ) return (int8_t)FVM_OP_EQ; - if (op == OP_NEQ) return (int8_t)FVM_OP_NEQ; - - if (op == OP_RET) return (int8_t)FVM_OP_RET; - - if (op == OP_FADD) return (int8_t)FVM_OP_FADD; - if (op == OP_FSUB) return (int8_t)FVM_OP_FSUB; - if (op == OP_FDIV) return (int8_t)FVM_OP_FDIV; - if (op == OP_FMUL) return (int8_t)FVM_OP_FMUL; - - if (op == OP_FGT) return (int8_t)FVM_OP_FGT; - if (op == OP_FLT) return (int8_t)FVM_OP_FLT; - if (op == OP_FGE) return (int8_t)FVM_OP_FGE; - if (op == OP_FLE) return (int8_t)FVM_OP_FLE; - if (op == OP_FEQ) return (int8_t)FVM_OP_FEQ; - if (op == OP_FNEQ) return (int8_t)FVM_OP_FNEQ; - - if (op == OP_FTI) return (int8_t)FVM_OP_FTI; - if (op == OP_ITF) return (int8_t)FVM_OP_ITF; - - if (op == OP_EXIT) return (int8_t)FVM_OP_EXIT; - - return (int8_t)-1; -} - -result codegen(allocator* alct, prog* prog, sym_table tbl) { - stmt** stmts = prog->stmts->stmts; - size_t offset = 0; - bytearray* output = new_bytearray(alct); - for (int i = 0; ; i++) { - if (stmts[i] == NULL) { - break; - } - instr* instr = stmts[i]->instr; - op op = instr->op; - if (op_size(op) == 1) { - int8_t code = op_bytecode(op); - bytearray_emit8(output, code); - offset += 1; - } else if (op == OP_IMM) { - if (instr->tag_name != NULL || instr->arg == NULL) { - return err(safe_sprintf(alct, "line %d: invalid instruction format. (imm)\n", instr->lineno)); - } - bytearray_emit8(output, FVM_OP_IMM); - bytearray_emit64(output, instr->arg->ival); - offset += 9; - } else if (op == OP_REL) { - if (instr->tag_name == NULL || instr->arg != NULL) { - return err(safe_sprintf(alct, "line %d: invalid instruction format. (rel)\n", instr->lineno)); - } - bytearray_emit8(output, FVM_OP_IMM); - int target_offset = sym_table_lookup(&tbl, instr->tag_name); - if (target_offset == -1) { - return err(safe_sprintf(alct, "line %d: unknown tag: %s", instr->lineno, instr->tag_name)); - } - bytearray_emit64(output, target_offset - (offset + 9)); - bytearray_emit8(output, FVM_OP_PC); - bytearray_emit8(output, FVM_OP_ADD); - offset += 11; - } else if (op == OP_LDARG) { - if (instr->tag_name != NULL || instr->arg == NULL) { - return err(safe_sprintf(alct, "line %d: invalid instruction format. (ldarg)\n", instr->lineno)); - } - bytearray_emit8(output, FVM_OP_IMM); - bytearray_emit64(output, 8 * (instr->arg->ival + 2)); - bytearray_emit8(output, FVM_OP_BP); - bytearray_emit8(output, FVM_OP_ADD); - bytearray_emit8(output, FVM_OP_LD); - offset += 12; - } else if (op == OP_STARG) { - if (instr->tag_name != NULL || instr->arg == NULL) { - return err(safe_sprintf(alct, "line %d: invalid instruction format. (starg)\n", instr->lineno)); - } - bytearray_emit8(output, FVM_OP_IMM); - bytearray_emit64(output, 8 * (instr->arg->ival + 2)); - bytearray_emit8(output, FVM_OP_BP); - bytearray_emit8(output, FVM_OP_ADD); - bytearray_emit8(output, FVM_OP_ST); - offset += 12; - } else if (op == OP_LDVAR) { - if (instr->tag_name != NULL || instr->arg == NULL) { - return err(safe_sprintf(alct, "line %d: invalid instruction format. (ldvar)\n", instr->lineno)); - } - bytearray_emit8(output, FVM_OP_IMM); - bytearray_emit64(output, 8 * (-instr->arg->ival - 1)); - bytearray_emit8(output, FVM_OP_BP); - bytearray_emit8(output, FVM_OP_ADD); - bytearray_emit8(output, FVM_OP_LD); - offset += 12; - } else if (op == OP_STVAR) { - if (instr->tag_name != NULL || instr->arg == NULL) { - return err(safe_sprintf(alct, "line %d: invalid instruction format. (stvar)\n", instr->lineno)); - } - bytearray_emit8(output, FVM_OP_IMM); - bytearray_emit64(output, 8 * (-instr->arg->ival - 2)); - bytearray_emit8(output, FVM_OP_BP); - bytearray_emit8(output, FVM_OP_ADD); - bytearray_emit8(output, FVM_OP_ST); - offset += 12; - } - } - return ok(output); -} - diff --git a/src/as_codegen.h b/src/as_codegen.h deleted file mode 100644 index 4265f07..0000000 --- a/src/as_codegen.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef FVM_AS_CODEGEN_H_ -#define FVM_AS_CODEGEN_H_ - -#include "as_analyzer.h" -#include <stdlib.h> - -struct bytearray { - size_t cap; - size_t len; - char* buf; - allocator *alct; -}; -typedef struct bytearray bytearray; - -void bytearray_emit8(bytearray *self, int8_t data); -void bytearray_emit64(bytearray *self, int64_t data); - -// return: result<bytearray*> -struct result codegen(struct allocator *alct, struct prog *prog, struct sym_table tbl); - -#endif // FVM_AS_CODEGEN_H_ diff --git a/src/as_main.c b/src/as_main.c deleted file mode 100644 index cd9251b..0000000 --- a/src/as_main.c +++ /dev/null @@ -1,42 +0,0 @@ -#include <stdio.h> - -#include "as_tokenizer.h" -#include "as_analyzer.h" -#include "as_parser.h" -#include "as_codegen.h" -#include "utils.h" - -result main_impl(int argc, char** argv) { - if (argc != 3) { - return err("usage: fvm-as <input_file> <output_file>"); - } - struct allocator * alct = new_allocator(); - FILE *fp = fopen(argv[1], "r"); - token_stream * ts = new_token_stream(alct, fp); - prog* prog = unwrap(parse_prog(alct, ts)); - sym_table symtbl = analyze_prog(alct, prog); - bytearray* output = unwrap(codegen(alct, prog, symtbl)); - fclose(fp); - - fp = fopen(argv[2], "wb"); - if (fp == NULL) { - return err("open output file failed."); - } - int ret =fwrite(output->buf, 1, output->len, fp); - if (ret != output->len) { - return err("write output file failed."); - } - fclose(fp); - - delete_allocator(alct); - return ok(NULL); -} - -int main(int argc, char** argv) { - result result = main_impl(argc, argv); - if (result.errmsg != NULL) { - fprintf(stderr, "%s\n", result.errmsg); - return -1; - } - return 0; -} diff --git a/src/as_op.c b/src/as_op.c deleted file mode 100644 index a50812a..0000000 --- a/src/as_op.c +++ /dev/null @@ -1,130 +0,0 @@ -#include "as_op.h" - -#include <string.h> - -struct op_table_entry { - enum op op; - const char* name; -}; - - -struct op_table_entry op_table [] = { - // OP_SP, OP_SSP, OP_BP, OP_SBP, OP_PC, OP_RV, OP_SRV, - {OP_SP, "sp"}, - {OP_SSP, "ssp"}, - {OP_BP, "bp"}, - {OP_SBP, "sbp"}, - {OP_PC, "pc"}, - {OP_RV, "rv"}, - {OP_SRV, "srv"}, - // OP_IMM, - {OP_IMM, "imm"}, - {OP_IMM, "rel"}, - // OP_LD8, OP_LD16, OP_LD32, OP_LD, - {OP_LD8, "ld8"}, - {OP_LD16, "ld16"}, - {OP_LD32, "ld32"}, - {OP_LD, "ld"}, - // OP_ST8, OP_ST16, OP_ST32, OP_ST, - {OP_ST8, "st8"}, - {OP_ST16, "st16"}, - {OP_ST32, "st32"}, - {OP_ST, "st"}, - // OP_DUP, OP_POP, OP_SWAP, OP_OVER, OP_ROT, - {OP_DUP, "dup"}, - {OP_POP, "pop"}, - {OP_SWAP, "swap"}, - {OP_OVER, "over"}, - {OP_ROT, "rot"}, - // OP_ADD, OP_SUB, OP_DIV, OP_MUL, OP_MOD, - {OP_ADD, "add"}, - {OP_SUB, "sub"}, - {OP_DIV, "div"}, - {OP_MUL, "mul"}, - {OP_MOD, "mod"}, - // OP_SHR, OP_SHL, OP_SAR, - {OP_SHR, "shr"}, - {OP_SHL, "shl"}, - {OP_SAR, "sar"}, - // OP_AND, OP_OR, OP_NOT, - {OP_AND, "and"}, - {OP_OR, "or"}, - {OP_NOT, "not"}, - // OP_BITAND, OP_BITOR, OP_XOR, OP_INVERT, - {OP_BITAND, "bitand"}, - {OP_BITOR, "bitor"}, - {OP_XOR, "xor"}, - {OP_INVERT, "invert"}, - // OP_GT, OP_LT, OP_GE, OP_LE, OP_EQ, OP_NEQ, - {OP_GT, "gt"}, - {OP_LT, "lt"}, - {OP_GE, "ge"}, - {OP_LE, "le"}, - {OP_EQ, "eq"}, - {OP_NEQ, "neq"}, - // OP_JMP, OP_JZ, OP_JNZ, OP_RET, OP_CALL, OP_SYSCALL, - {OP_JMP, "jmp"}, - {OP_JZ, "jz"}, - {OP_JNZ, "jnz"}, - {OP_RET, "ret"}, - {OP_CALL, "call"}, - {OP_SYSCALL, "syscall"}, - // OP_FADD, OP_FSUB, OP_FMUL, OP_FDIV, - {OP_FADD, "fadd"}, - {OP_FSUB, "fsub"}, - {OP_FMUL, "fmul"}, - {OP_FDIV, "fdiv"}, - // OP_FGE, OP_FGT, OP_FLE, OP_FLT, OP_FEQ, OP_FNEQ, - {OP_FGE, "fge"}, - {OP_FGT, "fgt"}, - {OP_FLT, "flt"}, - {OP_FEQ, "feq"}, - {OP_FNEQ, "fneq"}, - // OP_FTI, OP_ITF, - {OP_FTI, "fti"}, - {OP_ITF, "itf"}, - // OP_EXIT, - {OP_EXIT, "exit"}, - // OP_LDARG, OP_LDVAR, OP_STARG, OP_STVAR, - {OP_LDARG, "ldarg"}, - {OP_LDVAR, "ldvar"}, - {OP_STARG, "starg"}, - {OP_STVAR, "stvar"}, - {OP_END, NULL}, -}; - -int op_size(enum op op) { - if (op == OP_IMM) { - return 8 + 1; - } - if (op == OP_CALL - || op == OP_JMP - || op == OP_JNZ - || op == OP_JZ - || op == OP_SYSCALL) { - return 8 + 1 + 1; - } - if (op == OP_REL) { - return 11; - } - if (op == OP_LDARG || op == OP_STARG) { - return 12; - } - if (op == OP_LDVAR || op == OP_STVAR) { - return 12; - } - return 1; -} - -enum op str2op(const char* str) { - for (int i = 0; op_table[i].name != NULL; i++) { - if (strcmp(op_table[i].name, str) == 0) { - return op_table[i].op; - } - } - return OP_END; -} - -int is_op(const char *str) { - return OP_END != str2op(str); -} diff --git a/src/as_op.h b/src/as_op.h deleted file mode 100644 index 90080d0..0000000 --- a/src/as_op.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef FVM_AS_OP_H_ -#define FVM_AS_OP_H_ - -enum op { - OP_SP, OP_SSP, OP_BP, OP_SBP, OP_PC, OP_RV, OP_SRV, - OP_IMM, OP_REL, - OP_LD8, OP_LD16, OP_LD32, OP_LD, - OP_ST8, OP_ST16, OP_ST32, OP_ST, - OP_DUP, OP_POP, OP_SWAP, OP_OVER, OP_ROT, - OP_ADD, OP_SUB, OP_DIV, OP_MUL, OP_MOD, - OP_SHR, OP_SHL, OP_SAR, - OP_AND, OP_OR, OP_NOT, - OP_BITAND, OP_BITOR, OP_XOR, OP_INVERT, - OP_GT, OP_LT, OP_GE, OP_LE, OP_EQ, OP_NEQ, - OP_JMP, OP_JZ, OP_JNZ, OP_RET, OP_CALL, OP_SYSCALL, - OP_FADD, OP_FSUB, OP_FMUL, OP_FDIV, - OP_FGE, OP_FGT, OP_FLE, OP_FLT, OP_FEQ, OP_FNEQ, - OP_FTI, OP_ITF, - OP_EXIT, - OP_LDARG, OP_LDVAR, OP_STARG, OP_STVAR, - OP_END -}; -typedef enum op op; - -enum op str2op(const char *str); -int is_op(const char *str); -int op_size(op op); -int is_pseudo_op(op op); - -#endif - diff --git a/src/as_parser.c b/src/as_parser.c deleted file mode 100644 index 4205179..0000000 --- a/src/as_parser.c +++ /dev/null @@ -1,149 +0,0 @@ -#include "as_parser.h" -#include "as_tokenizer.h" -#include "utils.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -// BNF -// === -// -// <prog> ::= <stmts> -// <stmts> ::= <stmt> newline | <stmt> newline <stmts> -// <stmt> ::= <label> <instr> | <instr> | <label> -// <instr> ::= <op> | <op> arg | <op> tag -// <label> ::= tag ":" -// <op> ::= "add" | "sub" | "mul" | "div" | "mod" | "eq" | ... - -result parse_prog(allocator* alct, token_stream* ts) { - struct prog * p = allocate(alct, sizeof(struct prog)); - p->stmts = unwrap(parse_stmts(alct, ts)); - return ok(p); -} - -result parse_stmts(allocator* alct, token_stream* ts) { - token *token; - stmts* ss = allocate(alct, sizeof(stmts)); - stmt* s; - - ss->stmts = allocate(alct, sizeof(stmt*)); - ss->stmts[0] = NULL; - int capacity = 0; - int len = 0; - - while (1) { - token = unwrap(peek_token(alct, ts)); - if (token->type == TK_ENDOFFILE) { - break; - } - - s = unwrap(parse_stmt(alct, ts)); - if (s == NULL) continue; - if (len == capacity) { - size_t new_capacity = capacity * 2 + 1; - void* new_stmts = allocate(alct, sizeof(struct stmt **) * new_capacity); - memcpy(new_stmts, ss->stmts, sizeof(struct stmt **) * capacity); - ss->stmts = new_stmts; - capacity = new_capacity; - } - // expect newline - token = unwrap(peek_token(alct, ts)); - if (token->type == TK_NEWLINE) { - unwrap(next_token(alct, ts)); - } else { - return err(safe_sprintf(alct, "%d:%d expect newline.\n", token->line, token->col)); - } - ss->stmts[len] = s; - len++; - } - ss->stmts[len] = NULL; - return ok(ss); -} - -result parse_label(allocator* alct, token_stream* ts) { - struct token * t; - t = unwrap(next_token(alct, ts)); - if (t->type != TK_TAG) { - return err(safe_sprintf(alct, "%d:%d expect label.\n", t->line, t->col)); - } - struct label * l = allocate(alct, sizeof(struct label *)); - l->name = t->sval; - t = unwrap(next_token(alct, ts)); - if (t->type != TK_COLON) { - return err(safe_sprintf(alct, "%d:%d expect colon.\n", t->line, t->col)); - } - return ok(l); -} - -result parse_stmt(allocator* alct, token_stream* ts) { - const char *errmsg; - token* token; - token = unwrap(peek_token(alct, ts)); - stmt* stmt = allocate(alct, sizeof(struct stmt)); - stmt->label = NULL; - stmt->instr = NULL; - if (token->type == TK_TAG) { - stmt->label = unwrap(parse_label(alct, ts)); - token = unwrap(peek_token(alct, ts)); - if (token->type == TK_NEWLINE) { - return ok(stmt); - } - token = unwrap(peek_token(alct, ts)); - } - if (token->type == TK_OP) { - stmt->instr = unwrap(parse_instr(alct, ts)); - token = unwrap(peek_token(alct, ts)); - if (token->type == TK_NEWLINE) { - return ok(stmt); - } - } - if (token->type == TK_NEWLINE) { - return ok(NULL); - } - return err(safe_sprintf(alct, "%d:%d expect lable + instruction, lable, or instruction.\n", token->line, token->col)); -} - -result parse_op(allocator* alct, token_stream* ts) { - token* t; - t = unwrap(next_token(alct, ts)); - enum op op; - if (t->type == TK_OP) { - op = str2op(t->sval); - if (op == OP_END) { - return err(safe_sprintf(alct, "%d:%d invalid op.\n", t->line, t->col)); - } - } else { - return err(safe_sprintf(alct, "%d:%d expect op.\n", t->line, t->col)); - } - return ok((void*)op); -} - -result parse_instr(allocator* alct, token_stream* ts) { - token* t; - t = unwrap(peek_token(alct, ts)); - instr * i = allocate(alct, sizeof(instr)); - *i = (instr){ - .tag_name = NULL, - .arg = NULL, - .op = OP_END, - .lineno = -1 - }; - if (t->type == TK_OP) { - i->lineno = t->line; - i->op = (op)unwrap(parse_op(alct, ts)); - t = unwrap(peek_token(alct, ts)); - if (t->type == TK_ARG) { - arg* a = allocate(alct, sizeof(arg)); - a->ival = t->ival; - a->fval = t->fval; - i->arg = a; - unwrap(next_token(alct, ts)); - } else if (t->type == TK_TAG) { - i->tag_name = t->sval; - unwrap(next_token(alct, ts)); - } - } - return ok(i); -} - diff --git a/src/as_parser.h b/src/as_parser.h deleted file mode 100644 index f1d3b62..0000000 --- a/src/as_parser.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef FVM_AS_PARSER_H_ -#define FVM_AS_PARSER_H_ - -#include "as_tokenizer.h" -#include "utils.h" - -#include "as_op.h" - -struct arg { - int64_t ival; - double fval; -}; -typedef struct arg arg; - -struct instr { - enum op op; - arg* arg; - const char* tag_name; - int lineno; -}; -typedef struct instr instr; - -struct label { - const char* name; -}; -typedef struct label label; - -struct stmt { - struct label * label; - struct instr * instr; -}; -typedef struct stmt stmt; - -struct stmts { - struct stmt ** stmts; -}; -typedef struct stmts stmts; - -struct prog { - struct stmts * stmts; -}; -typedef struct prog prog; - -// result<prog> -result parse_prog(allocator* alct, token_stream* ts); - -// result<stmt> -result parse_stmt(allocator* alct, token_stream* ts); - -// result<stmts> -result parse_stmts(allocator* alct, token_stream* ts); - -// result<instr> -result parse_instr(allocator* alct, token_stream* ts); - -// result<label> -result parse_label(allocator* alct, token_stream* ts); - -// result<enum op> -result parse_op(allocator* alct, token_stream* ts); - -#endif diff --git a/src/as_tokenizer.c b/src/as_tokenizer.c deleted file mode 100644 index 1651ccb..0000000 --- a/src/as_tokenizer.c +++ /dev/null @@ -1,193 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "as_tokenizer.h" -#include "as_op.h" -#include "utils.h" - -int input_stream_next_char(input_stream* s) { - if (s->cursor == -1) { - return EOF; - } - if (s->buf_pos == s->cursor) { - size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp); - if (n == 0) { - s->cursor = -1; - return EOF; - } - s->buf_pos = n; - s->cursor = 0; - } - int c = s->buf[s->cursor]; - s->cursor++; - if (c == '\n') { - s->line++; - s->col = 1; - } else { - s->col++; - } - return c; -} - -int input_stream_peek_char(input_stream* s) { - if (s->cursor == -1) { - return EOF; - } - if (s->buf_pos == s->cursor) { - size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp); - if (n == 0) { - return EOF; - } - s->buf_pos = n; - s->cursor = 0; - } - return s->buf[s->cursor]; -} - -int is_start_of_identifier(int c) { - if (c >= 'a' && c <= 'z') { - return 1; - } - if (c >= 'A' && c <= 'Z') { - return 1; - } - if (c == '_') { - return 1; - } - if (c == '.') { - return 1; - } - return 0; -} - -int is_part_of_identifier(int c) { - if (is_start_of_identifier(c)) { - return 1; - } - if (c >= '0' && c <= '9') { - return 1; - } - return 0; -} - -result next_token_impl(allocator* alct, input_stream* s) { - const char *errmsg; - token* t = allocate(alct, sizeof(token)); - int c; - while (1) { - c = input_stream_peek_char(s); - if (c == EOF) { - break; - } - if (c == '\n') { - input_stream_next_char(s); - *t = (struct token){.type = TK_NEWLINE, .line = s->line, .col = s->col}; - return ok(t); - } - if (c == ':') { - input_stream_next_char(s); - *t = (struct token){.type = TK_COLON, .line = s->line, .col = s->col}; - return ok(t); - } - if (c == ' ' || c == '\t') { - input_stream_next_char(s); - continue; - } - if (c >= '0' && c <= '9') { - int64_t ival = 0; - while (1) { - input_stream_next_char(s); - ival = ival * 10 + (c - '0'); - c = input_stream_peek_char(s); - if (c < '0' || c > '9') { - break; - } - } - *t = (struct token){.type = TK_ARG, .ival = ival, .line = s->line, .col = s->col}; - return ok(t); - } - if (is_start_of_identifier(c)) { - size_t line = s->line; - size_t col = s->col; - char *sval = allocate(alct, 256); - size_t i = 0; - while (1) { - if (i >= 255) { - return err(safe_sprintf(alct, "error: identifier too long\n")); - } - input_stream_next_char(s); - sval[i++] = c; - c = input_stream_peek_char(s); - if (!is_part_of_identifier(c)) { - break; - } - } - sval[i] = '\0'; - if (is_op(sval)) { - *t = (struct token){.type = TK_OP, .sval = sval, .line = line, .col = col}; - return ok(t); - } - *t = (struct token){.type = TK_TAG, .sval = sval, .line = line, .col = col}; - return ok(t); - } - return err(safe_sprintf(alct, "error: invalid character %c at line %d, col %d\n", c, s->line, s->col)); - } - // end of file - *t = (struct token){.type = TK_ENDOFFILE}; - return ok(t); -} - -result next_token(allocator* alct, token_stream* ts) { - if (ts->buf != NULL) { - struct token * t = ts->buf; - ts->buf = NULL; - return ok(t); - } - return next_token_impl(alct, ts->s); -} - -result peek_token(allocator* alct, token_stream* ts) { - if (ts->buf != NULL) { - return ok(ts->buf); - } - ts->buf = unwrap(next_token_impl(alct, ts->s)); - return ok(ts->buf); -} - -void print_token(token* t) { - switch (t->type) { - case TK_OP: - printf("OP: %s, line: %d, col: %d\n", t->sval, t->line, t->col); - break; - case TK_ARG: - printf("ARG: %ld, line: %d, col: %d\n", t->ival, t->line, t->col); - break; - case TK_TAG: - printf("LABEL: %s, line: %d, col: %d\n", t->sval, t->line, t->col); - break; - case TK_COLON: - printf("COLON\n"); - break; - case TK_NEWLINE: - printf("NEWLINE\n"); - break; - case TK_ENDOFFILE: - printf("ENDOFFILE\n"); - break; - } -} - -token_stream* new_token_stream(allocator* alct, FILE* fp) { - input_stream* s = allocate(alct, sizeof(input_stream)); - s->fp = fp; - s->buf = allocate(alct, INPUT_STREAM_BUF_SIZE); - s->buf_pos = 0; - s->cursor = 0; - s->line = 1; - s->col = 1; - token_stream* ts = allocate(alct, sizeof(token_stream)); - ts->s = s; - ts->buf = NULL; - return ts; -} diff --git a/src/as_tokenizer.h b/src/as_tokenizer.h deleted file mode 100644 index 1027530..0000000 --- a/src/as_tokenizer.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef FMV_AS_TOKENIZER_H_ -#define FMV_AS_TOKENIZER_H_ - -#include <stdint.h> -#include <stdio.h> - -#include "utils.h" - -enum token_type { - TK_OP, TK_ARG, TK_TAG, TK_COLON, TK_NEWLINE, TK_ENDOFFILE -}; -typedef enum token_type token_type; - -struct token { - enum token_type type; - int line; - int col; - char *sval; - int64_t ival; - double fval; -}; -typedef struct token token; - -#define INPUT_STREAM_BUF_SIZE 1024 - -struct input_stream{ - FILE *fp; - char *buf; - int buf_pos; - int cursor; - int line; - int col; -}; -typedef struct input_stream input_stream; - -struct token_stream { - token *buf; - input_stream *s; -}; -typedef struct token_stream token_stream; - -// result<token*> -result next_token(allocator * alct, token_stream * ts); - -// result<token*> -result peek_token(allocator * alct, token_stream * ts); - -void print_token(struct token *t); - -token_stream* new_token_stream(allocator * alct, FILE* fp); - -#endif // FMV_AS_TOKENIZER_H_ diff --git a/tests/test_as_analyzer.c b/tests/test_as_analyzer.c deleted file mode 100644 index 993860a..0000000 --- a/tests/test_as_analyzer.c +++ /dev/null @@ -1,45 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <assert.h> - -#include "as_tokenizer.h" -#include "as_parser.h" -#include "as_analyzer.h" -#include "utils.h" - -char *input_buffer = - "start:\n" - " imm 1\n" - "mid: add\n" - " call start\n" - " sub\n" - ".insec: add\n" - " div\n" - "end: eq\n"; - -int main(int argc, char** argv) { - struct result result; - printf("[TEST] assembler analyzer\n"); - // make a memory buffer to FILE* - FILE *fp = fmemopen(input_buffer, strlen(input_buffer), "r"); - struct allocator * alct = new_allocator(); - struct token_stream * ts = new_token_stream(alct, fp); - result = parse_prog(alct, ts); - assert(result.errmsg == NULL); - struct prog* prog = result.value; - struct sym_table sym_table = analyze_prog(alct, prog); - - assert(strcmp(sym_table.buf[0].name, "start") == 0); - assert(strcmp(sym_table.buf[1].name, "mid") == 0); - assert(strcmp(sym_table.buf[2].name, "mid.insec") == 0); - assert(strcmp(sym_table.buf[3].name, "end") == 0); - - assert(sym_table.buf[0].offset == 0); - assert(sym_table.buf[1].offset == 9); - assert(sym_table.buf[2].offset == 21); - assert(sym_table.buf[3].offset == 23); - - printf("[PASS] assembler analyzer\n"); - delete_allocator(alct); - return 0; -} diff --git a/tests/test_as_parser.c b/tests/test_as_parser.c deleted file mode 100644 index 62a4b8d..0000000 --- a/tests/test_as_parser.c +++ /dev/null @@ -1,47 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <assert.h> - -#include "as_tokenizer.h" -#include "as_parser.h" -#include "utils.h" - -char *input_buffer = - "start:\n" - " add 1\n" - " sub start\n" - " div\n" - "end: eq\n"; - -int main(int argc, char** argv) { - struct result result; - printf("[TEST] assembler parser\n"); - // make a memory buffer to FILE* - FILE *fp = fmemopen(input_buffer, strlen(input_buffer), "r"); - struct allocator * alct = new_allocator(); - struct token_stream * ts = new_token_stream(alct, fp); - result = parse_prog(alct, ts); - if (result.errmsg != NULL) { - printf("error: %s\n", result.errmsg); - } - assert(result.errmsg == NULL); - struct prog* prog = result.value; - - // compare output - struct stmt * * stmts = prog->stmts->stmts; - - assert(stmts[0]->instr == NULL); - assert(strcmp("start", stmts[0]->label->name) == 0); - - assert(stmts[1]->label == NULL); - assert(stmts[1]->instr->op == OP_ADD); - assert(stmts[1]->instr->arg->ival == 1); - - assert(strcmp("end", stmts[4]->label->name) == 0); - assert(stmts[4]->instr->op == OP_EQ); - - printf("[PASS] assembler parser\n"); - fclose(fp); - delete_allocator(alct); - return 0; -} diff --git a/tests/test_as_tokenizer.c b/tests/test_as_tokenizer.c deleted file mode 100644 index 995c764..0000000 --- a/tests/test_as_tokenizer.c +++ /dev/null @@ -1,65 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <assert.h> - -#include "as_tokenizer.h" -#include "utils.h" - -char *input_buffer = - "start:\n" - " add 1\n" - " sub start\n" - " div\n" - " eq\n"; - -char *expected_output = - "LABEL: start, line: 1, col: 1\n" - "COLON\n" - "NEWLINE\n" - "OP: add, line: 2, col: 5\n" - "ARG: 1, line: 2, col: 10\n" - "NEWLINE\n" - "OP: sub, line: 3, col: 5\n" - "LABEL: start, line: 3, col: 9\n" - "NEWLINE\n" - "OP: div, line: 4, col: 5\n" - "NEWLINE\n" - "OP: eq, line: 5, col: 5\n" - "NEWLINE\n" - "ENDOFFILE\n"; - -int main(int argc, char** argv) { - printf("[TEST] assembler tokenizer\n"); - // make a memory buffer to FILE* - FILE *fp = fmemopen(input_buffer, strlen(input_buffer), "r"); - struct allocator * alct = new_allocator(); - struct token_stream * ts = new_token_stream(alct, fp); - - char *output_buffer = malloc(10240); - // redirect stdout to a file - FILE *out = fmemopen(output_buffer, 10240, "w"); - FILE *origin_stdout = stdout; - stdout = out; - struct token* token; - struct result result; - while (1) { - result = peek_token(alct, ts); - assert(result.errmsg == NULL); - assert(result.value != NULL); - token = result.value; - print_token(token); - if (token->type == TK_ENDOFFILE) break; - next_token(alct, ts); - } - fclose(out); - stdout = origin_stdout; - - assert(strcmp(output_buffer, expected_output) == 0); - printf("[PASS] assembler tokenizer\n"); - free(output_buffer); - delete_allocator(alct); - return 0; -} - - |
