Browse Source

add parser

Mistivia 1 month ago
parent
commit
2c228ecb7d
4 changed files with 145 additions and 11 deletions
  1. 0 10
      src/as_main.c
  2. 85 0
      src/as_parser.c
  3. 59 0
      src/as_parser.h
  4. 1 1
      src/as_tokenizer.c

+ 0 - 10
src/as_main.c

@@ -2,16 +2,6 @@
 
 #include "as_tokenizer.h"
 
-// AST
-// ===
-//
-// <prog> ::= <stmts>
-// <stmts> ::= <stmt> <newline> | <stmt> <stmts> <newline>
-// <stmt> ::= <tag> <instr> | <instr> | <tag>
-// <instr> ::= <op> | <op> <arg> | <op> <label>
-// <tag> ::= <label> :
-// <op> ::= add | sub | mul | div | mod | eq
-
 int main(int argc, char** argv) {
     if (argc != 2) {
         fprintf(stderr, "Usage: fvm-as <inputfile>\n");

+ 85 - 0
src/as_parser.c

@@ -0,0 +1,85 @@
+#include "as_parser.h"
+
+#include <stdlib.h>
+
+// BNF
+// ===
+//
+// <prog> ::= <stmts>
+// <stmts> ::= <stmt> <newline> | <stmt> <stmts>
+// <stmt> ::= <tag> <instr> | <instr> | <tag>
+// <instr> ::= <op> | <op> <arg> | <op> <label>
+// <tag> ::= <label> :
+// <op> ::= add | sub | mul | div | mod | eq
+
+
+// Parses a whole program: allocates a Prog and fills it with the statement
+// list read from ts. Returns NULL if the Prog cannot be allocated or if
+// parseStmts returns NULL.
+Prog* parseProg(TokenStream *ts) {
+    Prog *p = malloc(sizeof *p);
+    if (p == NULL) {
+        return NULL;
+    }
+    p->stmts = parseStmts(ts);
+    if (p->stmts == NULL) {
+        // Do not hand back a half-built program.
+        free(p);
+        return NULL;
+    }
+    return p;
+}
+
+// Parses statements from ts until an ENDOFFILE token is peeked and returns
+// them as a NULL-terminated array wrapped in a Stmts.
+// Returns NULL if an allocation fails; statements parsed before the failure
+// are not released in that case.
+Stmts* parseStmts(TokenStream *ts) {
+    Stmts *ss = malloc(sizeof *ss);
+    if (ss == NULL) {
+        return NULL;
+    }
+    // capacity counts array slots; one slot is always kept free for the
+    // NULL terminator, so the invariant is len + 1 <= capacity.
+    int capacity = 1;
+    int len = 0;
+    ss->stmts = malloc(sizeof *ss->stmts * capacity);
+    if (ss->stmts == NULL) {
+        free(ss);
+        return NULL;
+    }
+    ss->stmts[0] = NULL;
+    while (peekToken(ts)->type != ENDOFFILE) {
+        Stmt *s = parseStmt(ts);
+        if (len + 1 >= capacity) {
+            // Grow before storing so the terminator still fits afterwards.
+            int newCapacity = capacity * 2 + 1;
+            Stmt **grown = realloc(ss->stmts, sizeof *grown * newCapacity);
+            if (grown == NULL) {
+                // realloc leaves the old array valid on failure; release it.
+                free(ss->stmts);
+                free(ss);
+                return NULL;
+            }
+            ss->stmts = grown;
+            capacity = newCapacity;
+        }
+        ss->stmts[len] = s;
+        len++;
+    }
+    ss->stmts[len] = NULL;
+    return ss;
+}
+
+// Parses one statement and returns it as a freshly allocated Stmt.
+//   - LABEL followed by COLON: the colon is consumed and the statement
+//     carries only the tag (instr is NULL).
+//   - LABEL followed by anything else: the label becomes the tag and an
+//     instruction is parsed after it.
+//     NOTE(review): the BNF only describes tags as "<label> :"; confirm this
+//     colon-less path is intended.
+//   - any other token: an untagged instruction (tag is NULL).
+// Returns NULL, without consuming any token, if the Stmt cannot be allocated.
+Stmt* parseStmt(TokenStream *ts) {
+    Stmt *s = malloc(sizeof *s);
+    if (s == NULL) {
+        return NULL;
+    }
+    s->tag = NULL;
+    s->instr = NULL;
+    if (peekToken(ts)->type != LABEL) {
+        s->instr = parseInstr(ts);
+        return s;
+    }
+    s->tag = parseLabel(ts);
+    if (peekToken(ts)->type == COLON) {
+        // Tag-only statement: wrap the label in a real Stmt instead of
+        // returning the Label pointer itself.
+        nextToken(ts);
+        return s;
+    }
+    s->instr = parseInstr(ts);
+    return s;
+}
+
+// Parses one instruction. The first token is always consumed; if it is an
+// OP, the opcode is read via parseOp and an optional operand follows:
+//   - ARG token:   copied into a newly allocated Arg stored in arg
+//   - LABEL token: its sval is stored in labelName
+// arg and labelName are NULL when the corresponding operand is absent.
+// Returns NULL if an allocation fails.
+Instr* parseInstr(TokenStream *ts) {
+    Token *t = nextToken(ts);
+    Instr *i = malloc(sizeof *i);
+    if (i == NULL) {
+        return NULL;
+    }
+    // Start with no operand so callers never read an uninitialized pointer.
+    i->arg = NULL;
+    i->labelName = NULL;
+    if (t->type != OP) {
+        // NOTE(review): op is left unset on this path (no error is reported
+        // for a non-OP token); confirm how callers should detect this.
+        return i;
+    }
+    // NOTE(review): the OP token was already consumed by nextToken above;
+    // confirm parseOp does not expect to read that same token again.
+    i->op = parseOp(ts);
+    t = peekToken(ts);
+    if (t->type == ARG) {
+        Arg *a = malloc(sizeof *a);
+        if (a == NULL) {
+            free(i);
+            return NULL;
+        }
+        a->ival = t->ival;
+        a->fval = t->fval;
+        a->sval = t->sval;
+        i->arg = a;
+        nextToken(ts);
+    } else if (t->type == LABEL) {
+        i->labelName = t->sval;
+        nextToken(ts);
+    }
+    return i;
+}

+ 59 - 0
src/as_parser.h

@@ -0,0 +1,59 @@
+#include "as_tokenizer.h"
+
+// BNF
+// ===
+//
+// <prog> ::= <stmts>
+// <stmts> ::= <stmt> <newline> | <stmt> <stmts>
+// <stmt> ::= <tag> <instr> | <instr> | <tag>
+// <instr> ::= <op> | <op> <arg> | <op> <label>
+// <tag> ::= <label> :
+// <op> ::= add | sub | mul | div | mod | eq
+
+// Opcodes known to the assembler, one per <op> alternative in the BNF above.
+typedef enum op {
+    ADD,
+    SUB,
+    MUL,
+    DIV,
+    MOD,
+    EQ
+} Op;
+
+// Operand of an instruction; the parser copies all three fields from the
+// ARG token it was built from.
+typedef struct arg {
+    int64_t ival;      // integer value
+    double fval;       // floating-point value
+    const char *sval;  // string value
+} Arg;
+
+// One instruction: an opcode plus an optional operand.
+typedef struct instr {
+    Op op;                  // opcode
+    Arg* arg;               // set when an ARG token follows the opcode
+    const char* labelName;  // set when a LABEL token follows the opcode, else NULL
+} Instr;
+
+// A label, identified by its name.
+typedef struct label {
+    const char* name;
+} Label;
+
+// One statement: an optional tag and an instruction.
+typedef struct stmt {
+    Label* tag;    // NULL when the statement carries no label
+    Instr* instr;  // the instruction of this statement
+} Stmt;
+
+// Statement list; parseStmts ends the array with a NULL entry.
+typedef struct stmts {
+    Stmt** stmts;
+} Stmts;
+
+// Root of the parse result: a program is its list of statements.
+typedef struct prog {
+    Stmts *stmts;
+} Prog;
+
+// Parser entry points. Each one reads tokens from ts.
+// Parses a whole program (its statement list).
+Prog* parseProg(TokenStream *ts);
+// Parses a single statement: an optional label followed by an instruction.
+Stmt* parseStmt(TokenStream *ts);
+// Parses statements until an ENDOFFILE token; the resulting array is NULL-terminated.
+Stmts* parseStmts(TokenStream *ts);
+// Parses one instruction with an optional ARG or LABEL operand.
+Instr* parseInstr(TokenStream *ts);
+// NOTE(review): parseLabel and parseOp are declared here but not defined in
+// as_parser.c as of this commit; presumably they read a label / an opcode
+// from the stream - confirm once implemented.
+Label* parseLabel(TokenStream *ts);
+Op parseOp(TokenStream *ts);
+

+ 1 - 1
src/as_tokenizer.c

@@ -164,7 +164,7 @@ void printToken(Token *t) {
     switch (t->type) {
         case OP:
             printf("OP: %s, line: %d, col: %d\n", t->sval, t->line, t->col);
-            break;
+            break;
         case ARG:
             printf("ARG: %ld, line: %d, col: %d\n", t->ival, t->line, t->col);
             break;