Browse Source

add result style error handling

Mistivia 1 week ago
parent
commit
fb7664e087
9 changed files with 199 additions and 75 deletions
  1. 102 42
      src/as_parser.c
  2. 6 6
      src/as_parser.h
  3. 23 20
      src/as_tokenizer.c
  4. 7 2
      src/as_tokenizer.h
  5. 32 0
      src/utils.c
  6. 7 0
      src/utils.h
  7. 4 1
      tests/test_as_analyzer.c
  8. 7 1
      tests/test_as_parser.c
  9. 11 3
      tests/test_as_tokenizer.c

+ 102 - 42
src/as_parser.c

@@ -16,20 +16,38 @@
 // <label> ::= tag ":"
 // <op> ::= "add" | "sub" | "mul" | "div" | "mod" | "eq" | ...
 
-struct prog * parse_prog(struct allocator * alct, struct token_stream * ts) {
+// Parses a whole program from the token stream.
+// Returns result<prog*>: on success .value is the newly allocated prog and
+// .errmsg is NULL; if parse_stmts fails, its result is returned unchanged.
+struct result parse_prog(struct allocator * alct, struct token_stream * ts) {
+    struct result result;
     struct prog * p = allocate(alct, sizeof(struct prog));
-    p->stmts = parse_stmts(alct, ts);
-    return p;
+    result = parse_stmts(alct, ts);
+    // Propagate the first error from the statement list as-is.
+    if (result.errmsg != NULL) return result;
+    p->stmts = result.value;
+    return (struct result){.value = p, .errmsg = NULL};
 }
 
-struct stmts * parse_stmts(struct allocator * alct, struct token_stream * ts) {
+struct result parse_stmts(struct allocator * alct, struct token_stream * ts) {
+    struct token *token;
+    struct result result;
+    const char* errmsg;
     struct stmts * ss = allocate(alct, sizeof(struct stmts));
     ss->stmts = allocate(alct, sizeof(struct stmt *));
     ss->stmts[0] = NULL;
     int capacity = 0;
     int len = 0;
-    while (peek_token(alct, ts)->type != TK_ENDOFFILE) {
-        struct stmt * s = parse_stmt(alct, ts);
+
+    while (1) {
+        result = peek_token(alct, ts);
+        if (result.errmsg != NULL) return result;
+        token = result.value;
+        if (token->type == TK_ENDOFFILE) {
+            break;
+        }
+        
+        result = parse_stmt(alct, ts);
+        if (result.errmsg != NULL) {
+            return result;
+        }
+        struct stmt * s = result.value;
         if (s == NULL) continue;
         if (len == capacity) {
             size_t new_capacity = capacity * 2 + 1;
@@ -39,84 +57,126 @@ struct stmts * parse_stmts(struct allocator * alct, struct token_stream * ts) {
             capacity = new_capacity;
         }
         // expect newline
-        if (peek_token(alct, ts)->type == TK_NEWLINE) {
-            next_token(alct, ts);
+        result = peek_token(alct, ts);
+        if (result.errmsg != NULL) return result;
+        token = result.value;
+        if (token->type == TK_NEWLINE) {
+            result = next_token(alct, ts);
+            if (result.errmsg != NULL) return result;
         } else {
-            fprintf(stderr, "%d:%d expect newline.\n", peek_token(alct, ts)->line, peek_token(alct, ts)->col);
+            errmsg = safe_sprintf(alct, "%d:%d expect newline.\n", token->line, token->col);
+            return (struct result){.value = NULL, .errmsg = errmsg};
         }
         ss->stmts[len] = s;
         len++;
     }
     ss->stmts[len] = NULL;
-    return ss;
+    return (struct result){.value = ss, .errmsg = NULL};
 }
 
-struct label * parse_label(struct allocator * alct, struct token_stream * ts) {
-    struct token  * t = next_token(alct, ts);
+// Parses a label of the form `tag ":"`, consuming both tokens.
+// Returns result<label*>: on success .value is the new label whose name is the
+// tag token's sval. On failure .value is NULL and .errmsg is either the
+// tokenizer's error or a "<line>:<col> expect label/colon." message.
+struct result parse_label(struct allocator * alct, struct token_stream * ts) {
+    const char *errmsg;
+    struct result result;
+    struct token * t;
+    result = next_token(alct, ts);
+    if (result.errmsg != NULL) return result;
+    t = result.value;
     if (t->type != TK_TAG) {
-        fprintf(stderr, "%d:%d expect label.\n", t->line, t->col);
-        exit(-1);
+        errmsg = safe_sprintf(alct, "%d:%d expect label.\n", t->line, t->col);
+        return (struct result){.value = NULL, .errmsg = errmsg};
     }
-    struct label * l = allocate(alct, sizeof(struct label *));
+    // Allocate room for the struct itself, not just for a pointer to it.
+    struct label * l = allocate(alct, sizeof(struct label));
     l->name = t->sval;
-    t = next_token(alct, ts);
+    result = next_token(alct, ts);
+    if (result.errmsg != NULL) return result;
+    t = result.value;
     if (t->type != TK_COLON) {
-        fprintf(stderr, "%d:%d expect colon.\n", t->line, t->col);
-        exit(-1);
+        errmsg = safe_sprintf(alct, "%d:%d expect colon.\n", t->line, t->col);
+        return (struct result){.value = NULL, .errmsg = errmsg};
     }
-    return l;
+    return (struct result){.value = l, .errmsg = NULL};
 }
 
-struct stmt * parse_stmt(struct allocator * alct, struct token_stream * ts) {
-    struct token  * t = peek_token(alct, ts);
+// Parses one statement: an optional label followed by an optional instruction.
+// The terminating newline is only peeked at, never consumed.
+// Returns result<stmt*>:
+//   - .value = stmt, .errmsg = NULL  when a label and/or instruction was parsed;
+//   - .value = NULL, .errmsg = NULL  when the next token is a newline and
+//     neither a label nor an instruction precedes it (empty line);
+//   - .value = NULL, .errmsg != NULL on a tokenizer or syntax error.
+struct result parse_stmt(struct allocator * alct, struct token_stream * ts) {
+    const char *errmsg;
+    struct result result;
+    struct token * t;
+    result = peek_token(alct, ts);
+    if (result.errmsg != NULL) return result;
+    t = result.value;
     struct stmt * stmt = allocate(alct, sizeof(struct stmt));
     stmt->label = NULL;
     stmt->instr = NULL;
     if (t->type == TK_TAG) {
-        stmt->label = parse_label(alct, ts);
-        if (peek_token(alct, ts)->type == TK_NEWLINE) {
-            return stmt;
+        result = parse_label(alct, ts);
+        if (result.errmsg != NULL) return result;
+        stmt->label = result.value;
+        result = peek_token(alct, ts);
+        if (result.errmsg != NULL) return result;
+        t = result.value;
+        if (t->type == TK_NEWLINE) {
+            return (struct result){.value = stmt, .errmsg = NULL};
         }
-        t = peek_token(alct, ts);
+        // t already holds the token that follows the label; the peek above
+        // buffered it, so a second peek would return the same token.
     }
     if (t->type == TK_OP) {
-        stmt->instr = parse_instr(alct, ts);
-        if (peek_token(alct, ts)->type == TK_NEWLINE) {
-            return stmt;
+        result = parse_instr(alct, ts);
+        if (result.errmsg != NULL) return result;
+        stmt->instr = result.value;
+
+        result = peek_token(alct, ts);
+        if (result.errmsg != NULL) return result;
+        t = result.value;
+        if (t->type == TK_NEWLINE) {
+            return (struct result){.value = stmt, .errmsg = NULL};
         }
     }
     if (t->type == TK_NEWLINE) {
-        return NULL;
+        // Empty line: report "no statement" without signalling an error.
+        return (struct result){.value = NULL, .errmsg = NULL};
     }
-    fprintf(stderr, "%d:%d expect lable + instruction, lable, or instruction.\n", t->line, t->col);
-    exit(-1);
+    errmsg = safe_sprintf(alct, "%d:%d expect label + instruction, label, or instruction.\n", t->line, t->col);
+    return (struct result){.value = NULL, .errmsg = errmsg};
 }
 
-enum op parse_op(struct allocator * alct, struct token_stream * ts) {
-    struct token  * t = next_token(alct, ts);
+// Consumes one token and converts it to an opcode via str2op().
+// Returns result<enum op>: the opcode is carried in .value cast to void*, so
+// callers cast it back with (enum op). On failure .value is NULL and .errmsg
+// is the tokenizer's error, "<line>:<col> invalid op." (str2op gave OP_END),
+// or "<line>:<col> expect op." (token was not TK_OP).
+// NOTE(review): if any opcode has enum value 0 it would look like NULL in
+// .value, so failure should be detected through .errmsg only — confirm.
+struct result parse_op(struct allocator * alct, struct token_stream * ts) {
+    const char *errmsg;
+    struct result result;
+    struct token * t;
+    result = next_token(alct, ts);
+    if (result.errmsg != NULL) return result;
+    t = result.value;
     enum op op;
     if (t->type == TK_OP) {
         op = str2op(t->sval);
         if (op == OP_END) {
-            fprintf(stderr, "%d:%d invalid op.\n", t->line, t->col);
-            exit(-1);
+            errmsg = safe_sprintf(alct, "%d:%d invalid op.\n", t->line, t->col);
+            return (struct result){.value = NULL, .errmsg = errmsg};
         }
     } else {
-        fprintf(stderr, "%d:%d expect op.\n", t->line, t->col);
-        exit(-1);
+        errmsg = safe_sprintf(alct, "%d:%d expect op.\n", t->line, t->col);
+        return (struct result){.value = NULL, .errmsg = errmsg};
     }
-    return op;
+    return (struct result){.value = (void*)op, .errmsg = NULL};
 }
 
-struct instr * parse_instr(struct allocator * alct, struct token_stream * ts) {
-    struct token  * t = peek_token(alct, ts);
+struct result parse_instr(struct allocator * alct, struct token_stream * ts) {
+    struct result result;
+    struct token * t;
+    result = peek_token(alct, ts);
+    if (result.errmsg != NULL) return result;
+    t = result.value;
     struct instr * i = allocate(alct, sizeof(struct instr));
     i->tag_name = NULL;
     i->arg = NULL;
     i->op = OP_END;
     if (t->type == TK_OP) {
-        i->op = parse_op(alct, ts);
-        t = peek_token(alct, ts);
+        result = parse_op(alct, ts);
+        i->op = (enum op)(result.value);
+        result = peek_token(alct, ts);
+        if (result.errmsg != NULL) return result;
+        t = result.value;
         if (t->type == TK_ARG) {
             struct arg * a = allocate(alct, sizeof(struct arg));
             a->ival = t->ival;
@@ -128,5 +188,5 @@ struct instr * parse_instr(struct allocator * alct, struct token_stream * ts) {
             next_token(alct, ts);
         }
     }
-    return i;
+    return (struct result){.value = i, .errmsg = NULL};
 }

+ 6 - 6
src/as_parser.h

@@ -34,11 +34,11 @@ struct prog {
     struct stmts * stmts;
 };
 
-struct prog * parse_prog(struct allocator * alct, struct token_stream * ts);
-struct stmt * parse_stmt(struct allocator * alct, struct token_stream * ts);
-struct stmts * parse_stmts(struct allocator * alct, struct token_stream * ts);
-struct instr * parse_instr(struct allocator * alct, struct token_stream * ts);
-struct label * parse_label(struct allocator * alct, struct token_stream * ts);
-enum op parse_op(struct allocator * alct, struct token_stream * ts);
+// result<prog*>
+struct result parse_prog(struct allocator * alct, struct token_stream * ts);
+// result<stmt*>
+struct result parse_stmt(struct allocator * alct, struct token_stream * ts);
+// result<stmts*>
+struct result parse_stmts(struct allocator * alct, struct token_stream * ts);
+// result<instr*>
+struct result parse_instr(struct allocator * alct, struct token_stream * ts);
+// result<label*>
+struct result parse_label(struct allocator * alct, struct token_stream * ts);
+// result<enum op> (opcode stored in .value as a void* cast)
+struct result parse_op(struct allocator * alct, struct token_stream * ts);
 
 #endif

+ 23 - 20
src/as_tokenizer.c

@@ -71,8 +71,9 @@ int is_part_of_identifier(int c) {
     return 0;
 }
 
-struct token * next_token_impl(struct allocator * alct, struct input_stream * s) {
-    struct token  * t = allocate(alct, sizeof(struct token));
+struct result next_token_impl(struct allocator * alct, struct input_stream * s) {
+    const char *errmsg;
+    struct token * t = allocate(alct, sizeof(struct token));
     int c;
     while (1) {
         c = input_stream_peek_char(s);
@@ -82,12 +83,12 @@ struct token * next_token_impl(struct allocator * alct, struct input_stream * s)
         if (c == '\n') {
             input_stream_next_char(s);
             *t = (struct token){.type = TK_NEWLINE, .line = s->line, .col = s->col};
-            return t;
+            return (struct result){.value = t, .errmsg = NULL};
         }
         if (c == ':') {
             input_stream_next_char(s);
             *t = (struct token){.type = TK_COLON, .line = s->line, .col = s->col};
-            return t;
+            return (struct result){.value = t, .errmsg = NULL};
         }
         if (c == ' ' || c == '\t') {
             input_stream_next_char(s);
@@ -104,17 +105,17 @@ struct token * next_token_impl(struct allocator * alct, struct input_stream * s)
                 }
             } 
             *t = (struct token){.type = TK_ARG, .ival = ival, .line = s->line, .col = s->col};
-            return t;
+            return (struct result){.value = t, .errmsg = NULL};
         }
         if (is_start_of_identifier(c)) {
             size_t line = s->line;
             size_t col = s->col;
             char *sval = allocate(alct, 256);
             size_t i = 0;
-            while(1) {
+            while (1) {
                 if (i >= 255) {
-                    fprintf(stderr, "error: identifier too long\n");
-                    exit(1);
+                    errmsg = safe_sprintf(alct, "error: identifier too long\n");
+                    return (struct result){.value = NULL, .errmsg = errmsg};
                 }
                 input_stream_next_char(s);
                 sval[i++] = c;
@@ -126,34 +127,36 @@ struct token * next_token_impl(struct allocator * alct, struct input_stream * s)
             sval[i] = '\0';
             if (isOp(sval)) {
                 *t = (struct token){.type = TK_OP, .sval = sval, .line = line, .col = col};
-                return t;
+                return (struct result){.value = t, .errmsg = NULL};
             }
             *t = (struct token){.type = TK_TAG, .sval = sval, .line = line, .col = col};
-            return t;
+            return (struct result){.value = t, .errmsg = NULL};
         }
-        fprintf(stderr, "error: invalid character %c at line %d, col %d\n", c, s->line, s->col);
+        errmsg = safe_sprintf(alct, "error: invalid character %c at line %d, col %d\n", c, s->line, s->col);
+        return (struct result){.value = NULL, .errmsg = errmsg};
     }
     // end of file
     *t = (struct token){.type = TK_ENDOFFILE};
-    return t;
+    return (struct result){.value = t, .errmsg = NULL};
 }
 
-struct token * next_token(struct allocator * alct, struct token_stream * ts) {
+// Consumes and returns the next token as result<token*>.
+// A token previously buffered in ts->buf is handed out first and the buffer
+// is cleared; otherwise the result of next_token_impl (token or error) is
+// returned unchanged.
+struct result next_token(struct allocator * alct, struct token_stream * ts) {
     if (ts->buf != NULL) {
         struct token * t = ts->buf;
         ts->buf = NULL;
-        return t;
+        return (struct result){.value = t, .errmsg = NULL};
     }
-    struct token  * t = next_token_impl(alct, ts->s);
-    return t;
+    return next_token_impl(alct, ts->s);
 }
 
-struct token * peek_token(struct allocator * alct, struct token_stream * ts) {
+// Returns the next token as result<token*> without consuming it.
+// The token is cached in ts->buf so that repeated peeks, and the following
+// next_token call, all see the same token. When next_token_impl reports an
+// error, that result is returned and ts->buf is left untouched.
+struct result peek_token(struct allocator * alct, struct token_stream * ts) {
     if (ts->buf != NULL) {
-        return ts->buf;
+        return (struct result){.value = ts->buf, .errmsg = NULL};
     }
-    ts->buf = next_token_impl(alct, ts->s);
-    return ts->buf;
+    struct result result = next_token_impl(alct, ts->s);
+    if (result.errmsg != NULL) return result;
+    ts->buf = result.value;
+    return result;
 }
 
 void print_token(struct token  * t) {

+ 7 - 2
src/as_tokenizer.h

@@ -35,9 +35,14 @@ struct token_stream {
     struct input_stream *s;
 };
 
-struct token* next_token(struct allocator * alct, struct token_stream * ts);
-struct token* peek_token(struct allocator * alct, struct token_stream * ts);
+// result<token*>
+struct result next_token(struct allocator * alct, struct token_stream * ts);
+
+// result<token*>
+struct result peek_token(struct allocator * alct, struct token_stream * ts);
+
 void print_token(struct token *t);
+
 struct token_stream * new_token_stream(struct allocator * alct, FILE* fp);
 
 #endif // FMV_AS_TOKENIZER_H_

+ 32 - 0
src/utils.c

@@ -2,6 +2,7 @@
 
 #include <stdio.h>
 #include <assert.h>
+#include <stdarg.h>
 
 struct allocator {
     void** bufs;
@@ -39,4 +40,35 @@ void * allocate(struct allocator * alct, size_t size) {
     return ptr;
 }
 
+// Formats like sprintf() into a buffer obtained from the allocator.
+// Returns the NUL-terminated string, or NULL if formatting fails or
+// allocate() returns NULL.
+char* safe_sprintf(struct allocator *alct, const char* format, ...) {
+    va_list args;
+    va_list args_copy;
+    int length;
+    char* buffer;
 
+    va_start(args, format);
+    // The measuring call consumes args, so keep a copy for the real write.
+    va_copy(args_copy, args);
+
+    // First pass: compute the formatted length without writing anything.
+    length = vsnprintf(NULL, 0, format, args);
+    va_end(args);
+    if (length < 0) {
+        va_end(args_copy);
+        return NULL;
+    }
+
+    // Do the +1 in size_t so it cannot overflow int when length == INT_MAX.
+    buffer = allocate(alct, (size_t)length + 1);
+    if (buffer == NULL) {
+        va_end(args_copy);
+        return NULL;
+    }
+
+    // Second pass: bounded write into the exactly-sized buffer.
+    vsnprintf(buffer, (size_t)length + 1, format, args_copy);
+    va_end(args_copy);
+
+    return buffer;
+}

+ 7 - 0
src/utils.h

@@ -5,9 +5,16 @@
 
 struct allocator;
 
+// Outcome of a fallible call. errmsg is NULL on success; otherwise it holds
+// the error message. value carries the payload, whose real type depends on
+// the function that returned the result.
+struct result {
+    void *value;
+    const char* errmsg;
+};
+
 struct allocator * new_allocator();
 void delete_allocator(struct allocator * allocator);
 
 void* allocate(struct allocator * allocator, size_t size);
 
+char* safe_sprintf(struct allocator *alct, const char* format, ...);
+
 #endif // FVM_UTILS_H_

+ 4 - 1
tests/test_as_analyzer.c

@@ -18,12 +18,15 @@ char *input_buffer =
     "end:    eq\n";
 
 int main(int argc, char** argv) {
+    struct result result;
     printf("[TEST] assembler analyzer\n");
     // make a memory buffer to FILE*
     FILE *fp = fmemopen(input_buffer, strlen(input_buffer), "r");
     struct allocator * alct = new_allocator();
     struct token_stream * ts = new_token_stream(alct, fp);
-    struct prog * prog = parse_prog(alct, ts);
+    result = parse_prog(alct, ts);
+    assert(result.errmsg == NULL);
+    struct prog* prog = result.value;
     struct sym_table sym_table = analyze_prog(alct, prog);    
 
     assert(strcmp(sym_table.buf[0].name, "start") == 0);

+ 7 - 1
tests/test_as_parser.c

@@ -14,12 +14,18 @@ char *input_buffer =
     "end:    eq\n";
 
 int main(int argc, char** argv) {
+    struct result result;
     printf("[TEST] assembler parser\n");
     // make a memory buffer to FILE*
     FILE *fp = fmemopen(input_buffer, strlen(input_buffer), "r");
     struct allocator * alct = new_allocator();
     struct token_stream * ts = new_token_stream(alct, fp);
-    struct prog * prog = parse_prog(alct, ts);
+    result = parse_prog(alct, ts);
+    if (result.errmsg != NULL) {
+        printf("error: %s\n", result.errmsg);
+    }
+    assert(result.errmsg == NULL);
+    struct prog* prog = result.value;
     
     // compare output
     struct stmt * * stmts = prog->stmts->stmts;

+ 11 - 3
tests/test_as_tokenizer.c

@@ -26,7 +26,8 @@ char *expected_output =
     "OP: div, line: 4, col: 5\n"
     "NEWLINE\n"
     "OP: eq, line: 5, col: 5\n"
-    "NEWLINE\n";
+    "NEWLINE\n"
+    "ENDOFFILE\n";
 
 int main(int argc, char** argv) {
     printf("[TEST] assembler tokenizer\n");
@@ -40,8 +41,15 @@ int main(int argc, char** argv) {
     FILE *out = fmemopen(output_buffer, 10240, "w");
     FILE *origin_stdout = stdout;
     stdout = out;
-    while (peek_token(alct, ts)->type != TK_ENDOFFILE) {
-        print_token(peek_token(alct, ts));
+    struct token* token;
+    struct result result;
+    while (1) {
+        result = peek_token(alct, ts);
+        assert(result.errmsg == NULL);
+        assert(result.value != NULL);
+        token = result.value;
+        print_token(token);
+        if (token->type == TK_ENDOFFILE) break;
         next_token(alct, ts);
     }
     fclose(out);