diff options
| author | Mistivia <i@mistivia.com> | 2025-03-26 19:11:10 +0800 |
|---|---|---|
| committer | Mistivia <i@mistivia.com> | 2025-03-26 19:11:10 +0800 |
| commit | 97d4462ac24b726d9313ec52ca0f11711ead553b (patch) | |
| tree | 1bba7f6d4f2690d673b810bda4ec34523034bcaa /src/as_tokenizer.c | |
| parent | 312716a295626f2b60b41777728c7f220fee843d (diff) | |
Diffstat (limited to 'src/as_tokenizer.c')
| -rw-r--r-- | src/as_tokenizer.c | 193 |
1 files changed, 0 insertions, 193 deletions
diff --git a/src/as_tokenizer.c b/src/as_tokenizer.c deleted file mode 100644 index 1651ccb..0000000 --- a/src/as_tokenizer.c +++ /dev/null @@ -1,193 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "as_tokenizer.h" -#include "as_op.h" -#include "utils.h" - -int input_stream_next_char(input_stream* s) { - if (s->cursor == -1) { - return EOF; - } - if (s->buf_pos == s->cursor) { - size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp); - if (n == 0) { - s->cursor = -1; - return EOF; - } - s->buf_pos = n; - s->cursor = 0; - } - int c = s->buf[s->cursor]; - s->cursor++; - if (c == '\n') { - s->line++; - s->col = 1; - } else { - s->col++; - } - return c; -} - -int input_stream_peek_char(input_stream* s) { - if (s->cursor == -1) { - return EOF; - } - if (s->buf_pos == s->cursor) { - size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp); - if (n == 0) { - return EOF; - } - s->buf_pos = n; - s->cursor = 0; - } - return s->buf[s->cursor]; -} - -int is_start_of_identifier(int c) { - if (c >= 'a' && c <= 'z') { - return 1; - } - if (c >= 'A' && c <= 'Z') { - return 1; - } - if (c == '_') { - return 1; - } - if (c == '.') { - return 1; - } - return 0; -} - -int is_part_of_identifier(int c) { - if (is_start_of_identifier(c)) { - return 1; - } - if (c >= '0' && c <= '9') { - return 1; - } - return 0; -} - -result next_token_impl(allocator* alct, input_stream* s) { - const char *errmsg; - token* t = allocate(alct, sizeof(token)); - int c; - while (1) { - c = input_stream_peek_char(s); - if (c == EOF) { - break; - } - if (c == '\n') { - input_stream_next_char(s); - *t = (struct token){.type = TK_NEWLINE, .line = s->line, .col = s->col}; - return ok(t); - } - if (c == ':') { - input_stream_next_char(s); - *t = (struct token){.type = TK_COLON, .line = s->line, .col = s->col}; - return ok(t); - } - if (c == ' ' || c == '\t') { - input_stream_next_char(s); - continue; - } - if (c >= '0' && c <= '9') { - int64_t ival = 0; - while (1) { - input_stream_next_char(s); - ival = ival * 10 + (c - '0'); - c = input_stream_peek_char(s); - if (c < '0' || c > '9') { - break; - } - } - *t = (struct token){.type = TK_ARG, .ival = ival, .line = s->line, .col = s->col}; - return ok(t); - } - if (is_start_of_identifier(c)) { - size_t line = s->line; - size_t col = s->col; - char *sval = allocate(alct, 256); - size_t i = 0; - while (1) { - if (i >= 255) { - return err(safe_sprintf(alct, "error: identifier too long\n")); - } - input_stream_next_char(s); - sval[i++] = c; - c = input_stream_peek_char(s); - if (!is_part_of_identifier(c)) { - break; - } - } - sval[i] = '\0'; - if (is_op(sval)) { - *t = (struct token){.type = TK_OP, .sval = sval, .line = line, .col = col}; - return ok(t); - } - *t = (struct token){.type = TK_TAG, .sval = sval, .line = line, .col = col}; - return ok(t); - } - return err(safe_sprintf(alct, "error: invalid character %c at line %d, col %d\n", c, s->line, s->col)); - } - // end of file - *t = (struct token){.type = TK_ENDOFFILE}; - return ok(t); -} - -result next_token(allocator* alct, token_stream* ts) { - if (ts->buf != NULL) { - struct token * t = ts->buf; - ts->buf = NULL; - return ok(t); - } - return next_token_impl(alct, ts->s); -} - -result peek_token(allocator* alct, token_stream* ts) { - if (ts->buf != NULL) { - return ok(ts->buf); - } - ts->buf = unwrap(next_token_impl(alct, ts->s)); - return ok(ts->buf); -} - -void print_token(token* t) { - switch (t->type) { - case TK_OP: - printf("OP: %s, line: %d, col: %d\n", t->sval, t->line, t->col); - break; - case TK_ARG: - printf("ARG: %ld, line: %d, col: %d\n", t->ival, t->line, t->col); - break; - case TK_TAG: - printf("LABEL: %s, line: %d, col: %d\n", t->sval, t->line, t->col); - break; - case TK_COLON: - printf("COLON\n"); - break; - case TK_NEWLINE: - printf("NEWLINE\n"); - break; - case TK_ENDOFFILE: - printf("ENDOFFILE\n"); - break; - } -} - -token_stream* new_token_stream(allocator* alct, FILE* fp) { - input_stream* s = allocate(alct, sizeof(input_stream)); - s->fp = fp; - s->buf = allocate(alct, INPUT_STREAM_BUF_SIZE); - s->buf_pos = 0; - s->cursor = 0; - s->line = 1; - s->col = 1; - token_stream* ts = allocate(alct, sizeof(token_stream)); - ts->s = s; - ts->buf = NULL; - return ts; -} |
