#include #include #include #include "as_tokenizer.h" #include "as_op.h" #include "utils.h" int input_stream_next_char(struct input_stream * s) { if (s->cursor == -1) { return EOF; } if (s->buf_pos == s->cursor) { size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp); if (n == 0) { s->cursor = -1; return EOF; } s->buf_pos = n; s->cursor = 0; } int c = s->buf[s->cursor]; s->cursor++; if (c == '\n') { s->line++; s->col = 1; } else { s->col++; } return c; } int input_stream_peek_char(struct input_stream * s) { if (s->cursor == -1) { return EOF; } if (s->buf_pos == s->cursor) { size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp); if (n == 0) { return EOF; } s->buf_pos = n; s->cursor = 0; } return s->buf[s->cursor]; } int is_start_of_identifier(int c) { if (c >= 'a' && c <= 'z') { return 1; } if (c >= 'A' && c <= 'Z') { return 1; } if (c == '_') { return 1; } if (c == '.') { return 1; } return 0; } int is_part_of_identifier(int c) { if (is_start_of_identifier(c)) { return 1; } if (c >= '0' && c <= '9') { return 1; } return 0; } struct result next_token_impl(struct allocator * alct, struct input_stream * s) { const char *errmsg; struct token * t = allocate(alct, sizeof(struct token)); int c; while (1) { c = input_stream_peek_char(s); if (c == EOF) { break; } if (c == '\n') { input_stream_next_char(s); *t = (struct token){.type = TK_NEWLINE, .line = s->line, .col = s->col}; return ok(t); } if (c == ':') { input_stream_next_char(s); *t = (struct token){.type = TK_COLON, .line = s->line, .col = s->col}; return ok(t); } if (c == ' ' || c == '\t') { input_stream_next_char(s); continue; } if (c >= '0' && c <= '9') { int64_t ival = 0; while (1) { input_stream_next_char(s); ival = ival * 10 + (c - '0'); c = input_stream_peek_char(s); if (c < '0' || c > '9') { break; } } *t = (struct token){.type = TK_ARG, .ival = ival, .line = s->line, .col = s->col}; return ok(t); } if (is_start_of_identifier(c)) { size_t line = s->line; size_t col = s->col; char *sval = allocate(alct, 256); size_t i = 0; while (1) { if (i >= 255) { return err(safe_sprintf(alct, "error: identifier too long\n")); } input_stream_next_char(s); sval[i++] = c; c = input_stream_peek_char(s); if (!is_part_of_identifier(c)) { break; } } sval[i] = '\0'; if (isOp(sval)) { *t = (struct token){.type = TK_OP, .sval = sval, .line = line, .col = col}; return ok(t); } *t = (struct token){.type = TK_TAG, .sval = sval, .line = line, .col = col}; return ok(t); } return err(safe_sprintf(alct, "error: invalid character %c at line %d, col %d\n", c, s->line, s->col)); } // end of file *t = (struct token){.type = TK_ENDOFFILE}; return ok(t); } struct result next_token(struct allocator * alct, struct token_stream * ts) { if (ts->buf != NULL) { struct token * t = ts->buf; ts->buf = NULL; return ok(t); } return next_token_impl(alct, ts->s); } struct result peek_token(struct allocator * alct, struct token_stream * ts) { if (ts->buf != NULL) { return ok(ts->buf); } ts->buf = unwrap(next_token_impl(alct, ts->s)); return ok(ts->buf); } void print_token(struct token * t) { switch (t->type) { case TK_OP: printf("OP: %s, line: %d, col: %d\n", t->sval, t->line, t->col); break; case TK_ARG: printf("ARG: %ld, line: %d, col: %d\n", t->ival, t->line, t->col); break; case TK_TAG: printf("LABEL: %s, line: %d, col: %d\n", t->sval, t->line, t->col); break; case TK_COLON: printf("COLON\n"); break; case TK_NEWLINE: printf("NEWLINE\n"); break; case TK_ENDOFFILE: printf("ENDOFFILE\n"); break; } } struct token_stream * new_token_stream(struct allocator * alct, FILE* fp) { struct input_stream * s = allocate(alct, sizeof(struct input_stream)); s->fp = fp; s->buf = allocate(alct, INPUT_STREAM_BUF_SIZE); s->buf_pos = 0; s->cursor = 0; s->line = 1; s->col = 1; struct token_stream * ts = allocate(alct, sizeof(struct token_stream)); ts->s = s; ts->buf = NULL; return ts; }