#include #include #include #include "as_tokenizer.h" #include "as_op.h" #include "utils.h" int input_stream_next_char(input_stream_t s) { if (s->cursor == -1) { return EOF; } if (s->buf_pos == s->cursor) { size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp); if (n == 0) { s->cursor = -1; return EOF; } s->buf_pos = n; s->cursor = 0; } int c = s->buf[s->cursor]; s->cursor++; if (c == '\n') { s->line++; s->col = 1; } else { s->col++; } return c; } int input_stream_peek_char(input_stream_t s) { if (s->cursor == -1) { return EOF; } if (s->buf_pos == s->cursor) { size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp); if (n == 0) { return EOF; } s->buf_pos = n; s->cursor = 0; } return s->buf[s->cursor]; } int is_start_of_identifier(int c) { if (c >= 'a' && c <= 'z') { return 1; } if (c >= 'A' && c <= 'Z') { return 1; } if (c == '_') { return 1; } return 0; } int is_part_of_identifier(int c) { if (is_start_of_identifier(c)) { return 1; } if (c >= '0' && c <= '9') { return 1; } return 0; } token_t next_token_impl(allocator_t alct, input_stream_t s) { token_t t = allocate(alct, sizeof(struct token)); int c; while (1) { c = input_stream_peek_char(s); if (c == EOF) { break; } if (c == '\n') { input_stream_next_char(s); *t = (struct token){.type = TK_NEWLINE, .line = s->line, .col = s->col}; return t; } if (c == ':') { input_stream_next_char(s); *t = (struct token){.type = TK_COLON, .line = s->line, .col = s->col}; return t; } if (c == ' ' || c == '\t') { input_stream_next_char(s); continue; } if (c >= '0' && c <= '9') { int64_t ival = 0; while (1) { input_stream_next_char(s); ival = ival * 10 + (c - '0'); c = input_stream_peek_char(s); if (c < '0' || c > '9') { break; } } *t = (struct token){.type = TK_ARG, .ival = ival, .line = s->line, .col = s->col}; return t; } if (is_start_of_identifier(c)) { size_t line = s->line; size_t col = s->col; char *sval = allocate(alct, 256); size_t i = 0; while(1) { if (i >= 255) { fprintf(stderr, "error: identifier too long\n"); exit(1); } input_stream_next_char(s); sval[i++] = c; c = input_stream_peek_char(s); if (!is_part_of_identifier(c)) { break; } } sval[i] = '\0'; if (isOp(sval)) { *t = (struct token){.type = TK_OP, .sval = sval, .line = line, .col = col}; return t; } *t = (struct token){.type = TK_TAG, .sval = sval, .line = line, .col = col}; return t; } fprintf(stderr, "error: invalid character %c at line %d, col %d\n", c, s->line, s->col); } // end of file *t = (struct token){.type = TK_ENDOFFILE}; return t; } token_t next_token(allocator_t alct, token_stream_t ts) { if (ts->buf != NULL) { token_t t = ts->buf; ts->buf = NULL; return t; } token_t t = next_token_impl(alct, ts->s); return t; } token_t peek_token(allocator_t alct, token_stream_t ts) { if (ts->buf != NULL) { return ts->buf; } ts->buf = next_token_impl(alct, ts->s); return ts->buf; } void print_token(token_t t) { switch (t->type) { case TK_OP: printf("OP: %s, line: %d, col: %d\n", t->sval, t->line, t->col); break; case TK_ARG: printf("ARG: %ld, line: %d, col: %d\n", t->ival, t->line, t->col); break; case TK_TAG: printf("LABEL: %s, line: %d, col: %d\n", t->sval, t->line, t->col); break; case TK_COLON: printf("COLON\n"); break; case TK_NEWLINE: printf("NEWLINE\n"); break; case TK_ENDOFFILE: printf("ENDOFFILE\n"); break; } } token_stream_t new_token_stream(allocator_t alct, FILE* fp) { input_stream_t s = allocate(alct, sizeof(struct input_stream)); s->fp = fp; s->buf = allocate(alct, INPUT_STREAM_BUF_SIZE); s->buf_pos = 0; s->cursor = 0; s->line = 1; s->col = 1; token_stream_t ts = allocate(alct, sizeof(struct token_stream)); ts->s = s; ts->buf = NULL; return ts; }