#include #include #include #include "as_tokenizer.h" #include "as_op.h" #include "utils.h" int InputStream_nextChar(InputStream s) { if (s->cursor == -1) { return EOF; } if (s->buf_pos == s->cursor) { size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp); if (n == 0) { s->cursor = -1; return EOF; } s->buf_pos = n; s->cursor = 0; } int c = s->buf[s->cursor]; s->cursor++; if (c == '\n') { s->line++; s->col = 1; } else { s->col++; } return c; } int InputStream_peekChar(InputStream s) { if (s->cursor == -1) { return EOF; } if (s->buf_pos == s->cursor) { size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp); if (n == 0) { return EOF; } s->buf_pos = n; s->cursor = 0; } return s->buf[s->cursor]; } int isStartOfIndentifier(int c) { if (c >= 'a' && c <= 'z') { return 1; } if (c >= 'A' && c <= 'Z') { return 1; } if (c == '_') { return 1; } return 0; } int isPartOfIndentifier(int c) { if (isStartOfIndentifier(c)) { return 1; } if (c >= '0' && c <= '9') { return 1; } return 0; } Token nextTokenImpl(Allocator alct, InputStream s) { Token t = allocate(alct, sizeof(struct token)); int c; while (1) { c = InputStream_peekChar(s); if (c == EOF) { break; } if (c == '\n') { InputStream_nextChar(s); *t = (struct token){.type = NEWLINE, .line = s->line, .col = s->col}; return t; } if (c == ':') { InputStream_nextChar(s); *t = (struct token){.type = COLON, .line = s->line, .col = s->col}; return t; } if (c == ' ' || c == '\t') { InputStream_nextChar(s); continue; } if (c >= '0' && c <= '9') { int64_t ival = 0; while (1) { InputStream_nextChar(s); ival = ival * 10 + (c - '0'); c = InputStream_peekChar(s); if (c < '0' || c > '9') { break; } } *t = (struct token){.type = ARG, .ival = ival, .line = s->line, .col = s->col}; return t; } if (isStartOfIndentifier(c)) { size_t line = s->line; size_t col = s->col; char *sval = allocate(alct, 256); size_t i = 0; while(1) { if (i >= 255) { fprintf(stderr, "error: identifier too long\n"); exit(1); } InputStream_nextChar(s); sval[i++] = c; c = InputStream_peekChar(s); if (!isPartOfIndentifier(c)) { break; } } sval[i] = '\0'; if (isOp(sval)) { *t = (struct token){.type = OP, .sval = sval, .line = line, .col = col}; return t; } *t = (struct token){.type = TAG, .sval = sval, .line = line, .col = col}; return t; } fprintf(stderr, "error: invalid character %c at line %d, col %d\n", c, s->line, s->col); } // end of file *t = (struct token){.type = ENDOFFILE}; return t; } Token nextToken(Allocator alct, TokenStream ts) { if (ts->buf != NULL) { Token t = ts->buf; ts->buf = NULL; return t; } Token t = nextTokenImpl(alct, ts->s); return t; } Token peekToken(Allocator alct, TokenStream ts) { if (ts->buf != NULL) { return ts->buf; } ts->buf = nextTokenImpl(alct, ts->s); return ts->buf; } void printToken(Token t) { switch (t->type) { case OP: printf("OP: %s, line: %d, col: %d\n", t->sval, t->line, t->col); break; case ARG: printf("ARG: %ld, line: %d, col: %d\n", t->ival, t->line, t->col); break; case TAG: printf("LABEL: %s, line: %d, col: %d\n", t->sval, t->line, t->col); break; case COLON: printf("COLON\n"); break; case NEWLINE: printf("NEWLINE\n"); break; case ENDOFFILE: printf("ENDOFFILE\n"); break; } } TokenStream makeTokenStream(Allocator alct, FILE* fp) { InputStream s = allocate(alct, sizeof(struct inputStream)); s->fp = fp; s->buf = allocate(alct, INPUT_STREAM_BUF_SIZE); s->buf_pos = 0; s->cursor = 0; s->line = 1; s->col = 1; TokenStream ts = allocate(alct, sizeof(struct tokenStream)); ts->s = s; ts->buf = NULL; return ts; }