123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191 |
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "as_tokenizer.h"
#include "as_op.h"
#include "utils.h"
- int InputStream_nextChar(InputStream s) {
- if (s->cursor == -1) {
- return EOF;
- }
- if (s->buf_pos == s->cursor) {
- size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp);
- if (n == 0) {
- s->cursor = -1;
- return EOF;
- }
- s->buf_pos = n;
- s->cursor = 0;
- }
- int c = s->buf[s->cursor];
- s->cursor++;
- if (c == '\n') {
- s->line++;
- s->col = 1;
- } else {
- s->col++;
- }
- return c;
- }
- int InputStream_peekChar(InputStream s) {
- if (s->cursor == -1) {
- return EOF;
- }
- if (s->buf_pos == s->cursor) {
- size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp);
- if (n == 0) {
- return EOF;
- }
- s->buf_pos = n;
- s->cursor = 0;
- }
- return s->buf[s->cursor];
- }
/*
 * Returns 1 when `c` may begin an identifier (an ASCII letter or '_'),
 * and 0 otherwise.
 */
int isStartOfIndentifier(int c) {
    int is_lower = (c >= 'a' && c <= 'z');
    int is_upper = (c >= 'A' && c <= 'Z');
    return is_lower || is_upper || c == '_';
}
/*
 * Returns 1 when `c` may appear inside an identifier: a decimal digit or
 * any character accepted by isStartOfIndentifier. Returns 0 otherwise.
 */
int isPartOfIndentifier(int c) {
    int is_digit = (c >= '0' && c <= '9');
    return is_digit || isStartOfIndentifier(c);
}
- Token nextTokenImpl(Allocator alct, InputStream s) {
- Token t = allocate(alct, sizeof(struct token));
- int c;
- while (1) {
- c = InputStream_peekChar(s);
- if (c == EOF) {
- break;
- }
- if (c == '\n') {
- InputStream_nextChar(s);
- *t = (struct token){.type = NEWLINE, .line = s->line, .col = s->col};
- return t;
- }
- if (c == ':') {
- InputStream_nextChar(s);
- *t = (struct token){.type = COLON, .line = s->line, .col = s->col};
- return t;
- }
- if (c == ' ' || c == '\t') {
- InputStream_nextChar(s);
- continue;
- }
- if (c >= '0' && c <= '9') {
- int64_t ival = 0;
- while (1) {
- InputStream_nextChar(s);
- ival = ival * 10 + (c - '0');
- c = InputStream_peekChar(s);
- if (c < '0' || c > '9') {
- break;
- }
- }
- *t = (struct token){.type = ARG, .ival = ival, .line = s->line, .col = s->col};
- return t;
- }
- if (isStartOfIndentifier(c)) {
- size_t line = s->line;
- size_t col = s->col;
- char *sval = allocate(alct, 256);
- size_t i = 0;
- while(1) {
- if (i >= 255) {
- fprintf(stderr, "error: identifier too long\n");
- exit(1);
- }
- InputStream_nextChar(s);
- sval[i++] = c;
- c = InputStream_peekChar(s);
- if (!isPartOfIndentifier(c)) {
- break;
- }
- }
- sval[i] = '\0';
- if (isOp(sval)) {
- *t = (struct token){.type = OP, .sval = sval, .line = line, .col = col};
- return t;
- }
- *t = (struct token){.type = TAG, .sval = sval, .line = line, .col = col};
- return t;
- }
- fprintf(stderr, "error: invalid character %c at line %d, col %d\n", c, s->line, s->col);
- }
- // end of file
- *t = (struct token){.type = ENDOFFILE};
- return t;
- }
- Token nextToken(Allocator alct, TokenStream ts) {
- if (ts->buf != NULL) {
- Token t = ts->buf;
- ts->buf = NULL;
- return t;
- }
- Token t = nextTokenImpl(alct, ts->s);
- return t;
- }
- Token peekToken(Allocator alct, TokenStream ts) {
- if (ts->buf != NULL) {
- return ts->buf;
- }
- ts->buf = nextTokenImpl(alct, ts->s);
- return ts->buf;
- }
- void printToken(Token t) {
- switch (t->type) {
- case OP:
- printf("OP: %s, line: %d, col: %d\n", t->sval, t->line, t->col);
- break;
- case ARG:
- printf("ARG: %ld, line: %d, col: %d\n", t->ival, t->line, t->col);
- break;
- case TAG:
- printf("LABEL: %s, line: %d, col: %d\n", t->sval, t->line, t->col);
- break;
- case COLON:
- printf("COLON\n");
- break;
- case NEWLINE:
- printf("NEWLINE\n");
- break;
- case ENDOFFILE:
- printf("ENDOFFILE\n");
- break;
- }
- }
- TokenStream makeTokenStream(Allocator alct, FILE* fp) {
- InputStream s = allocate(alct, sizeof(struct inputStream));
- s->fp = fp;
- s->buf = allocate(alct, INPUT_STREAM_BUF_SIZE);
- s->buf_pos = 0;
- s->cursor = 0;
- s->line = 1;
- s->col = 1;
- TokenStream ts = allocate(alct, sizeof(struct tokenStream));
- ts->s = s;
- ts->buf = NULL;
- return ts;
- }
|