123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191 |
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "as_tokenizer.h"
#include "as_op.h"
#include "utils.h"
- int input_stream_next_char(input_stream_t s) {
- if (s->cursor == -1) {
- return EOF;
- }
- if (s->buf_pos == s->cursor) {
- size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp);
- if (n == 0) {
- s->cursor = -1;
- return EOF;
- }
- s->buf_pos = n;
- s->cursor = 0;
- }
- int c = s->buf[s->cursor];
- s->cursor++;
- if (c == '\n') {
- s->line++;
- s->col = 1;
- } else {
- s->col++;
- }
- return c;
- }
- int input_stream_peek_char(input_stream_t s) {
- if (s->cursor == -1) {
- return EOF;
- }
- if (s->buf_pos == s->cursor) {
- size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp);
- if (n == 0) {
- return EOF;
- }
- s->buf_pos = n;
- s->cursor = 0;
- }
- return s->buf[s->cursor];
- }
/*
 * Tell whether c may begin an identifier: an ASCII letter of either case
 * or an underscore. Returns 1 if so, 0 otherwise.
 */
int is_start_of_identifier(int c) {
    int is_lower = (c >= 'a' && c <= 'z');
    int is_upper = (c >= 'A' && c <= 'Z');
    return is_lower || is_upper || c == '_';
}
/*
 * Tell whether c may appear inside an identifier: anything that can start
 * one, plus the decimal digits. Returns 1 if so, 0 otherwise.
 */
int is_part_of_identifier(int c) {
    return is_start_of_identifier(c) || (c >= '0' && c <= '9');
}
- token_t next_token_impl(allocator_t alct, input_stream_t s) {
- token_t t = allocate(alct, sizeof(struct token));
- int c;
- while (1) {
- c = input_stream_peek_char(s);
- if (c == EOF) {
- break;
- }
- if (c == '\n') {
- input_stream_next_char(s);
- *t = (struct token){.type = TK_NEWLINE, .line = s->line, .col = s->col};
- return t;
- }
- if (c == ':') {
- input_stream_next_char(s);
- *t = (struct token){.type = TK_COLON, .line = s->line, .col = s->col};
- return t;
- }
- if (c == ' ' || c == '\t') {
- input_stream_next_char(s);
- continue;
- }
- if (c >= '0' && c <= '9') {
- int64_t ival = 0;
- while (1) {
- input_stream_next_char(s);
- ival = ival * 10 + (c - '0');
- c = input_stream_peek_char(s);
- if (c < '0' || c > '9') {
- break;
- }
- }
- *t = (struct token){.type = TK_ARG, .ival = ival, .line = s->line, .col = s->col};
- return t;
- }
- if (is_start_of_identifier(c)) {
- size_t line = s->line;
- size_t col = s->col;
- char *sval = allocate(alct, 256);
- size_t i = 0;
- while(1) {
- if (i >= 255) {
- fprintf(stderr, "error: identifier too long\n");
- exit(1);
- }
- input_stream_next_char(s);
- sval[i++] = c;
- c = input_stream_peek_char(s);
- if (!is_part_of_identifier(c)) {
- break;
- }
- }
- sval[i] = '\0';
- if (isOp(sval)) {
- *t = (struct token){.type = TK_OP, .sval = sval, .line = line, .col = col};
- return t;
- }
- *t = (struct token){.type = TK_TAG, .sval = sval, .line = line, .col = col};
- return t;
- }
- fprintf(stderr, "error: invalid character %c at line %d, col %d\n", c, s->line, s->col);
- }
- // end of file
- *t = (struct token){.type = TK_ENDOFFILE};
- return t;
- }
- token_t next_token(allocator_t alct, token_stream_t ts) {
- if (ts->buf != NULL) {
- token_t t = ts->buf;
- ts->buf = NULL;
- return t;
- }
- token_t t = next_token_impl(alct, ts->s);
- return t;
- }
- token_t peek_token(allocator_t alct, token_stream_t ts) {
- if (ts->buf != NULL) {
- return ts->buf;
- }
- ts->buf = next_token_impl(alct, ts->s);
- return ts->buf;
- }
- void print_token(token_t t) {
- switch (t->type) {
- case TK_OP:
- printf("OP: %s, line: %d, col: %d\n", t->sval, t->line, t->col);
- break;
- case TK_ARG:
- printf("ARG: %ld, line: %d, col: %d\n", t->ival, t->line, t->col);
- break;
- case TK_TAG:
- printf("LABEL: %s, line: %d, col: %d\n", t->sval, t->line, t->col);
- break;
- case TK_COLON:
- printf("COLON\n");
- break;
- case TK_NEWLINE:
- printf("NEWLINE\n");
- break;
- case TK_ENDOFFILE:
- printf("ENDOFFILE\n");
- break;
- }
- }
- token_stream_t new_token_stream(allocator_t alct, FILE* fp) {
- input_stream_t s = allocate(alct, sizeof(struct input_stream));
- s->fp = fp;
- s->buf = allocate(alct, INPUT_STREAM_BUF_SIZE);
- s->buf_pos = 0;
- s->cursor = 0;
- s->line = 1;
- s->col = 1;
- token_stream_t ts = allocate(alct, sizeof(struct token_stream));
- ts->s = s;
- ts->buf = NULL;
- return ts;
- }
|