aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile8
-rw-r--r--src/as_main.c25
-rw-r--r--src/as_tokenizer.c198
-rw-r--r--src/as_tokenizer.h36
-rw-r--r--tests/test_as_tokenizer.c53
5 files changed, 316 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index 6ef7466..bdf1afb 100644
--- a/Makefile
+++ b/Makefile
@@ -4,15 +4,15 @@ ldflags = -lm
cc = gcc
csc = chicken-csc
-src = $(shell find src/ -name '*.c' -not -name 'main.c')
+src = $(shell find src/ -name '*.c' -not -name '*main.c')
obj = $(src:.c=.o)
tests=$(shell find tests/ -name '*.c')
tests_bin=$(tests:.c=.bin)
-all: $(target) fvm-as
+all: $(target) fvm-as
-fvm-as: assembler/fvm-as.scm
- $(csc) $< -o $@
+# Build the assembler from the shared objects plus its own entry point.
+# Libraries ($(ldflags)) go after the inputs that use them: the linker resolves
+# symbols in command-line order.
+fvm-as: $(obj) src/as_main.c
+	$(cc) $(cflags) $^ $(ldflags) -o $@
full: all $(tests_bin)
diff --git a/src/as_main.c b/src/as_main.c
new file mode 100644
index 0000000..8585fa1
--- /dev/null
+++ b/src/as_main.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+
+#include "as_tokenizer.h"
+
+// AST
+// ===
+//
+// <prog> ::= <stmts>
+// <stmts> ::= <stmt> <newline> | <stmt> <newline> <stmts>
+// <stmt> ::= <tag> <instr> | <instr> | <tag>
+// <instr> ::= <op> | <op> <arg> | <op> <label>
+// <tag> ::= <label> :
+// <op> ::= add | sub | mul | div | mod | eq
+
+// Entry point of the assembler. Expects exactly one argument: the path of the
+// source file to tokenize.
+//
+// Returns 0 on success, 1 on a usage error, when the input file cannot be
+// opened, or when no token stream could be created.
+int main(int argc, char** argv) {
+    if (argc != 2) {
+        fprintf(stderr, "Usage: fvm-as <inputfile>\n");
+        return 1;
+    }
+
+    FILE *fp = fopen(argv[1], "r");
+    if (fp == NULL) {
+        // Without this check a missing/unreadable file would hand a NULL
+        // stream to the tokenizer.
+        perror(argv[1]);
+        return 1;
+    }
+
+    TokenStream* ts = makeTokenStream(fp);
+    if (ts == NULL) {
+        fprintf(stderr, "error: cannot create token stream\n");
+        fclose(fp);
+        return 1;
+    }
+
+    // The stream is not consumed yet; it is released at process exit.
+    (void)ts;
+    fclose(fp);
+    return 0;
+}
+
diff --git a/src/as_tokenizer.c b/src/as_tokenizer.c
new file mode 100644
index 0000000..9766372
--- /dev/null
+++ b/src/as_tokenizer.c
@@ -0,0 +1,198 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "as_tokenizer.h"
+
+// Consumes and returns the next character of the stream.
+//
+// Refills the buffer from s->fp when every buffered byte has been consumed.
+// Once fread yields no data the stream is latched (cursor == -1) and every
+// later call returns EOF. A consumed '\n' advances s->line and resets s->col
+// to 1; any other character advances s->col.
+//
+// Returns the character as an unsigned char converted to int (so it can never
+// collide with EOF), or EOF when the input is exhausted.
+int InputStream_nextChar(InputStream *s) {
+    if (s->cursor == -1) {
+        return EOF;
+    }
+    if (s->buf_pos == s->cursor) {
+        size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp);
+        if (n == 0) {
+            s->cursor = -1;
+            return EOF;
+        }
+        // n <= INPUT_STREAM_BUF_SIZE, so the narrowing is safe.
+        s->buf_pos = (int)n;
+        s->cursor = 0;
+    }
+    // Go through unsigned char: a plain (possibly signed) char would turn
+    // bytes >= 0x80 negative, and 0xFF into EOF.
+    int c = (unsigned char)s->buf[s->cursor];
+    s->cursor++;
+    if (c == '\n') {
+        s->line++;
+        s->col = 1;
+    } else {
+        s->col++;
+    }
+    return c;
+}
+
+// Returns the next character of the stream without consuming it.
+//
+// Refills the buffer from s->fp when every buffered byte has been consumed,
+// but leaves s->line and s->col untouched. When fread yields no data the
+// stream is latched (cursor == -1), matching InputStream_nextChar, so later
+// calls return EOF without reading again.
+//
+// Returns the character as an unsigned char converted to int, or EOF when the
+// input is exhausted.
+int InputStream_peekChar(InputStream *s) {
+    if (s->cursor == -1) {
+        return EOF;
+    }
+    if (s->buf_pos == s->cursor) {
+        size_t n = fread(s->buf, 1, INPUT_STREAM_BUF_SIZE, s->fp);
+        if (n == 0) {
+            s->cursor = -1;
+            return EOF;
+        }
+        // n <= INPUT_STREAM_BUF_SIZE, so the narrowing is safe.
+        s->buf_pos = (int)n;
+        s->cursor = 0;
+    }
+    // Go through unsigned char so that bytes >= 0x80 are never negative and
+    // 0xFF is not mistaken for EOF.
+    return (unsigned char)s->buf[s->cursor];
+}
+
+// Mnemonics recognised as operations. The entries are string literals, so both
+// the strings and the table itself are read-only.
+const char *const ops[] = {
+    "add", "sub", "mul", "div", "mod", "eq"
+};
+
+// Returns 1 when `str` is exactly (case-sensitively) one of the mnemonics in
+// `ops`, 0 otherwise.
+int isOp(const char* str) {
+    // size_t index: sizeof yields size_t, avoiding a signed/unsigned comparison.
+    for (size_t i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) {
+        if (strcmp(ops[i], str) == 0) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+// Reports whether `c` may begin an identifier: a letter in 'a'..'z' or
+// 'A'..'Z', or an underscore. Returns 1 if so, 0 otherwise.
+int isStartOfIndentifier(int c) {
+    int is_lower = (c >= 'a' && c <= 'z');
+    int is_upper = (c >= 'A' && c <= 'Z');
+    return is_lower || is_upper || c == '_';
+}
+
+// Reports whether `c` may appear after the first character of an identifier:
+// anything isStartOfIndentifier accepts, plus the digits '0'..'9'.
+// Returns 1 if so, 0 otherwise.
+int isPartOfIndentifier(int c) {
+    int is_digit = (c >= '0' && c <= '9');
+    return isStartOfIndentifier(c) || is_digit;
+}
+
+// Size in bytes of the buffer allocated for an identifier, including the
+// terminating NUL.
+enum { IDENTIFIER_BUF_SIZE = 256 };
+
+// Lexes a run of decimal digits. `c` is the first digit, already peeked but
+// not yet consumed. The resulting ARG token carries the stream position just
+// after the literal. Exits with an error if the value does not fit in int64_t.
+static Token lexNumber(InputStream *s, int c) {
+    int64_t ival = 0;
+    while (1) {
+        int digit = c - '0';
+        // Check before multiplying: signed overflow is undefined behaviour.
+        if (ival > (INT64_MAX - digit) / 10) {
+            fprintf(stderr, "error: integer literal too large at line %d, col %d\n", s->line, s->col);
+            exit(1);
+        }
+        InputStream_nextChar(s);
+        ival = ival * 10 + digit;
+        c = InputStream_peekChar(s);
+        if (c < '0' || c > '9') {
+            break;
+        }
+    }
+    return (Token){.type = ARG, .ival = ival, .line = s->line, .col = s->col};
+}
+
+// Lexes an identifier. `c` is its first character, already peeked but not yet
+// consumed. Produces an OP token when the text is a known mnemonic and a LABEL
+// token otherwise; either way the token carries the position of the first
+// character and a heap-allocated, NUL-terminated copy of the text in `sval`.
+static Token lexIdentifier(InputStream *s, int c) {
+    int line = s->line;
+    int col = s->col;
+    char *sval = malloc(IDENTIFIER_BUF_SIZE);
+    if (sval == NULL) {
+        fprintf(stderr, "error: out of memory\n");
+        exit(1);
+    }
+    size_t i = 0;
+    while (1) {
+        // Keep one byte free for the terminating NUL.
+        if (i >= IDENTIFIER_BUF_SIZE - 1) {
+            fprintf(stderr, "error: identifier too long\n");
+            exit(1);
+        }
+        InputStream_nextChar(s);
+        sval[i++] = (char)c;
+        c = InputStream_peekChar(s);
+        if (!isPartOfIndentifier(c)) {
+            break;
+        }
+    }
+    sval[i] = '\0';
+    return (Token){.type = isOp(sval) ? OP : LABEL, .sval = sval, .line = line, .col = col};
+}
+
+// Reads the next token from `s`.
+//
+// Spaces and tabs are skipped. '\n' yields NEWLINE and ':' yields COLON, both
+// stamped with the stream position after the character was consumed. Digits
+// start an ARG, identifier characters start an OP or LABEL. End of input
+// yields an ENDOFFILE token whose remaining fields are zero.
+//
+// Any other character is a fatal error: it is reported on stderr and the
+// process exits with status 1. (Previously the character was reported but
+// never consumed, so the loop printed the same error forever.)
+Token nextTokenImpl(InputStream *s) {
+    while (1) {
+        int c = InputStream_peekChar(s);
+        if (c == EOF) {
+            break;
+        }
+        if (c == '\n') {
+            InputStream_nextChar(s);
+            return (Token){.type = NEWLINE, .line = s->line, .col = s->col};
+        }
+        if (c == ':') {
+            InputStream_nextChar(s);
+            return (Token){.type = COLON, .line = s->line, .col = s->col};
+        }
+        if (c == ' ' || c == '\t') {
+            InputStream_nextChar(s);
+            continue;
+        }
+        if (c >= '0' && c <= '9') {
+            return lexNumber(s, c);
+        }
+        if (isStartOfIndentifier(c)) {
+            return lexIdentifier(s, c);
+        }
+        fprintf(stderr, "error: invalid character %c at line %d, col %d\n", c, s->line, s->col);
+        exit(1);
+    }
+    // end of file
+    return (Token){.type = ENDOFFILE};
+}
+
+// Consumes and returns the next token of the stream.
+//
+// If a token was previously buffered by peekToken, that same object is handed
+// out and the buffer is cleared; otherwise a new token is read from the input.
+// The returned Token is heap-allocated and the stream keeps no reference to
+// it, so the caller is responsible for freeing it.
+//
+// Exits with status 1 if memory for the token cannot be allocated.
+Token *nextToken(TokenStream *ts) {
+    if (ts->buf != NULL) {
+        Token *t = ts->buf;
+        ts->buf = NULL;
+        return t;
+    }
+    Token *t = malloc(sizeof *t);
+    if (t == NULL) {
+        fprintf(stderr, "error: out of memory\n");
+        exit(1);
+    }
+    *t = nextTokenImpl(ts->s);
+    return t;
+}
+
+// Returns the next token of the stream without consuming it.
+//
+// The token is read once and kept in ts->buf; repeated calls return the same
+// pointer until nextToken hands that object out. The stream still references
+// the returned Token, so the caller must not free it.
+//
+// Exits with status 1 if memory for the token cannot be allocated.
+Token *peekToken(TokenStream *ts) {
+    if (ts->buf != NULL) {
+        return ts->buf;
+    }
+    Token *t = malloc(sizeof *t);
+    if (t == NULL) {
+        fprintf(stderr, "error: out of memory\n");
+        exit(1);
+    }
+    *t = nextTokenImpl(ts->s);
+    ts->buf = t;
+    return ts->buf;
+}
+
+// Writes a one-line description of `t` to stdout.
+//
+// OP and LABEL print their text and position, ARG prints its integer value and
+// position, and COLON, NEWLINE and ENDOFFILE print only their name.
+void printToken(Token *t) {
+    switch (t->type) {
+    case OP:
+        printf("OP: %s, line: %d, col: %d\n", t->sval, t->line, t->col);
+        break;
+    case ARG:
+        // int64_t is not necessarily `long`; convert explicitly so the
+        // argument always matches the conversion specifier.
+        printf("ARG: %lld, line: %d, col: %d\n", (long long)t->ival, t->line, t->col);
+        break;
+    case LABEL:
+        printf("LABEL: %s, line: %d, col: %d\n", t->sval, t->line, t->col);
+        break;
+    case COLON:
+        printf("COLON\n");
+        break;
+    case NEWLINE:
+        printf("NEWLINE\n");
+        break;
+    case ENDOFFILE:
+        printf("ENDOFFILE\n");
+        break;
+    }
+}
+
+// Creates a token stream that reads from `fp`.
+//
+// The stream starts at line 1, column 1 with an empty read buffer and no
+// peeked token. `fp` is only stored, not read here.
+//
+// Returns the new stream, or NULL when `fp` is NULL or an allocation fails
+// (nothing is leaked in that case).
+TokenStream* makeTokenStream(FILE* fp) {
+    if (fp == NULL) {
+        return NULL;
+    }
+
+    InputStream *s = malloc(sizeof *s);
+    TokenStream *ts = malloc(sizeof *ts);
+    char *buf = malloc(INPUT_STREAM_BUF_SIZE);
+    if (s == NULL || ts == NULL || buf == NULL) {
+        // free(NULL) is a no-op, so releasing all three is always safe.
+        free(buf);
+        free(ts);
+        free(s);
+        return NULL;
+    }
+
+    *s = (InputStream){
+        .fp = fp,
+        .buf = buf,
+        .buf_pos = 0,
+        .cursor = 0,
+        .line = 1,
+        .col = 1,
+    };
+    *ts = (TokenStream){.buf = NULL, .s = s};
+    return ts;
+}
diff --git a/src/as_tokenizer.h b/src/as_tokenizer.h
new file mode 100644
index 0000000..fef8625
--- /dev/null
+++ b/src/as_tokenizer.h
@@ -0,0 +1,36 @@
+// Tokenizer interface for the assembler.
+#ifndef AS_TOKENIZER_H
+#define AS_TOKENIZER_H
+
+#include <stdint.h>
+#include <stdio.h>
+
+// Kinds of token the tokenizer produces.
+typedef enum {
+    OP, ARG, LABEL, COLON, NEWLINE, ENDOFFILE
+} TokenType;
+
+// A single token.
+typedef struct {
+    TokenType type;
+    int line;      // line of the position recorded for the token
+    int col;       // column of the position recorded for the token
+    char *sval;    // text of an OP or LABEL token
+    int64_t ival;  // value of an ARG token
+    double fval;   // NOTE(review): declared but never set by the tokenizer - confirm intended use
+} Token;
+
+// Number of bytes read from the file per refill of InputStream.buf.
+#define INPUT_STREAM_BUF_SIZE 1024
+
+// Buffered character reader with line/column tracking.
+typedef struct {
+    FILE *fp;     // underlying input
+    char *buf;    // read buffer of INPUT_STREAM_BUF_SIZE bytes
+    int buf_pos;  // number of valid bytes currently in buf
+    int cursor;   // index in buf of the next unread byte; -1 once input is exhausted
+    int line;     // current line, starting at 1
+    int col;      // current column, starting at 1
+} InputStream;
+
+// Token reader with one token of lookahead.
+typedef struct {
+    Token* buf;      // token fetched by peekToken and not yet consumed, or NULL
+    InputStream *s;  // character source
+} TokenStream;
+
+// Consumes and returns the next token (heap-allocated).
+Token *nextToken(TokenStream *ts);
+// Returns the next token without consuming it.
+Token *peekToken(TokenStream *ts);
+// Prints a one-line description of `t` to stdout.
+void printToken(Token *t);
+// Creates a token stream reading from `fp`.
+TokenStream* makeTokenStream(FILE* fp);
+
+#endif // AS_TOKENIZER_H
diff --git a/tests/test_as_tokenizer.c b/tests/test_as_tokenizer.c
new file mode 100644
index 0000000..c32eddb
--- /dev/null
+++ b/tests/test_as_tokenizer.c
@@ -0,0 +1,53 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "as_tokenizer.h"
+
+// Assembly source fed to the tokenizer. Instruction lines are indented by four
+// spaces: the column numbers in expectedOutput (col 5 for the mnemonic) depend
+// on that exact indentation.
+char *inputBuffer =
+    "start:\n"
+    "    add 1\n"
+    "    sub start\n"
+    "    div\n"
+    "    eq\n";
+
+// Text printToken is expected to write for the tokens of inputBuffer, in
+// order; main compares the captured stdout against this string with strcmp.
+char *expectedOutput =
+ "LABEL: start, line: 1, col: 1\n"
+ "COLON\n"
+ "NEWLINE\n"
+ "OP: add, line: 2, col: 5\n"
+ "ARG: 1, line: 2, col: 10\n"
+ "NEWLINE\n"
+ "OP: sub, line: 3, col: 5\n"
+ "LABEL: start, line: 3, col: 9\n"
+ "NEWLINE\n"
+ "OP: div, line: 4, col: 5\n"
+ "NEWLINE\n"
+ "OP: eq, line: 5, col: 5\n"
+ "NEWLINE\n";
+
+// Tokenizes inputBuffer through an in-memory FILE, captures everything
+// printToken writes to stdout, and asserts the capture equals expectedOutput.
+int main(int argc, char** argv) {
+    printf("[TEST] assembler tokenizer\n");
+
+    // Expose the source text to the tokenizer as a read-only stream.
+    FILE *source = fmemopen(inputBuffer, strlen(inputBuffer), "r");
+    TokenStream* stream = makeTokenStream(source);
+
+    // Temporarily point stdout at a memory buffer so the printed tokens can be
+    // inspected afterwards.
+    char *captured = malloc(10240);
+    FILE *sink = fmemopen(captured, 10240, "w");
+    FILE *saved_stdout = stdout;
+    stdout = sink;
+    for (Token *tok = peekToken(stream); tok->type != ENDOFFILE; tok = peekToken(stream)) {
+        printToken(tok);
+        nextToken(stream);
+    }
+    fclose(sink);
+    stdout = saved_stdout;
+
+    // The captured text must match the expected token listing exactly.
+    assert(strcmp(captured, expectedOutput) == 0);
+    printf("[PASS] assembler tokenizer\n");
+    return 0;
+}
+
+