aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
m---------3rdparty/algds0
-rw-r--r--Makefile13
-rwxr-xr-xscripts/runall.sh9
-rw-r--r--src/bamboo.c22
-rw-r--r--src/bamboo.h32
-rw-r--r--src/interp.c150
-rw-r--r--src/interp.h43
-rw-r--r--src/parser.c335
-rw-r--r--src/parser.h60
-rw-r--r--src/sexp.c24
-rw-r--r--src/sexp.h8
-rw-r--r--tests/test_parser.c82
13 files changed, 684 insertions, 95 deletions
diff --git a/.gitignore b/.gitignore
index 5e89843..5db3c66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
*.o
*.d
*.a
+*.bin
bamboo-lisp
compile_commands.json
.cache
diff --git a/3rdparty/algds b/3rdparty/algds
-Subproject b8e8f46f58136464c4fdd0aa37578f2313f0bd9
+Subproject 111a1c8b9f4bafb627dd3911857943ae9a65f29
diff --git a/Makefile b/Makefile
index 89195e4..148ff37 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,8 @@
mode ?= debug
cc = gcc
includes = -I3rdparty/algds/build/include/
-ldflags = -L3rdparty/algds/build/lib/ -lalgds
+3rdlibs = 3rdparty/algds/build/lib/libalgds.a
+ldflags = # -L3rdparty/algds/build/lib/ -lalgds
ifeq ($(mode), debug)
cflags = $(includes) \
-g \
@@ -18,8 +19,8 @@ tests_bin=$(tests:.c=.bin)
all: bamboo-lisp
-bamboo-lisp: 3rdparty/algds/build/lib/libalgds.a $(obj) src/main.c
- gcc $(ldflags) $(cflags) -o $@ $(obj) src/main.c
+bamboo-lisp: $(obj) src/main.c 3rdparty/algds/build/lib/libalgds.a
+ gcc $(ldflags) $(cflags) -o $@ $^
3rdparty/algds/build/lib/libalgds.a:
cd 3rdparty/algds && \
@@ -33,8 +34,10 @@ test: $(tests_bin)
$(obj):%.o:%.c
$(cc) -c $(cflags) $< -MD -MF $@.d -o $@
-$(tests_bin):%.bin:%.c $(obj)
- $(cc) $(ldflags) $(cflags) -Isrc/ $< $(obj) -MD -MF $@.d -o $@
+$(obj):%.o:$(3rdlibs)
+
+$(tests_bin):%.bin:%.c $(obj) $(3rdlibs)
+ $(cc) $(ldflags) $(cflags) -Isrc/ $< $(obj) $(3rdlibs) -MD -MF $@.d -o $@
clean:
-rm $(shell find tests/ -name '*.bin')
diff --git a/scripts/runall.sh b/scripts/runall.sh
new file mode 100755
index 0000000..3fdc745
--- /dev/null
+++ b/scripts/runall.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+for var in "$@"; do
+ ./$var
+ if [ $? -ne 0 ]; then
+ exit 255
+ fi
+done
+
diff --git a/src/bamboo.c b/src/bamboo.c
deleted file mode 100644
index 70a84f5..0000000
--- a/src/bamboo.c
+++ /dev/null
@@ -1,22 +0,0 @@
-#include "bamboo.h"
-
-SExpRef new_list1(Bamboo *ctx, SExpRef e1) {
- return cons(ctx, e1, nil(ctx));
-}
-
-SExpRef new_list2(Bamboo *ctx, SExpRef e1, SExpRef e2) {
- return cons(ctx, e1, new_list1(ctx, e2));
-}
-
-SExpRef new_list3(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3) {
- return cons(ctx, e1, new_list2(ctx, e2, e3));
-}
-
-SExpRef new_list4(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4) {
- return cons(ctx, e1, new_list3(ctx, e2, e3, e4));
-}
-
-SExpRef new_list5(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5) {
- return cons(ctx, e1, new_list4(ctx, e2, e3, e4, e5));
-}
-
diff --git a/src/bamboo.h b/src/bamboo.h
deleted file mode 100644
index a424be6..0000000
--- a/src/bamboo.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef BAMBOO_LISP_BAMBOO_H_
-#define BAMBOO_LISP_BAMBOO_H_
-
-#include <algds/hash_table.h>
-
-#include "sexp.h"
-
-typedef struct {
- SExpVector objs;
- String2IntHashTable symbols;
-} Bamboo;
-
-void Bamboo_init(Bamboo *self);
-SExp* Bamboo_ref(Bamboo *self, SExpRef ref);
-// TODO: Heap_gc()
-
-SExpRef new_integer(Bamboo *ctx, int64_t val);
-SExpRef new_real(Bamboo *ctx, double val);
-SExpRef new_string(Bamboo *ctx, const char *val);
-SExpRef new_symbol(Bamboo *ctx, const char *val);
-SExpRef cons(Bamboo *ctx, SExpRef car, SExpRef cdr);
-SExpRef nil(Bamboo *ctx);
-SExpRef new_list1(Bamboo *ctx, SExpRef e1);
-SExpRef new_list2(Bamboo *ctx, SExpRef e1, SExpRef e2);
-SExpRef new_list3(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3);
-SExpRef new_list4(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4);
-SExpRef new_list5(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5);
-SExpRef new_list6(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5, SExpRef e6);
-SExpRef new_list7(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5, SExpRef e6, SExpRef e7);
-
-#endif
-
diff --git a/src/interp.c b/src/interp.c
new file mode 100644
index 0000000..bf49dd5
--- /dev/null
+++ b/src/interp.c
@@ -0,0 +1,150 @@
+#include "interp.h"
+#include "algds/hash_table.h"
+#include "sexp.h"
+
+// Initialise an interpreter: set up the object store, the free-slot list and
+// the symbol table, then create the permanent cells.
+//   slot 0     - nil
+//   slot 1     - the top-level environment (nil parent, nil bindings)
+//   slots 2... - empty cells, each one recorded in empty_space
+// Finally `evaluating` is set to nil and `stack` to a one-element list that
+// holds the top-level environment.
+void Interp_init(Interp *self) {
+    enum { kReservedSlots = 2, kInitialFreeSlots = 1023 };
+
+    SExpVector_init(&self->objs);
+    IntVector_init(&self->empty_space);
+    String2IntHashTable_init(&self->symbols);
+    self->gc_paused = false;
+
+    // Zero the template so no indeterminate bytes are copied into the store.
+    SExp sexp = {0};
+    sexp.type = kNilSExp;
+    SExpVector_push_back(&self->objs, sexp);
+    self->nil = (SExpRef){0};
+
+    sexp.type = kEnvSExp;
+    sexp.env.parent = self->nil;
+    sexp.env.bindings = self->nil;
+    SExpVector_push_back(&self->objs, sexp);
+    self->top_level = (SExpRef){1};
+
+    // Only the cells appended below are free. The free list must start after
+    // the reserved slots; otherwise the top-level environment (slot 1) would
+    // eventually be handed out by new_sexp and overwritten.
+    sexp = (SExp){0};
+    sexp.type = kEmptySExp;
+    for (int i = 0; i < kInitialFreeSlots; i++) {
+        SExpVector_push_back(&self->objs, sexp);
+        IntVector_push_back(&self->empty_space, kReservedSlots + i);
+    }
+
+    self->evaluating = self->nil;
+    self->stack = cons(self, self->top_level, self->nil);
+}
+
+// Release everything the interpreter holds: the text buffer of each string
+// and symbol cell, then the symbol table, the object store and the free list.
+void Interp_free(Interp *self) {
+    size_t count = SExpVector_len(&self->objs);
+    for (size_t slot = 0; slot < count; slot++) {
+        SExp *cell = SExpVector_ref(&self->objs, slot);
+        switch (cell->type) {
+        case kSymbolSExp:
+        case kStringSExp:
+            free((void*)cell->str);
+            break;
+        default:
+            break;
+        }
+    }
+    String2IntHashTable_free(&self->symbols);
+    SExpVector_free(&self->objs);
+    IntVector_free(&self->empty_space);
+}
+
+// Resolve a reference to the cell it names.
+// Returns NULL when the index lies outside the object store (this includes
+// the -1 index that ParseErr stores in a failed ParseResult).
+SExp* Interp_ref(Interp *self, SExpRef ref) {
+    // Valid indices are 0 .. len-1, so an index equal to len is already out
+    // of range.
+    if (ref.idx < 0) return NULL;
+    if ((size_t)ref.idx >= (size_t)SExpVector_len(&self->objs)) return NULL;
+    return SExpVector_ref(&self->objs, ref.idx);
+}
+
+// Garbage-collection entry point. Not implemented yet: the body is empty, so
+// a call reclaims nothing and leaves the free-slot list unchanged.
+void Interp_gc(Interp *interp) {
+ // TODO
+}
+
+// Allocate one cell and return a reference to it.
+// A slot from the free list is reused when one exists. When the list is empty
+// a collection is attempted first (unless GC is paused); if that frees
+// nothing, the object store grows by one empty cell.
+SExpRef new_sexp(Interp *interp) {
+    if (IntVector_len(&interp->empty_space) == 0 && !interp->gc_paused) {
+        Interp_gc(interp);
+    }
+    // Re-check after the collection: it may not have reclaimed anything, and
+    // popping from an empty free list would read out of bounds.
+    if (IntVector_len(&interp->empty_space) == 0) {
+        SExp sexp = {0};
+        sexp.type = kEmptySExp;
+        SExpVector_push_back(&interp->objs, sexp);
+        return (SExpRef){ SExpVector_len(&interp->objs) - 1 };
+    }
+    int idx = *IntVector_ref(&interp->empty_space, IntVector_len(&interp->empty_space) - 1);
+    IntVector_pop(&interp->empty_space);
+    return (SExpRef){idx};
+}
+
+// Allocate a cell holding the boolean `val`.
+SExpRef new_boolean(Interp *interp, bool val) {
+    SExpRef ref = new_sexp(interp);
+    SExp *cell = Interp_ref(interp, ref);
+    cell->boolean = val;
+    cell->type = kBooleanSExp;
+    return ref;
+}
+
+// Allocate a cell holding the character `val`.
+SExpRef new_char(Interp *interp, char val) {
+    SExpRef ref = new_sexp(interp);
+    SExp *cell = Interp_ref(interp, ref);
+    cell->character = val;
+    cell->type = kCharSExp;
+    return ref;
+}
+
+// Allocate a cell holding the 64-bit integer `val`.
+SExpRef new_integer(Interp *interp, int64_t val) {
+    SExpRef ref = new_sexp(interp);
+    SExp *cell = Interp_ref(interp, ref);
+    cell->integer = val;
+    cell->type = kIntegerSExp;
+    return ref;
+}
+
+// Allocate a cell holding the floating-point number `val`.
+SExpRef new_real(Interp *interp, double val) {
+    SExpRef ref = new_sexp(interp);
+    SExp *cell = Interp_ref(interp, ref);
+    cell->real = val;
+    cell->type = kRealSExp;
+    return ref;
+}
+
+// Allocate a string cell. The cell stores its own strdup'ed copy of `val`,
+// so the caller keeps ownership of the buffer it passed in.
+SExpRef new_string(Interp *interp, const char *val) {
+    char *copy = strdup(val);
+    SExpRef ref = new_sexp(interp);
+    SExp *cell = Interp_ref(interp, ref);
+    cell->str = copy;
+    cell->type = kStringSExp;
+    return ref;
+}
+
+// Return the symbol named `val`, interning it on first use.
+// A name already present in the symbol table yields the reference recorded
+// there; otherwise a new symbol cell with its own copy of the name is created
+// and registered under that copy.
+SExpRef new_symbol(Interp *interp, const char *val) {
+    String2IntHashTableIter found = String2IntHashTable_find(&interp->symbols, val);
+    if (found != NULL) {
+        return (SExpRef){ found->val };
+    }
+
+    char *name = strdup(val);
+    SExpRef ref = new_sexp(interp);
+    SExp *cell = Interp_ref(interp, ref);
+    cell->type = kSymbolSExp;
+    cell->str = name;
+    String2IntHashTable_insert(&interp->symbols, name, ref.idx);
+    return ref;
+}
+
+// Allocate a pair cell whose halves are `car` and `cdr`.
+SExpRef cons(Interp *interp, SExpRef car, SExpRef cdr) {
+    SExpRef ref = new_sexp(interp);
+    SExp *cell = Interp_ref(interp, ref);
+    cell->pair.cdr = cdr;
+    cell->pair.car = car;
+    cell->type = kPairSExp;
+    return ref;
+}
+
+// Build the one-element list (e1).
+SExpRef new_list1(Interp *interp, SExpRef e1) {
+    SExpRef tail = interp->nil;
+    return cons(interp, e1, tail);
+}
+
+// Build the two-element list (e1 e2).
+SExpRef new_list2(Interp *interp, SExpRef e1, SExpRef e2) {
+    SExpRef tail = new_list1(interp, e2);
+    return cons(interp, e1, tail);
+}
+
+// Build the three-element list (e1 e2 e3).
+SExpRef new_list3(Interp *interp, SExpRef e1, SExpRef e2, SExpRef e3) {
+    SExpRef tail = new_list2(interp, e2, e3);
+    return cons(interp, e1, tail);
+}
+
+// Build the four-element list (e1 e2 e3 e4).
+SExpRef new_list4(Interp *interp, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4) {
+    SExpRef tail = new_list3(interp, e2, e3, e4);
+    return cons(interp, e1, tail);
+}
+
+// Build the five-element list (e1 e2 e3 e4 e5).
+SExpRef new_list5(Interp *interp, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5) {
+    SExpRef tail = new_list4(interp, e2, e3, e4, e5);
+    return cons(interp, e1, tail);
+}
+
diff --git a/src/interp.h b/src/interp.h
new file mode 100644
index 0000000..6ec1c6d
--- /dev/null
+++ b/src/interp.h
@@ -0,0 +1,43 @@
+#ifndef BAMBOO_LISP_INTERP_H_
+#define BAMBOO_LISP_INTERP_H_
+
+#include <stdbool.h>
+
+#include <algds/hash_table.h>
+
+#include "sexp.h"
+
+// Interpreter state: the cell store plus the references into it that the
+// interpreter keeps.
+typedef struct {
+ // When true, new_sexp grows `objs` instead of calling Interp_gc once the
+ // free list is empty.
+ bool gc_paused;
+ // Storage for every cell; an SExpRef is an index into this vector.
+ SExpVector objs;
+ // Indices of currently unused slots in `objs`; new_sexp pops from the end.
+ IntVector empty_space;
+ // Maps a symbol's name to the index of its interned symbol cell.
+ String2IntHashTable symbols;
+ // Set by Interp_init to a one-element list holding `top_level`.
+ SExpRef stack;
+ // Set by Interp_init to nil.
+ SExpRef evaluating;
+ // The top-level environment cell (index 1 after Interp_init).
+ SExpRef top_level;
+ // The nil cell (index 0 after Interp_init).
+ SExpRef nil;
+} Interp;
+
+// Lifecycle and cell access.
+void Interp_init(Interp *self);
+void Interp_free(Interp *self);
+SExp* Interp_ref(Interp *self, SExpRef ref);
+void Interp_gc(Interp *self);
+// NOTE(review): the two functions below are declared here, but no definition
+// appears in the interp.c shown in this change - confirm where they live.
+void Interp_pause_gc(Interp *self);
+void Interp_restart_gc(Interp *self);
+
+// Cell constructors. Each returns a reference to a cell inside `ctx`.
+SExpRef new_sexp(Interp *ctx);
+SExpRef new_boolean(Interp *ctx, bool val);
+SExpRef new_char(Interp *ctx, char val);
+SExpRef new_integer(Interp *ctx, int64_t val);
+SExpRef new_real(Interp *ctx, double val);
+SExpRef new_string(Interp *ctx, const char *val);
+SExpRef new_symbol(Interp *ctx, const char *val);
+SExpRef cons(Interp *ctx, SExpRef car, SExpRef cdr);
+// Fixed-arity list builders: new_listN returns the nil-terminated list of its
+// N element arguments.
+SExpRef new_list1(Interp *ctx, SExpRef e1);
+SExpRef new_list2(Interp *ctx, SExpRef e1, SExpRef e2);
+SExpRef new_list3(Interp *ctx, SExpRef e1, SExpRef e2, SExpRef e3);
+SExpRef new_list4(Interp *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4);
+SExpRef new_list5(Interp *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5);
+
+#endif
+
diff --git a/src/parser.c b/src/parser.c
index d164186..ff7d625 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,11 +1,15 @@
#include "parser.h"
+#include "sexp.h"
#include <ctype.h>
#include <stdlib.h>
+#include <stdarg.h>
-static void skip_spaces(Parser *ctx) {
- while (isspace(parser_peek(ctx))) {
- parser_getchar(ctx);
+#define BUFSIZE 1024
+
+static void skip_spaces(Parser *parser) {
+ while (isspace(Parser_peek(parser))) {
+ Parser_getchar(parser);
}
}
@@ -13,26 +17,325 @@ ParseResult ParseOk(SExpRef ref) {
return (ParseResult){ .val = ref, .errmsg = NULL };
}
-ParseResult ParseErr(const char *msg) {
- return (ParseResult){ .val = {-1}, .errmsg = msg };
+// Build a failed ParseResult. The printf-style message is formatted into the
+// parser's own error buffer (truncated to BUFSIZE), so the returned errmsg
+// stays valid only until the next ParseErr on the same parser.
+ParseResult ParseErr(Parser *parser, const char *format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    vsnprintf(parser->errmsg_buf, BUFSIZE, format, ap);
+    va_end(ap);
+
+    ParseResult failure = { .val = {-1}, .errmsg = parser->errmsg_buf };
+    return failure;
+}
+
+bool ParseResult_is_err(ParseResult res) {
+ if (res.errmsg != NULL) return true;
+ return false;
+}
+
+// Allocate the parser's two BUFSIZE-byte scratch buffers (token text and
+// error message). The input source and `ctx` are left for the caller to set.
+void Parser_init(Parser *parser) {
+    char *tokens = malloc(BUFSIZE);
+    char *errors = malloc(BUFSIZE);
+    parser->token_buf = tokens;
+    parser->errmsg_buf = errors;
+}
+
+// Release the buffers allocated by Parser_init.
+// The pointers are reset to NULL afterwards so a repeated call is harmless
+// and stale use is easier to spot. Any errmsg previously returned through
+// ParseErr points into errmsg_buf and must not be used after this call.
+void Parser_free(Parser *parser) {
+    free(parser->token_buf);
+    parser->token_buf = NULL;
+    free(parser->errmsg_buf);
+    parser->errmsg_buf = NULL;
+}
+
+// Make the parser read from the NUL-terminated string `str`, starting at its
+// first character. The string is not copied.
+void Parser_set_string(Parser *parser, const char *str) {
+    parser->str_cursor = str;
+    parser->string = str;
+    parser->parse_type = kParseString;
}
-ParseResult parse_sexp(Parser *ctx) {
- skip_spaces(ctx);
- int next = parser_peek(ctx);
+// Make the parser read from the stream `fp`.
+void Parser_set_file(Parser *parser, FILE *fp) {
+    parser->fp = fp;
+    parser->parse_type = kParseFile;
+}
+
+// Consume and return the next input character, or EOF at end of input.
+// Characters are returned as unsigned char values converted to int, in both
+// string and file mode.
+int Parser_getchar(Parser *ctx) {
+    if (ctx->parse_type == kParseString) {
+        if (*ctx->str_cursor == '\0') return EOF;
+        // Convert through unsigned char, as fgetc does. A plain `char` may be
+        // signed, which would make bytes >= 0x80 negative (0xFF would compare
+        // equal to EOF) and unsafe to pass to the <ctype.h> functions.
+        int ret = (unsigned char)*ctx->str_cursor;
+        ctx->str_cursor++;
+        return ret;
+    } else if (ctx->parse_type == kParseFile) {
+        return fgetc(ctx->fp);
+    }
+    return EOF;
+}
+
+// Return the next input character without consuming it, or EOF at end of
+// input. Characters are returned as unsigned char values converted to int.
+int Parser_peek(Parser *ctx) {
+    if (ctx->parse_type == kParseString) {
+        if (*ctx->str_cursor == '\0') return EOF;
+        // Same conversion as fgetc, so a byte >= 0x80 is never negative and
+        // can never be mistaken for EOF.
+        return (unsigned char)*ctx->str_cursor;
+    } else if (ctx->parse_type == kParseFile) {
+        // Read one character and push it straight back.
+        int ret = fgetc(ctx->fp);
+        if (ret == EOF) return EOF;
+        ungetc(ret, ctx->fp);
+        return ret;
+    }
+    return EOF;
+}
+
+// Parse one s-expression after skipping leading whitespace.
+// Dispatches on the first character: `(` starts a list, `,` / `,@` / backquote /
+// `'` are reader abbreviations, anything else is handed to parse_atom.
+// Returns an error result when the input ends before an expression starts.
+ParseResult parse_sexp(Parser *parser) {
+ skip_spaces(parser);
+ if (Parser_peek(parser) == EOF) {
+ return ParseErr(parser, "Unexpected EOF.\n.");
+ }
+ int next = Parser_peek(parser);
if (next == '(') {
- return parse_list(ctx);
+ // The `(` stays in the stream; parse_list consumes it itself.
+ return parse_list(parser);
} else if (next == ',') {
- parser_getchar(ctx);
- if (parser_peek(ctx) == '@') {
- return parse_slicing_unquote(ctx);
+ // The abbreviation characters are consumed here, before delegating.
+ Parser_getchar(parser);
+ if (Parser_peek(parser) == '@') {
+ Parser_getchar(parser);
+ return parse_slicing_unquote(parser);
}
- return parse_unquote(ctx);
+ return parse_unquote(parser);
} else if (next == '`') {
- return parse_quasi(ctx);
+ Parser_getchar(parser);
+ return parse_quasi(parser);
} else if (next == '\'') {
- return parse_quote(ctx);
+ Parser_getchar(parser);
+ return parse_quote(parser);
+ }
+ return parse_atom(parser);
+}
+
+// Consume the next character if it equals `chr`.
+// Returns an OK result carrying nil on a match; otherwise nothing is consumed
+// and an error result is returned (a separate message is used at end of input).
+static ParseResult expect_char(Parser *parser, int chr) {
+    int next = Parser_peek(parser);
+    if (next == EOF) {
+        return ParseErr(parser, "Unexpected EOF.\n.");
+    }
+    if (next == chr) {
+        Parser_getchar(parser);
+        return ParseOk(parser->ctx->nil);
+    }
+    // Report the character that was actually found, not the expected one.
+    return ParseErr(parser, "Unexpected character %c.\n", (char)next);
+}
+
+// Consume exactly one whitespace character.
+// Returns an OK result carrying nil on success; at end of input or on a
+// non-space character nothing is consumed and an error result is returned.
+static ParseResult expect_space(Parser *parser) {
+    int next = Parser_peek(parser);
+    if (next == EOF) {
+        return ParseErr(parser, "Unexpected EOF.\n.");
+    }
+    if (!isspace(next)) {
+        return ParseErr(parser, "Expect space.\n");
+    }
+    Parser_getchar(parser);
+    return ParseOk(parser->ctx->nil);
+}
+
+// Fold a vector of references into a chain of pairs.
+// The LAST vector element is the tail of the chain (nil for a proper list,
+// any other value for a dotted list); every earlier element is consed in
+// front of it, so [a, b, t] becomes (a b . t). An empty vector yields nil.
+static SExpRef build_list_from_vector(Interp *ctx, SExpRefVector elems) {
+    int last = SExpRefVector_len(&elems) - 1;
+    if (last < 0) return ctx->nil;
+    SExpRef ret = *SExpRefVector_ref(&elems, last);
+    // Start one below the tail: the tail itself must not be consed on again.
+    for (int i = last - 1; i >= 0; i--) {
+        SExpRef cur = *SExpRefVector_ref(&elems, i);
+        ret = cons(ctx, cur, ret);
+    }
+    return ret;
+}
+
+// Parse a parenthesised list, proper `(a b c)` or dotted `(a b . c)`.
+// Elements are collected in a vector whose last entry is the list tail (nil
+// for a proper list, the expression after the dot otherwise) and then turned
+// into pairs by build_list_from_vector. Elements are separated by whitespace;
+// a closing `)` may follow an element directly.
+ParseResult parse_list(Parser *parser) {
+    SExpRefVector elems;
+    SExpRefVector_init(&elems);
+    ParseResult ret;
+
+    ret = expect_char(parser, '(');
+    if (ParseResult_is_err(ret)) goto end;
+    skip_spaces(parser);
+    while (1) {
+        int next = Parser_peek(parser);
+        if (next == EOF) {
+            ret = ParseErr(parser, "Unexpected EOF.\n.");
+            goto end;
+        }
+        if (next == ')') {
+            Parser_getchar(parser);
+            // Proper list: the tail is nil.
+            SExpRefVector_push_back(&elems, parser->ctx->nil);
+            ret = ParseOk(build_list_from_vector(parser->ctx, elems));
+            goto end;
+        } else if (next == '.') {
+            Parser_getchar(parser);
+            break;
+        }
+        ret = parse_sexp(parser);
+        if (ParseResult_is_err(ret)) goto end;
+        SExpRefVector_push_back(&elems, ret.val);
+        // Whitespace is only required between elements; `(a (b ))` must not
+        // be rejected because the inner list is followed directly by `)`.
+        if (Parser_peek(parser) == ')') continue;
+        ret = expect_space(parser);
+        if (ParseResult_is_err(ret)) goto end;
+        skip_spaces(parser);
}
- return parse_atom(ctx);
+    // Dotted tail: ". <sexp> )" - the dot has already been consumed.
+    ret = expect_space(parser);
+    if (ParseResult_is_err(ret)) goto end;
+    skip_spaces(parser);
+    ret = parse_sexp(parser);
+    if (ParseResult_is_err(ret)) goto end;
+    SExpRefVector_push_back(&elems, ret.val);
+    skip_spaces(parser);
+    ret = expect_char(parser, ')');
+    if (ParseResult_is_err(ret)) goto end;
+    ret = ParseOk(build_list_from_vector(parser->ctx, elems));
+end:
+    // Single exit so the scratch vector is released on every path.
+    SExpRefVector_free(&elems);
+    return ret;
+}
+
+// Read one token into parser->token_buf and return it NUL-terminated.
+// A token ends at whitespace, at end of input, or in front of a parenthesis,
+// so `(a b)` splits into `a` and `b`. The one exception is a parenthesis
+// directly after `#\`, which is the body of a character literal such as `#\(`.
+// Returns NULL when the token fills the buffer (BUFSIZE - 1 characters); the
+// rest of such a token is left unread.
+static char *read_token(Parser *parser) {
+    int i = 0;
+    while (i < BUFSIZE - 1) {
+        int c = Parser_peek(parser);
+        if (c == EOF) break;
+        // <ctype.h> functions need an unsigned char value.
+        if (isspace((unsigned char)c)) break;
+        if (c == '(' || c == ')') {
+            bool char_literal_body = i == 2
+                && parser->token_buf[0] == '#'
+                && parser->token_buf[1] == '\\';
+            if (!char_literal_body) break;
+        }
+        parser->token_buf[i] = (char)Parser_getchar(parser);
+        i++;
+    }
+    if (i >= BUFSIZE - 1) return NULL;
+    parser->token_buf[i] = '\0';
+    return parser->token_buf;
+}
+
+// True when `c` may start a symbol: a letter or one of
+// ! $ % & * / : < = > ? ^ _ ~
+static bool is_symbol_init(char c) {
+    // isalpha requires an unsigned char value; a negative `char` is undefined
+    // behaviour.
+    if (isalpha((unsigned char)c)) return true;
+    switch (c) {
+    case '!': case '$': case '%': case '&': case '*':
+    case '/': case ':': case '<': case '=': case '>':
+    case '?': case '^': case '_': case '~':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// True when `c` may appear after the first character of a symbol: anything
+// accepted by is_symbol_init, a digit, or one of + - . @
+static bool is_symbol_subsequent(char c) {
+    if (is_symbol_init(c)) return true;
+    // isdigit requires an unsigned char value; a negative `char` is undefined
+    // behaviour.
+    if (isdigit((unsigned char)c)) return true;
+    return c == '+' || c == '-' || c == '.' || c == '@';
+}
+
+// Classify a complete token and build the matching atom.
+//   `+` / `-` alone            -> symbol
+//   #t #true #f #false         -> boolean
+//   #\c, #\newline, #\space,
+//   #\tab, #\return            -> character
+//   symbol-initial character   -> symbol (every later character is checked)
+//   otherwise                  -> integer if strtoll consumes the whole
+//                                 token, else real if strtod does
+// Anything else is an error result.
+static ParseResult parse_token(Parser *parser, const char *token) {
+    int len = strlen(token);
+    if (len == 0) return ParseErr(parser, "Empty token.\n");
+    // A lone sign is a symbol, not a number.
+    if (len == 1 && (token[0] == '-' || token[0] == '+')) {
+        return ParseOk(new_symbol(parser->ctx, token));
+    }
+    if (token[0] == '#') {
+        const char *rest = token + 1;
+        // Match the boolean spellings exactly so that `#tx` is rejected
+        // instead of being read as true.
+        if (strcmp(rest, "t") == 0 || strcmp(rest, "true") == 0) {
+            return ParseOk(new_boolean(parser->ctx, true));
+        }
+        if (strcmp(rest, "f") == 0 || strcmp(rest, "false") == 0) {
+            return ParseOk(new_boolean(parser->ctx, false));
+        }
+        if (rest[0] == '\\') {
+            const char *name = token + 2;
+            if (len < 3) return ParseErr(parser, "Expect character.\n");
+            if (len == 3) return ParseOk(new_char(parser->ctx, name[0]));
+            if (strcmp(name, "newline") == 0) return ParseOk(new_char(parser->ctx, '\n'));
+            if (strcmp(name, "space") == 0) return ParseOk(new_char(parser->ctx, ' '));
+            if (strcmp(name, "tab") == 0) return ParseOk(new_char(parser->ctx, '\t'));
+            if (strcmp(name, "return") == 0) return ParseOk(new_char(parser->ctx, '\r'));
+            return ParseErr(parser, "Unknown character name: %s\n.", name);
+        }
+        // No other `#` syntax exists; say so rather than falling through to
+        // the number parser and reporting "Not a number".
+        return ParseErr(parser, "Expect boolean or character.\n");
+    }
+    if (is_symbol_init(token[0])) {
+        for (int i = 1; i < len; i++) {
+            if (!is_symbol_subsequent(token[i])) {
+                return ParseErr(parser, "Not a symbol, containing illegal character: %s\n.", token);
+            }
+        }
+        return ParseOk(new_symbol(parser->ctx, token));
+    }
+    // Numbers: the conversion must consume the entire token to count.
+    char *endptr;
+    int64_t integer = strtoll(token, &endptr, 10);
+    if (endptr == token + len) return ParseOk(new_integer(parser->ctx, integer));
+    double real = strtod(token, &endptr);
+    if (endptr == token + len) return ParseOk(new_real(parser->ctx, real));
+    return ParseErr(parser, "Not a number : %s\n.", token);
+}
+
+// Parse a double-quoted string literal; the stream must be positioned on the
+// opening quote. Recognised escapes are \\ \t \n \r and \". Both quotes are
+// consumed. Returns a string cell on success and an error result on end of
+// input, an embedded NUL, or an unknown escape.
+ParseResult parse_string(Parser *parser) {
+    ParseResult ret;
+    CharVector buf;
+    CharVector_init(&buf);
+    Parser_getchar(parser);  // opening quote
+    while (1) {
+        int c = Parser_getchar(parser);
+        // Every failure goes through `end` so the scratch buffer is freed.
+        if (c == EOF) {
+            ret = ParseErr(parser, "Unexpected EOF.\n.");
+            goto end;
+        }
+        // The closing quote has just been consumed, so the caller continues
+        // with the character after the literal.
+        if (c == '"') break;
+        if (c == '\0') {
+            ret = ParseErr(parser, "Unexpected zero terminator.\n");
+            goto end;
+        }
+        if (c != '\\') {
+            CharVector_push_back(&buf, c);
+            continue;
+        }
+        c = Parser_getchar(parser);
+        if (c == EOF) {
+            ret = ParseErr(parser, "Unexpected EOF.\n.");
+            goto end;
+        } else if (c == '\\') CharVector_push_back(&buf, '\\');
+        else if (c == 't') CharVector_push_back(&buf, '\t');
+        else if (c == 'n') CharVector_push_back(&buf, '\n');
+        else if (c == 'r') CharVector_push_back(&buf, '\r');
+        else if (c == '"') CharVector_push_back(&buf, '"');
+        else {
+            ret = ParseErr(parser, "Unexpected escape char: %c.\n", c);
+            goto end;
+        }
+    }
+    CharVector_push_back(&buf, '\0');
+    ret = ParseOk(new_string(parser->ctx, buf.buffer));
+end:
+    CharVector_free(&buf);
+    return ret;
+}
+
+// Parse a non-list expression: a string literal when the next character is a
+// double quote, otherwise a single token classified by parse_token.
+ParseResult parse_atom(Parser *parser) {
+    int next = Parser_peek(parser);
+    if (next == EOF) {
+        return ParseErr(parser, "Unexpected EOF.\n.");
+    }
+    if (next == '"') {
+        return parse_string(parser);
+    }
+
+    const char *token = read_token(parser);
+    if (token != NULL) {
+        return parse_token(parser, token);
+    }
+    return ParseErr(parser, "Token too long.\n");
+}
+
+// Expand a reader abbreviation whose prefix character(s) have already been
+// consumed: parse the following expression X and return the two-element list
+// (name X), e.g. 'x becomes (quote x). Whitespace between the prefix and the
+// expression is an error.
+ParseResult parse_abbrev(Parser *parser, const char *name) {
+    if (isspace(Parser_peek(parser))) {
+        return ParseErr(parser, "Unexpected space.\n");
+    }
+    ParseResult ret;
+    ret = parse_sexp(parser);
+    if (ParseResult_is_err(ret)) return ret;
+    SExpRef sym = new_symbol(parser->ctx, name);
+    // Build a proper list. A bare cons would give the dotted pair (name . X),
+    // which for a list X reads as (name x1 x2 ...) rather than (name X).
+    return ParseOk(new_list2(parser->ctx, sym, ret.val));
+}
+
+// Handle the `'` abbreviation: delegates to parse_abbrev with the symbol
+// name "quote". parse_sexp consumes the `'` before calling this.
+ParseResult parse_quote(Parser *parser) {
+ return parse_abbrev(parser, "quote");
+}
+
+// Handle the `,` abbreviation: delegates to parse_abbrev with the symbol
+// name "unquote". parse_sexp consumes the `,` before calling this.
+ParseResult parse_unquote(Parser *parser) {
+ return parse_abbrev(parser, "unquote");
+}
+
+// Handle the `,@` abbreviation: delegates to parse_abbrev with the symbol
+// name "slicing-unquote". parse_sexp consumes the `,@` before calling this.
+ParseResult parse_slicing_unquote(Parser *parser) {
+ return parse_abbrev(parser, "slicing-unquote");
+}
+
+// Handle the backquote abbreviation: delegates to parse_abbrev with the
+// symbol name "quasiquote". parse_sexp consumes the backquote before calling
+// this.
+ParseResult parse_quasi(Parser *parser) {
+ return parse_abbrev(parser, "quasiquote");
}
diff --git a/src/parser.h b/src/parser.h
index 3f159b8..cefc946 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -3,33 +3,59 @@
#include <stdbool.h>
+#include "interp.h"
#include "sexp.h"
-typedef struct {
+// Which input source a Parser reads from.
+typedef enum {
+ // In-memory NUL-terminated string (selected by Parser_set_string).
+ kParseString,
+ // stdio stream (selected by Parser_set_file).
+ kParseFile,
+} ParseType;
+// Parser state: the interpreter that receives the parsed cells, two scratch
+// buffers, and the current input source.
+typedef struct {
+ // Interpreter in which parsed values are allocated. Parser_init does not
+ // set it; the caller assigns it.
+ Interp *ctx;
+ // Buffer that ParseErr formats its message into (allocated by Parser_init).
+ char *errmsg_buf;
+ // Buffer that holds the token currently being read (allocated by Parser_init).
+ char *token_buf;
+
+ // Selects which member of the union below is active.
+ ParseType parse_type;
+ union {
+ struct {
+ // Start of the input string (kParseString).
+ const char *string;
+ // Next unread character of the input string (kParseString).
+ const char *str_cursor;
+ };
+ // Input stream (kParseFile).
+ FILE *fp;
+ };
} Parser;
+void Parser_init(Parser *self);
+void Parser_free(Parser *self);
+int Parser_getchar(Parser *self);
+int Parser_peek(Parser *self);
+void Parser_set_string(Parser *parser, const char *str);
+void Parser_set_file(Parser *parser, FILE *fp);
+
typedef struct {
SExpRef val;
const char *errmsg;
} ParseResult;
-int parser_getchar(Parser *ctx);
-int parser_peek(Parser *ctx);
-
-ParseResult parse_sexp(Parser *ctx);
-ParseResult parse_list(Parser *ctx);
-ParseResult parse_quote(Parser *ctx);
-ParseResult parse_unquote(Parser *ctx);
-ParseResult parse_slicing_unquote(Parser *ctx);
-ParseResult parse_quasi(Parser *ctx);
-ParseResult parse_atom(Parser *ctx);
-ParseResult parse_number(Parser *ctx);
-ParseResult parse_integer(Parser *ctx);
-ParseResult parse_real(Parser *ctx);
-ParseResult parse_symbol(Parser *ctx);
-ParseResult parse_string(Parser *ctx);
-ParseResult parse_char(Parser *ctx);
+ParseResult ParseOk(SExpRef ref);
+ParseResult ParseErr(Parser *parser, const char *format, ...);
+bool ParseResult_is_err(ParseResult res);
+
+
+ParseResult parse_sexp(Parser *parser);
+ParseResult parse_list(Parser *parser);
+ParseResult parse_quote(Parser *parser);
+ParseResult parse_unquote(Parser *parser);
+ParseResult parse_slicing_unquote(Parser *parser);
+ParseResult parse_quasi(Parser *parser);
+ParseResult parse_atom(Parser *parser);
+ParseResult parse_number(Parser *parser);
+ParseResult parse_integer(Parser *parser);
+ParseResult parse_real(Parser *parser);
+ParseResult parse_symbol(Parser *parser);
+ParseResult parse_string(Parser *parser);
+ParseResult parse_char(Parser *parser);
#endif
diff --git a/src/sexp.c b/src/sexp.c
index b268414..5e80d07 100644
--- a/src/sexp.c
+++ b/src/sexp.c
@@ -1,8 +1,30 @@
#include "sexp.h"
#include "algds/vec.h"
+#include <inttypes.h>
+
+// Stub: currently prints nothing and ignores both arguments.
+void SExpRef_show(SExpRef self, FILE* fp) { }
+
void SExp_show(SExp self, FILE* fp) {
- fprintf(fp, "{SEXP}");
+ if (self.type == kEmptySExp) fprintf(fp, "<EMPTY>");
+ else if (self.type == kIntegerSExp) fprintf(fp, "%"PRId64, self.integer);
+ else if (self.type == kRealSExp) fprintf(fp, "%lf", self.real);
+ else if (self.type == kBooleanSExp) {
+ if (self.boolean) fprintf(fp, "#t");
+ else fprintf(fp, "#f");
+ } else if (self.type == kNilSExp) fprintf(fp, "()");
+ else if (self.type == kCharSExp) fprintf(fp, "#\\%c", self.character);
+ else if (self.type == kStringSExp) fprintf(fp, "\"%s\"", self.str);
+ else if (self.type == kSymbolSExp) fprintf(fp, "'%s", self.str);
+ else if (self.type == kUserDataSExp) fprintf(fp, "<%p>", self.userdata);
+ else if (self.type == kFuncSExp) fprintf(fp, "<FUNCTION>");
+ else if (self.type == kPairSExp) {
+ fprintf(fp, "(<%d> . <%d>)", self.pair.car.idx, self.pair.cdr.idx);
+ }
+ else if (self.type == kEnvSExp) fprintf(fp, "<Env>");
+ else if (self.type == kBindingSExp) fprintf(fp, "<BINDING>");
+ else if (self.type == kMacroSExp) fprintf(fp, "<MACRO>");
}
VECTOR_IMPL(SExp);
+VECTOR_IMPL(SExpRef);
diff --git a/src/sexp.h b/src/sexp.h
index dfbc4d9..92ff11b 100644
--- a/src/sexp.h
+++ b/src/sexp.h
@@ -21,6 +21,7 @@ typedef struct {
typedef struct {
SExpRef args;
SExpRef body;
+ SExpRef env;
} SExpFunc;
typedef struct {
@@ -30,7 +31,6 @@ typedef struct {
typedef struct {
SExpRef parent;
- SExpRef child;
SExpRef bindings;
} SExpEnv;
@@ -42,10 +42,11 @@ typedef struct {
} SExpBinding;
typedef enum {
+ kEmptySExp,
kIntegerSExp,
kRealSExp,
kBooleanSExp,
- kNumberSExp,
+ kNilSExp,
kCharSExp,
kStringSExp,
kSymbolSExp,
@@ -58,6 +59,7 @@ typedef enum {
} SExpType;
struct sexp {
+ bool marked;
SExpType type;
union {
int64_t integer;
@@ -73,8 +75,10 @@ struct sexp {
};
void SExp_show(SExp self, FILE* fp);
+void SExpRef_show(SExpRef self, FILE* fp);
VECTOR_DEF(SExp);
+VECTOR_DEF(SExpRef);
#endif
diff --git a/tests/test_parser.c b/tests/test_parser.c
new file mode 100644
index 0000000..c0587cb
--- /dev/null
+++ b/tests/test_parser.c
@@ -0,0 +1,82 @@
+#include <assert.h>
+#include <stdio.h>
+
+#include "interp.h"
+#include "parser.h"
+#include "sexp.h"
+
+// Test helper: point `parser` at `str` and parse a single expression from it.
+ParseResult parse_str(Parser *parser, const char* str) {
+    Parser_set_string(parser, str);
+    ParseResult parsed = parse_sexp(parser);
+    return parsed;
+}
+
+// Parse _str and assert that it succeeds with a cell of type _type_enum whose
+// member _field equals _expect. Uses the variables `parser`, `interp`, `res`
+// and `sexp` from the enclosing scope.
+#define ATOM_TEST(_str, _type_enum, _field, _expect) \
+{ \
+ res = parse_str(&parser, (_str)); \
+ assert(!ParseResult_is_err(res)); \
+ sexp = *Interp_ref(&interp, res.val); \
+ assert(sexp.type == (_type_enum)); \
+ assert(sexp._field == (_expect)); \
+}
+
+// Parse _str and assert that it succeeds with a cell of type _type_enum whose
+// `str` member compares equal to _expect (used for both strings and symbols).
+// Uses the variables `parser`, `interp`, `res` and `sexp` from the enclosing
+// scope.
+#define STRING_TEST(_str, _type_enum, _expect) \
+{ \
+ res = parse_str(&parser, (_str)); \
+ assert(!ParseResult_is_err(res)); \
+ sexp = *Interp_ref(&interp, res.val); \
+ assert(sexp.type == _type_enum); \
+ assert(strcmp(sexp.str, (_expect)) == 0); \
+}
+
+// Parse _str and assert that parsing fails. Uses the variables `parser` and
+// `res` from the enclosing scope.
+#define ERROR_TEST(_str) \
+{ \
+ res = parse_str(&parser, (_str)); \
+ assert(ParseResult_is_err(res)); \
+}
+
+// Parser test driver: parses a series of atom literals from strings and
+// checks the resulting cell type and value, or that parsing fails.
+int main() {
+ printf("[TEST] parser\n");
+ Interp interp;
+ Parser parser;
+ Interp_init(&interp);
+ Parser_init(&parser);
+ // Parser_init does not set the interpreter; attach it by hand.
+ parser.ctx = &interp;
+
+ // Scratch variables used by the *_TEST macros.
+ ParseResult res;
+ SExp sexp;
+
+ // Reals, with and without surrounding whitespace.
+ ATOM_TEST("1.11", kRealSExp, real, 1.11);
+ ATOM_TEST("-1.11", kRealSExp, real, -1.11);
+ ATOM_TEST("1.11e10", kRealSExp, real, 1.11e10);
+ ATOM_TEST("1.11 ", kRealSExp, real, 1.11);
+ ATOM_TEST("1.11e10 ", kRealSExp, real, 1.11e10);
+ ATOM_TEST(" 1.11 ", kRealSExp, real, 1.11);
+ ATOM_TEST(" 1.11e10 ", kRealSExp, real, 1.11e10);
+ ERROR_TEST("123.1x");
+
+ // Integers.
+ ATOM_TEST("42", kIntegerSExp, integer, 42);
+ ATOM_TEST("-42", kIntegerSExp, integer, -42);
+ ERROR_TEST("123x");
+
+ // Booleans.
+ ATOM_TEST("#t", kBooleanSExp, boolean, true);
+ ATOM_TEST("#f", kBooleanSExp, boolean, false);
+ ERROR_TEST("#x");
+
+ // Characters, single and named.
+ ATOM_TEST("#\\t", kCharSExp, character, 't');
+ ATOM_TEST("#\\newline", kCharSExp, character, '\n');
+ ERROR_TEST("#\\uwu");
+
+ // Strings (including an escape) and symbols.
+ STRING_TEST("\"test\"", kStringSExp, "test");
+ STRING_TEST("\"t\\nest\"", kStringSExp, "t\nest");
+ STRING_TEST("!uwu", kSymbolSExp, "!uwu");
+ STRING_TEST("-", kSymbolSExp, "-");
+ STRING_TEST("+", kSymbolSExp, "+");
+ ERROR_TEST("\"t\\xst\"");
+ ERROR_TEST("-abc");
+ ERROR_TEST("@1");
+ ERROR_TEST("a|");
+
+ Interp_free(&interp);
+ Parser_free(&parser);
+ printf("[PASS] parser\n");
+}