diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| m--------- | 3rdparty/algds | 0 | ||||
| -rw-r--r-- | Makefile | 13 | ||||
| -rwxr-xr-x | scripts/runall.sh | 9 | ||||
| -rw-r--r-- | src/bamboo.c | 22 | ||||
| -rw-r--r-- | src/bamboo.h | 32 | ||||
| -rw-r--r-- | src/interp.c | 150 | ||||
| -rw-r--r-- | src/interp.h | 43 | ||||
| -rw-r--r-- | src/parser.c | 335 | ||||
| -rw-r--r-- | src/parser.h | 60 | ||||
| -rw-r--r-- | src/sexp.c | 24 | ||||
| -rw-r--r-- | src/sexp.h | 8 | ||||
| -rw-r--r-- | tests/test_parser.c | 82 |
13 files changed, 684 insertions, 95 deletions
@@ -1,6 +1,7 @@ *.o *.d *.a +*.bin bamboo-lisp compile_commands.json .cache diff --git a/3rdparty/algds b/3rdparty/algds -Subproject b8e8f46f58136464c4fdd0aa37578f2313f0bd9 +Subproject 111a1c8b9f4bafb627dd3911857943ae9a65f29 @@ -1,7 +1,8 @@ mode ?= debug cc = gcc includes = -I3rdparty/algds/build/include/ -ldflags = -L3rdparty/algds/build/lib/ -lalgds +3rdlibs = 3rdparty/algds/build/lib/libalgds.a +ldflags = # -L3rdparty/algds/build/lib/ -lalgds ifeq ($(mode), debug) cflags = $(includes) \ -g \ @@ -18,8 +19,8 @@ tests_bin=$(tests:.c=.bin) all: bamboo-lisp -bamboo-lisp: 3rdparty/algds/build/lib/libalgds.a $(obj) src/main.c - gcc $(ldflags) $(cflags) -o $@ $(obj) src/main.c +bamboo-lisp: $(obj) src/main.c 3rdparty/algds/build/lib/libalgds.a + gcc $(ldflags) $(cflags) -o $@ $^ 3rdparty/algds/build/lib/libalgds.a: cd 3rdparty/algds && \ @@ -33,8 +34,10 @@ test: $(tests_bin) $(obj):%.o:%.c $(cc) -c $(cflags) $< -MD -MF $@.d -o $@ -$(tests_bin):%.bin:%.c $(obj) - $(cc) $(ldflags) $(cflags) -Isrc/ $< $(obj) -MD -MF $@.d -o $@ +$(obj):%.o:$(3rdlibs) + +$(tests_bin):%.bin:%.c $(obj) $(3rdlibs) + $(cc) $(ldflags) $(cflags) -Isrc/ $< $(obj) $(3rdlibs) -MD -MF $@.d -o $@ clean: -rm $(shell find tests/ -name '*.bin') diff --git a/scripts/runall.sh b/scripts/runall.sh new file mode 100755 index 0000000..3fdc745 --- /dev/null +++ b/scripts/runall.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +for var in "$@"; do + ./$var + if [ $? -ne 0 ]; then + exit 255 + fi +done + diff --git a/src/bamboo.c b/src/bamboo.c deleted file mode 100644 index 70a84f5..0000000 --- a/src/bamboo.c +++ /dev/null @@ -1,22 +0,0 @@ -#include "bamboo.h" - -SExpRef new_list1(Bamboo *ctx, SExpRef e1) { - return cons(ctx, e1, nil(ctx)); -} - -SExpRef new_list2(Bamboo *ctx, SExpRef e1, SExpRef e2) { - return cons(ctx, e1, new_list1(ctx, e2)); -} - -SExpRef new_list3(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3) { - return cons(ctx, e1, new_list2(ctx, e2, e3)); -} - -SExpRef new_list4(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4) { - return cons(ctx, e1, new_list3(ctx, e2, e3, e4)); -} - -SExpRef new_list5(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5) { - return cons(ctx, e1, new_list4(ctx, e2, e3, e4, e5)); -} - diff --git a/src/bamboo.h b/src/bamboo.h deleted file mode 100644 index a424be6..0000000 --- a/src/bamboo.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef BAMBOO_LISP_BAMBOO_H_ -#define BAMBOO_LISP_BAMBOO_H_ - -#include <algds/hash_table.h> - -#include "sexp.h" - -typedef struct { - SExpVector objs; - String2IntHashTable symbols; -} Bamboo; - -void Bamboo_init(Bamboo *self); -SExp* Bamboo_ref(Bamboo *self, SExpRef ref); -// TODO: Heap_gc() - -SExpRef new_integer(Bamboo *ctx, int64_t val); -SExpRef new_real(Bamboo *ctx, double val); -SExpRef new_string(Bamboo *ctx, const char *val); -SExpRef new_symbol(Bamboo *ctx, const char *val); -SExpRef cons(Bamboo *ctx, SExpRef car, SExpRef cdr); -SExpRef nil(Bamboo *ctx); -SExpRef new_list1(Bamboo *ctx, SExpRef e1); -SExpRef new_list2(Bamboo *ctx, SExpRef e1, SExpRef e2); -SExpRef new_list3(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3); -SExpRef new_list4(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4); -SExpRef new_list5(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5); -SExpRef new_list6(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5, SExpRef e6); -SExpRef new_list7(Bamboo *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5, SExpRef e6, SExpRef e7); - -#endif - diff --git a/src/interp.c b/src/interp.c new file mode 100644 index 0000000..bf49dd5 --- /dev/null +++ b/src/interp.c @@ -0,0 +1,150 @@ +#include "interp.h" +#include "algds/hash_table.h" +#include "sexp.h" + +void Interp_init(Interp *self) { + SExpVector_init(&self->objs); + IntVector_init(&self->empty_space); + String2IntHashTable_init(&self->symbols); + self->gc_paused = false; + SExp sexp; + sexp.type = kNilSExp; + SExpVector_push_back(&self->objs, sexp); + self->nil = (SExpRef){0}; + + sexp.type = kEnvSExp; + sexp.env.parent= self->nil; + sexp.env.bindings = self->nil; + SExpVector_push_back(&self->objs, sexp); + self->top_level = (SExpRef){1}; + sexp.type = kEmptySExp; + for (int i = 1; i < 1024; i++) { + SExpVector_push_back(&self->objs, sexp); + IntVector_push_back(&self->empty_space, i); + } + + self->evaluating = self->nil; + self->stack = cons(self, self->top_level, self->nil); +} + +void Interp_free(Interp *self) { + for (size_t i = 0; i < SExpVector_len(&self->objs); i++) { + SExp *obj = SExpVector_ref(&self->objs, i); + if (obj->type == kSymbolSExp || obj->type == kStringSExp) { + free((void*)obj->str); + } + } + String2IntHashTable_free(&self->symbols); + SExpVector_free(&self->objs); + IntVector_free(&self->empty_space); +} + +SExp* Interp_ref(Interp *self, SExpRef ref) { + if (ref.idx > SExpVector_len(&self->objs)) return NULL; + SExp *res = SExpVector_ref(&self->objs, ref.idx); + return res; +} + +void Interp_gc(Interp *interp) { + // TODO +} + +SExpRef new_sexp(Interp *interp) { + if (IntVector_len(&interp->empty_space) == 0) { + if (interp->gc_paused) { + SExp sexp; + sexp.type = kEmptySExp; + SExpVector_push_back(&interp->objs, sexp); + return (SExpRef){ SExpVector_len(&interp->objs) - 1 }; + } else Interp_gc(interp); + } + int idx = *IntVector_ref(&interp->empty_space, IntVector_len(&interp->empty_space) - 1); + IntVector_pop(&interp->empty_space); + return (SExpRef){idx}; +} + +SExpRef new_boolean(Interp *interp, bool val) { + SExpRef ret = new_sexp(interp); + SExp *psexp = Interp_ref(interp, ret); + psexp->type = kBooleanSExp; + psexp->boolean = val; + return ret; +} + +SExpRef new_char(Interp *interp, char val) { + SExpRef ret = new_sexp(interp); + SExp *psexp = Interp_ref(interp, ret); + psexp->type = kCharSExp; + psexp->character = val; + return ret; +} + +SExpRef new_integer(Interp *interp, int64_t val) { + SExpRef ret = new_sexp(interp); + SExp *psexp = Interp_ref(interp, ret); + psexp->type = kIntegerSExp; + psexp->integer = val; + return ret; +} + +SExpRef new_real(Interp *interp, double val) { + SExpRef ret = new_sexp(interp); + SExp *psexp = Interp_ref(interp, ret); + psexp->type = kRealSExp; + psexp->real = val; + return ret; +} + +SExpRef new_string(Interp *interp, const char *val) { + char *dup = strdup(val); + SExpRef ret = new_sexp(interp); + SExp *psexp = Interp_ref(interp, ret); + psexp->type = kStringSExp; + psexp->str = dup; + return ret; +} + +SExpRef new_symbol(Interp *interp, const char *val) { + String2IntHashTableIter iter = String2IntHashTable_find(&interp->symbols, val); + if (iter == NULL) { + char *dup = strdup(val); + SExpRef ret = new_sexp(interp); + SExp *psexp = Interp_ref(interp, ret); + psexp->type = kSymbolSExp; + psexp->str = dup; + String2IntHashTable_insert(&interp->symbols, dup, ret.idx); + return ret; + } else { + return (SExpRef){ iter->val }; + } +} + +SExpRef cons(Interp *interp, SExpRef car, SExpRef cdr) { + SExpRef ret = new_sexp(interp); + SExp *psexp = Interp_ref(interp, ret); + psexp->type = kPairSExp; + psexp->pair.car = car; + psexp->pair.cdr = cdr; + return ret; +} + +SExpRef new_list1(Interp *interp, SExpRef e1) { + return cons(interp, e1, interp->nil); +} + +SExpRef new_list2(Interp *interp, SExpRef e1, SExpRef e2) { + return cons(interp, e1, new_list1(interp, e2)); +} + +SExpRef new_list3(Interp *interp, SExpRef e1, SExpRef e2, SExpRef e3) { + return cons(interp, e1, new_list2(interp, e2, e3)); +} + +SExpRef new_list4(Interp *interp, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4) { + return cons(interp, e1, new_list3(interp, e2, e3, e4)); +} + +SExpRef new_list5(Interp *interp, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5) { + return cons(interp, e1, new_list4(interp, e2, e3, e4, e5)); +} + diff --git a/src/interp.h b/src/interp.h new file mode 100644 index 0000000..6ec1c6d --- /dev/null +++ b/src/interp.h @@ -0,0 +1,43 @@ +#ifndef BAMBOO_LISP_INTERP_H_ +#define BAMBOO_LISP_INTERP_H_ + +#include <stdbool.h> + +#include <algds/hash_table.h> + +#include "sexp.h" + +typedef struct { + bool gc_paused; + SExpVector objs; + IntVector empty_space; + String2IntHashTable symbols; + SExpRef stack; + SExpRef evaluating; + SExpRef top_level; + SExpRef nil; +} Interp; + +void Interp_init(Interp *self); +void Interp_free(Interp *self); +SExp* Interp_ref(Interp *self, SExpRef ref); +void Interp_gc(Interp *self); +void Interp_pause_gc(Interp *self); +void Interp_restart_gc(Interp *self); + +SExpRef new_sexp(Interp *ctx); +SExpRef new_boolean(Interp *ctx, bool val); +SExpRef new_char(Interp *ctx, char val); +SExpRef new_integer(Interp *ctx, int64_t val); +SExpRef new_real(Interp *ctx, double val); +SExpRef new_string(Interp *ctx, const char *val); +SExpRef new_symbol(Interp *ctx, const char *val); +SExpRef cons(Interp *ctx, SExpRef car, SExpRef cdr); +SExpRef new_list1(Interp *ctx, SExpRef e1); +SExpRef new_list2(Interp *ctx, SExpRef e1, SExpRef e2); +SExpRef new_list3(Interp *ctx, SExpRef e1, SExpRef e2, SExpRef e3); +SExpRef new_list4(Interp *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4); +SExpRef new_list5(Interp *ctx, SExpRef e1, SExpRef e2, SExpRef e3, SExpRef e4, SExpRef e5); + +#endif + diff --git a/src/parser.c b/src/parser.c index d164186..ff7d625 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,11 +1,15 @@ #include "parser.h" +#include "sexp.h" #include <ctype.h> #include <stdlib.h> +#include <stdarg.h> -static void skip_spaces(Parser *ctx) { - while (isspace(parser_peek(ctx))) { - parser_getchar(ctx); +#define BUFSIZE 1024 + +static void skip_spaces(Parser *parser) { + while (isspace(Parser_peek(parser))) { + Parser_getchar(parser); } } @@ -13,26 +17,325 @@ ParseResult ParseOk(SExpRef ref) { return (ParseResult){ .val = ref, .errmsg = NULL }; } -ParseResult ParseErr(const char *msg) { - return (ParseResult){ .val = {-1}, .errmsg = msg }; +ParseResult ParseErr(Parser *parser, const char *format, ...) { + va_list args; + va_start(args, format); + vsnprintf(parser->errmsg_buf, BUFSIZE, format, args); + va_end(args); + return (ParseResult){ .val = {-1}, .errmsg = parser->errmsg_buf }; +} + +bool ParseResult_is_err(ParseResult res) { + if (res.errmsg != NULL) return true; + return false; +} + +void Parser_init(Parser *parser) { + parser->token_buf = malloc(BUFSIZE); + parser->errmsg_buf = malloc(BUFSIZE); +} + +void Parser_free(Parser *parser) { + free(parser->token_buf); + free(parser->errmsg_buf); +} + +void Parser_set_string(Parser *parser, const char *str) { + parser->parse_type = kParseString; + parser->string = str; + parser->str_cursor = str; } -ParseResult parse_sexp(Parser *ctx) { - skip_spaces(ctx); - int next = parser_peek(ctx); +void Parser_set_file(Parser *parser, FILE *fp) { + parser->parse_type = kParseFile; + parser->fp = fp; +} + +int Parser_getchar(Parser *ctx) { + if (ctx->parse_type == kParseString) { + if (*ctx->str_cursor == '\0') return EOF; + int ret = *ctx->str_cursor; + ctx->str_cursor++; + return ret; + } else if (ctx->parse_type == kParseFile) { + return fgetc(ctx->fp); + } + return EOF; +} + +int Parser_peek(Parser *ctx) { + if (ctx->parse_type == kParseString) { + if (*ctx->str_cursor == '\0') return EOF; + int ret = *ctx->str_cursor; + return ret; + } else if (ctx->parse_type == kParseFile) { + int ret = fgetc(ctx->fp); + if (ret == EOF) return EOF; + ungetc(ret, ctx->fp); + return ret; + } + return EOF; +} + +ParseResult parse_sexp(Parser *parser) { + skip_spaces(parser); + if (Parser_peek(parser) == EOF) { + return ParseErr(parser, "Unexpected EOF.\n."); + } + int next = Parser_peek(parser); if (next == '(') { - return parse_list(ctx); + return parse_list(parser); } else if (next == ',') { - parser_getchar(ctx); - if (parser_peek(ctx) == '@') { - return parse_slicing_unquote(ctx); + Parser_getchar(parser); + if (Parser_peek(parser) == '@') { + Parser_getchar(parser); + return parse_slicing_unquote(parser); } - return parse_unquote(ctx); + return parse_unquote(parser); } else if (next == '`') { - return parse_quasi(ctx); + Parser_getchar(parser); + return parse_quasi(parser); } else if (next == '\'') { - return parse_quote(ctx); + Parser_getchar(parser); + return parse_quote(parser); + } + return parse_atom(parser); +} + +static ParseResult expect_char(Parser *parser, int chr) { + if (Parser_peek(parser) == EOF) { + return ParseErr(parser, "Unexpected EOF.\n."); + } + if (Parser_peek(parser) == chr) { + Parser_getchar(parser); + return ParseOk(parser->ctx->nil); + } + return ParseErr(parser, "Unexpected character %c.\n", (char)chr); +} + +static ParseResult expect_space(Parser *parser) { + if (Parser_peek(parser) == EOF) { + return ParseErr(parser, "Unexpected EOF.\n."); + } + if (isspace(Parser_peek(parser))) { + Parser_getchar(parser); + return ParseOk(parser->ctx->nil); + } + return ParseErr(parser, "Expect space.\n"); +} + +static SExpRef build_list_from_vector(Interp *ctx, SExpRefVector elems) { + int i = SExpRefVector_len(&elems) - 1; + SExpRef ret = *SExpRefVector_ref(&elems, i); + for (; i >= 0; i--) { + SExpRef cur = *SExpRefVector_ref(&elems, i); + ret = cons(ctx, cur, ret); + } + return ret; +} + +ParseResult parse_list(Parser *parser) { + SExpRefVector elems; + SExpRefVector_init(&elems); + ParseResult ret; + + ret = expect_char(parser, '('); + if (ParseResult_is_err(ret)) goto end; + skip_spaces(parser); + while (1) { + if (Parser_peek(parser) == EOF) { + ret = ParseErr(parser, "Unexpected EOF.\n."); + goto end; + } + if (Parser_peek(parser) == ')') { + Parser_getchar(parser); + SExpRefVector_push_back(&elems, parser->ctx->nil); + ret = ParseOk(build_list_from_vector(parser->ctx, elems)); + goto end; + } else if (Parser_peek(parser) == '.') { + Parser_getchar(parser); + break; + } + ret = parse_sexp(parser); + if (ParseResult_is_err(ret)) goto end; + SExpRefVector_push_back(&elems, ret.val); + ret = expect_space(parser); + if (ParseResult_is_err(ret)) goto end; + skip_spaces(parser); } - return parse_atom(ctx); + // dot + ret = expect_space(parser); + if (ParseResult_is_err(ret)) goto end; + skip_spaces(parser); + ret = parse_sexp(parser); + if (ParseResult_is_err(ret)) goto end; + SExpRefVector_push_back(&elems, ret.val); + skip_spaces(parser); + ret = expect_char(parser, ')'); + if (ParseResult_is_err(ret)) goto end; + ret = ParseOk(build_list_from_vector(parser->ctx, elems)); +end: + SExpRefVector_free(&elems); + return ret; +} + +static char *read_token(Parser *parser) { + int i = 0; + while (!isspace(Parser_peek(parser)) + && Parser_peek(parser) != EOF + && i < BUFSIZE - 1) { + parser->token_buf[i] = Parser_getchar(parser); + i++; + } + if (i > 1022) return NULL; + parser->token_buf[i] = '\0'; + return parser->token_buf; +} + +static bool is_symbol_init(char c) { + if (isalpha(c)) return true; + if (c == '!') return true; + if (c == '$') return true; + if (c == '%') return true; + if (c == '&') return true; + if (c == '*') return true; + if (c == '/') return true; + if (c == ':') return true; + if (c == '<') return true; + if (c == '=') return true; + if (c == '>') return true; + if (c == '?') return true; + if (c == '^') return true; + if (c == '_') return true; + if (c == '~') return true; + return false; +} + +static bool is_symbol_subsequent(char c) { + if (is_symbol_init(c)) return true; + if (isdigit(c)) return true; + if (c == '+') return true; + if (c == '-') return true; + if (c == '.') return true; + if (c == '@') return true; + return false; +} + +static ParseResult parse_token(Parser *parser, const char *token) { + int len = strlen(token); + if (len == 0) return ParseErr(parser, "Empty token.\n"); + if (len == 1) { + if (token[0] == '-' || token[0] == '+') { + return ParseOk(new_symbol(parser->ctx, token)); + } + } + if (token[0] == '#') { + if (len < 2) return ParseErr(parser, "Expect boolean or character.\n"); + if (token[1] == 't') return ParseOk(new_boolean(parser->ctx, true)); + if (token[1] == 'f') return ParseOk(new_boolean(parser->ctx, false)); + if (token[1] == '\\') { + if (len < 3) return ParseErr(parser, "Expect character.\n"); + if (len == 3) return ParseOk(new_char(parser->ctx, token[2])); + if (strcmp(token+2, "newline") == 0) return ParseOk(new_char(parser->ctx, '\n')); + if (strcmp(token+2, "space") == 0) return ParseOk(new_char(parser->ctx, ' ')); + if (strcmp(token+2, "tab") == 0) return ParseOk(new_char(parser->ctx, '\t')); + if (strcmp(token+2, "return") == 0) return ParseOk(new_char(parser->ctx, '\r')); + return ParseErr(parser, "Unknown character name: %s\n.", token + 2); + } + } + if (is_symbol_init(token[0])) { + for (int i = 1; i < len; i++) { + if (!is_symbol_subsequent(token[i])) { + return ParseErr(parser, "Not a symbol, containing illegal character: %s\n.", token); + } + } + return ParseOk(new_symbol(parser->ctx, token)); + } + char *endptr; + int64_t integer = strtoll(token, &endptr, 10); + if (endptr == token + len) return ParseOk(new_integer(parser->ctx, integer)); + double real = strtod(token, &endptr); + if (endptr == token + len) return ParseOk(new_real(parser->ctx, real)); + return ParseErr(parser, "Not a number : %s\n.", token); +} + +ParseResult parse_string(Parser *parser) { + ParseResult ret; + CharVector buf; + CharVector_init(&buf); + Parser_getchar(parser); + while (Parser_peek(parser) != '"') { + if (Parser_peek(parser) == EOF) { + return ParseErr(parser, "Unexpected EOF.\n."); + } + if (Parser_peek(parser) == '\0') { + ret = ParseErr(parser, "Unexpected zero terminator.\n"); + goto end; + } + if (Parser_peek(parser) != '\\') { + CharVector_push_back(&buf, Parser_getchar(parser)); + } else { + Parser_getchar(parser); + if (Parser_peek(parser) == EOF) { + return ParseErr(parser, "Unexpected EOF.\n."); + } + int c = Parser_getchar(parser); + if (c == EOF) { + ret = ParseErr(parser, "Unexpected EOF: %c.\n", c); + goto end; + } else if (c == '\\') CharVector_push_back(&buf, '\\'); + else if (c == 't') CharVector_push_back(&buf, '\t'); + else if (c == 'n') CharVector_push_back(&buf, '\n'); + else if (c == 'r') CharVector_push_back(&buf, '\r'); + else if (c == '"') CharVector_push_back(&buf, '"'); + else { + ret = ParseErr(parser, "Unexpected escape char: %c.\n", c); + goto end; + } + } + } + CharVector_push_back(&buf, '\0'); + ret = ParseOk(new_string(parser->ctx, buf.buffer)); +end: + CharVector_free(&buf); + return ret; +} + +ParseResult parse_atom(Parser *parser) { + ParseResult ret; + if (Parser_peek(parser) == EOF) { + return ParseErr(parser, "Unexpected EOF.\n."); + } + if (Parser_peek(parser) == '"') return parse_string(parser); + const char *token = read_token(parser); + if (token == NULL) return ParseErr(parser, "Token too long.\n"); + return parse_token(parser, token); +} + +ParseResult parse_abbrev(Parser *parser, const char *name) { + if (isspace(Parser_peek(parser))) { + return ParseErr(parser, "Unexpected space.\n"); + } + ParseResult ret; + ret = parse_sexp(parser); + if (ParseResult_is_err(ret)) return ret; + SExpRef sym = new_symbol(parser->ctx, name); + return ParseOk(cons(parser->ctx, sym, ret.val)); +} + +ParseResult parse_quote(Parser *parser) { + return parse_abbrev(parser, "quote"); +} + +ParseResult parse_unquote(Parser *parser) { + return parse_abbrev(parser, "unquote"); +} + +ParseResult parse_slicing_unquote(Parser *parser) { + return parse_abbrev(parser, "slicing-unquote"); +} + +ParseResult parse_quasi(Parser *parser) { + return parse_abbrev(parser, "quasiquote"); } diff --git a/src/parser.h b/src/parser.h index 3f159b8..cefc946 100644 --- a/src/parser.h +++ b/src/parser.h @@ -3,33 +3,59 @@ #include <stdbool.h> +#include "interp.h" #include "sexp.h" -typedef struct { +typedef enum { + kParseString, + kParseFile, +} ParseType; +typedef struct { + Interp *ctx; + char *errmsg_buf; + char *token_buf; + + ParseType parse_type; + union { + struct { + const char *string; + const char *str_cursor; + }; + FILE *fp; + }; } Parser; +void Parser_init(Parser *self); +void Parser_free(Parser *self); +int Parser_getchar(Parser *self); +int Parser_peek(Parser *self); +void Parser_set_string(Parser *parser, const char *str); +void Parser_set_file(Parser *parser, FILE *fp); + typedef struct { SExpRef val; const char *errmsg; } ParseResult; -int parser_getchar(Parser *ctx); -int parser_peek(Parser *ctx); - -ParseResult parse_sexp(Parser *ctx); -ParseResult parse_list(Parser *ctx); -ParseResult parse_quote(Parser *ctx); -ParseResult parse_unquote(Parser *ctx); -ParseResult parse_slicing_unquote(Parser *ctx); -ParseResult parse_quasi(Parser *ctx); -ParseResult parse_atom(Parser *ctx); -ParseResult parse_number(Parser *ctx); -ParseResult parse_integer(Parser *ctx); -ParseResult parse_real(Parser *ctx); -ParseResult parse_symbol(Parser *ctx); -ParseResult parse_string(Parser *ctx); -ParseResult parse_char(Parser *ctx); +ParseResult ParseOk(SExpRef ref); +ParseResult ParseErr(Parser *parser, const char *format, ...); +bool ParseResult_is_err(ParseResult res); + + +ParseResult parse_sexp(Parser *parser); +ParseResult parse_list(Parser *parser); +ParseResult parse_quote(Parser *parser); +ParseResult parse_unquote(Parser *parser); +ParseResult parse_slicing_unquote(Parser *parser); +ParseResult parse_quasi(Parser *parser); +ParseResult parse_atom(Parser *parser); +ParseResult parse_number(Parser *parser); +ParseResult parse_integer(Parser *parser); +ParseResult parse_real(Parser *parser); +ParseResult parse_symbol(Parser *parser); +ParseResult parse_string(Parser *parser); +ParseResult parse_char(Parser *parser); #endif @@ -1,8 +1,30 @@ #include "sexp.h" #include "algds/vec.h" +#include <inttypes.h> + +void SExpRef_show(SExpRef self, FILE* fp) { } + void SExp_show(SExp self, FILE* fp) { - fprintf(fp, "{SEXP}"); + if (self.type == kEmptySExp) fprintf(fp, "<EMPTY>"); + else if (self.type == kIntegerSExp) fprintf(fp, "%"PRId64, self.integer); + else if (self.type == kRealSExp) fprintf(fp, "%lf", self.real); + else if (self.type == kBooleanSExp) { + if (self.boolean) fprintf(fp, "#t"); + else fprintf(fp, "#f"); + } else if (self.type == kNilSExp) fprintf(fp, "()"); + else if (self.type == kCharSExp) fprintf(fp, "#\\%c", self.character); + else if (self.type == kStringSExp) fprintf(fp, "\"%s\"", self.str); + else if (self.type == kSymbolSExp) fprintf(fp, "'%s", self.str); + else if (self.type == kUserDataSExp) fprintf(fp, "<%p>", self.userdata); + else if (self.type == kFuncSExp) fprintf(fp, "<FUNCTION>"); + else if (self.type == kPairSExp) { + fprintf(fp, "(<%d> . <%d>)", self.pair.car.idx, self.pair.cdr.idx); + } + else if (self.type == kEnvSExp) fprintf(fp, "<Env>"); + else if (self.type == kBindingSExp) fprintf(fp, "<BINDING>"); + else if (self.type == kMacroSExp) fprintf(fp, "<MACRO>"); } VECTOR_IMPL(SExp); +VECTOR_IMPL(SExpRef); @@ -21,6 +21,7 @@ typedef struct { typedef struct { SExpRef args; SExpRef body; + SExpRef env; } SExpFunc; typedef struct { @@ -30,7 +31,6 @@ typedef struct { typedef struct { SExpRef parent; - SExpRef child; SExpRef bindings; } SExpEnv; @@ -42,10 +42,11 @@ typedef struct { } SExpBinding; typedef enum { + kEmptySExp, kIntegerSExp, kRealSExp, kBooleanSExp, - kNumberSExp, + kNilSExp, kCharSExp, kStringSExp, kSymbolSExp, @@ -58,6 +59,7 @@ typedef enum { } SExpType; struct sexp { + bool marked; SExpType type; union { int64_t integer; @@ -73,8 +75,10 @@ struct sexp { }; void SExp_show(SExp self, FILE* fp); +void SExpRef_show(SExpRef self, FILE* fp); VECTOR_DEF(SExp); +VECTOR_DEF(SExpRef); #endif diff --git a/tests/test_parser.c b/tests/test_parser.c new file mode 100644 index 0000000..c0587cb --- /dev/null +++ b/tests/test_parser.c @@ -0,0 +1,82 @@ +#include <assert.h> +#include <stdio.h> + +#include "interp.h" +#include "parser.h" +#include "sexp.h" + +ParseResult parse_str(Parser *parser, const char* str) { + Parser_set_string(parser, str); + return parse_sexp(parser); +} + +#define ATOM_TEST(_str, _type_enum, _field, _expect) \ +{ \ + res = parse_str(&parser, (_str)); \ + assert(!ParseResult_is_err(res)); \ + sexp = *Interp_ref(&interp, res.val); \ + assert(sexp.type == (_type_enum)); \ + assert(sexp._field == (_expect)); \ +} + +#define STRING_TEST(_str, _type_enum, _expect) \ +{ \ + res = parse_str(&parser, (_str)); \ + assert(!ParseResult_is_err(res)); \ + sexp = *Interp_ref(&interp, res.val); \ + assert(sexp.type == _type_enum); \ + assert(strcmp(sexp.str, (_expect)) == 0); \ +} + +#define ERROR_TEST(_str) \ +{ \ + res = parse_str(&parser, (_str)); \ + assert(ParseResult_is_err(res)); \ +} + +int main() { + printf("[TEST] parser\n"); + Interp interp; + Parser parser; + Interp_init(&interp); + Parser_init(&parser); + parser.ctx = &interp; + + ParseResult res; + SExp sexp; + + ATOM_TEST("1.11", kRealSExp, real, 1.11); + ATOM_TEST("-1.11", kRealSExp, real, -1.11); + ATOM_TEST("1.11e10", kRealSExp, real, 1.11e10); + ATOM_TEST("1.11 ", kRealSExp, real, 1.11); + ATOM_TEST("1.11e10 ", kRealSExp, real, 1.11e10); + ATOM_TEST(" 1.11 ", kRealSExp, real, 1.11); + ATOM_TEST(" 1.11e10 ", kRealSExp, real, 1.11e10); + ERROR_TEST("123.1x"); + + ATOM_TEST("42", kIntegerSExp, integer, 42); + ATOM_TEST("-42", kIntegerSExp, integer, -42); + ERROR_TEST("123x"); + + ATOM_TEST("#t", kBooleanSExp, boolean, true); + ATOM_TEST("#f", kBooleanSExp, boolean, false); + ERROR_TEST("#x"); + + ATOM_TEST("#\\t", kCharSExp, character, 't'); + ATOM_TEST("#\\newline", kCharSExp, character, '\n'); + ERROR_TEST("#\\uwu"); + + STRING_TEST("\"test\"", kStringSExp, "test"); + STRING_TEST("\"t\\nest\"", kStringSExp, "t\nest"); + STRING_TEST("!uwu", kSymbolSExp, "!uwu"); + STRING_TEST("-", kSymbolSExp, "-"); + STRING_TEST("+", kSymbolSExp, "+"); + ERROR_TEST("\"t\\xst\""); + ERROR_TEST("-abc"); + ERROR_TEST("@1"); + ERROR_TEST("a|"); + + Interp_free(&interp); + Parser_free(&parser); + printf("[PASS] parser\n"); +} |
