932 lines
30 KiB
C
932 lines
30 KiB
C
|
#include "ast.h"
|
||
|
|
||
|
#include <stdarg.h>
|
||
|
#include <ctype.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <inttypes.h>
|
||
|
#include <string.h>
|
||
|
#include <regex.h>
|
||
|
#include <math.h>
|
||
|
|
||
|
static bool next_token(TokenStream *stream, Token *out, AstErrorList **err);
|
||
|
static AstNode *process_token(Token *token, TokenStream *stream,
|
||
|
AstQuoteType in_quote, AstErrorList **err);
|
||
|
static AstNode *ast_next_toplevel_internal(TokenStream *stream,
|
||
|
AstQuoteType in_quote,
|
||
|
AstErrorList **err);
|
||
|
static void dump_node_list(AstNode **list, size_t count, char sdelim,
|
||
|
char edelim, int padding, FILE *stream);
|
||
|
|
||
|
// Decimal literals. Capture groups: 1 = sign, 2 = integer digits,
// 3 = fraction digits, 4 = whole exponent part, 5 = exponent sign,
// 6 = exponent integer digits, 7 = exponent fraction digits.
static const char *DECIMAL_NUM_PAT =
    "^([+-])?([0-9]*)\\.?([0-9]*)(e([+-]?)([0-9]*)\\.?([0-9]*))?$";
static regex_t DECIMAL_NUM_REGEX;

// Radix-prefixed integer literals. Capture groups: 1 = base, 2 = sign,
// 3 = digits.
static const char *NON_DECIMAL_NUM_PAT =
    "^(2|8|10|16)#([+-])?([0-9a-f]+)$";
static regex_t NON_DECIMAL_NUM_REGEX;

// Number of regmatch_t slots handed to regexec(); set by ast_init_parser().
static size_t REGEX_NMATCH;
|
||
|
|
||
|
void ast_init_parser() {
|
||
|
bool had_error = false;
|
||
|
int code;
|
||
|
if ((code = regcomp(&DECIMAL_NUM_REGEX, DECIMAL_NUM_PAT, REG_EXTENDED))) {
|
||
|
fprintf(stderr, "Failed to compile decimal number regex:\n%s\n",
|
||
|
DECIMAL_NUM_PAT);
|
||
|
char msg[1024];
|
||
|
regerror(code, &DECIMAL_NUM_REGEX, msg, sizeof(msg));
|
||
|
fprintf(stderr, " %s\n", msg);
|
||
|
had_error = true;
|
||
|
}
|
||
|
if (regcomp(&NON_DECIMAL_NUM_REGEX, NON_DECIMAL_NUM_PAT, REG_EXTENDED)) {
|
||
|
if (had_error) {
|
||
|
fputc('\n', stderr);
|
||
|
}
|
||
|
fprintf(stderr, "Failed to compile non-decimal number regex:\n%s\n",
|
||
|
NON_DECIMAL_NUM_PAT);
|
||
|
char msg[1024];
|
||
|
regerror(code, &NON_DECIMAL_NUM_REGEX, msg, sizeof(msg));
|
||
|
fprintf(stderr, " %s\n", msg);
|
||
|
had_error = true;
|
||
|
}
|
||
|
if (had_error) {
|
||
|
exit(1);
|
||
|
}
|
||
|
REGEX_NMATCH = (DECIMAL_NUM_REGEX.re_nsub > NON_DECIMAL_NUM_REGEX.re_nsub ?
|
||
|
DECIMAL_NUM_REGEX.re_nsub : NON_DECIMAL_NUM_REGEX.re_nsub)
|
||
|
+ 1;
|
||
|
}
|
||
|
|
||
|
void ast_deinit_parser() {
|
||
|
regfree(&DECIMAL_NUM_REGEX);
|
||
|
regfree(&NON_DECIMAL_NUM_REGEX);
|
||
|
}
|
||
|
|
||
|
// vasprintf is nonstandard, open_memstream is POSIX 2008
/*
 * Format `fmt` with `args` into a newly malloc'd, NUL-terminated string.
 *
 * Returns the string (the caller frees it), or NULL when vsnprintf reports
 * an encoding error or the allocation fails. `args` is consumed; the caller
 * still owns the matching va_end().
 */
char *compat_vasprintf(const char *fmt, va_list args) {
    // Measure on a copy so `args` is still fresh for the real formatting pass.
    va_list measure;
    va_copy(measure, args);
    int needed = vsnprintf(NULL, 0, fmt, measure);
    va_end(measure);
    if (needed < 0) {
        // A negative result must not be turned into a size.
        return NULL;
    }

    size_t size = (size_t) needed + 1;
    char *buf = malloc(size);
    if (!buf) {
        return NULL;
    }
    if (vsnprintf(buf, size, fmt, args) < 0) {
        free(buf);
        return NULL;
    }
    return buf;
}
|
||
|
|
||
|
// Append `err` to the end of the singly linked list headed by `*list`.
static void push_error_list_end(AstErrorList **list, AstErrorList *err) {
    err->next = NULL;

    // Walk the link fields themselves so the empty-list case needs no branch.
    AstErrorList **link = list;
    while (*link) {
        link = &(*link)->next;
    }
    *link = err;
}
|
||
|
|
||
|
__attribute__((format(printf, 4, 5)))
|
||
|
static void push_build_error(AstErrorList **list, Token *token, size_t off,
|
||
|
const char *fmt, ...) {
|
||
|
if (list) {
|
||
|
AstErrorList *n = malloc(sizeof(AstErrorList));
|
||
|
n->type = AST_ERROR_BUILD;
|
||
|
n->build.off = off;
|
||
|
n->build.token = *token;
|
||
|
token->text = NULL;
|
||
|
token->buf_len = 0;
|
||
|
va_list args;
|
||
|
va_start(args, fmt);
|
||
|
n->build.msg = compat_vasprintf(fmt, args);
|
||
|
va_end(args);
|
||
|
push_error_list_end(list, n);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Wrap a tokenizer ParseError in a list entry and append it to `*list`.
// Does nothing when `list` is NULL.
static void push_parse_error(AstErrorList **list, ParseError *err) {
    if (!list) {
        return;
    }
    AstErrorList *entry = malloc(sizeof(AstErrorList));
    entry->parse = err;
    entry->type = AST_ERROR_PARSE;
    push_error_list_end(list, entry);
}
|
||
|
|
||
|
// Allocate `size` bytes for a node and fill in the common header fields
// (type, line, col). Any other fields are left for the caller to set.
static void *make_ast_node(size_t size, AstType type, size_t line,
                           size_t col) {
    AstNode *fresh = malloc(size);
    fresh->col = col;
    fresh->line = line;
    fresh->type = type;
    return fresh;
}
|
||
|
|
||
|
/*
 * Case-insensitive comparison of two NUL-terminated strings.
 *
 * Returns 0 when the strings are equal ignoring case, otherwise the
 * difference between the first pair of lowercased bytes that differ
 * (negative when str1 sorts first).
 */
static int compat_strcasecmp(const char *str1, const char *str2) {
    // <ctype.h> functions require values representable as unsigned char, so
    // read the bytes as unsigned instead of passing possibly negative chars.
    const unsigned char *a = (const unsigned char *) str1;
    const unsigned char *b = (const unsigned char *) str2;

    for (;; ++a, ++b) {
        int la = tolower(*a);
        int lb = tolower(*b);
        // Stop at the first difference, or at the shared terminator.
        if (la != lb || la == 0) {
            return la - lb;
        }
    }
}
|
||
|
|
||
|
/*
 * Convert a numeric character escape: a prefix letter followed by a fixed
 * number of digits: x + 4 hex digits, o + 6 octal digits, or d + 5 decimal
 * digits (the prefix letter is case-insensitive).
 *
 * escape:         text starting at the prefix letter
 * out:            receives the decoded value on success
 * allow_trailing: when false, nothing may follow the digits
 *
 * Returns the number of chars converted (prefix + digits) on success,
 * 0 on failure.
 */
static int convert_numeric_char_escape(const char *escape, wchar_t *out,
                                       bool allow_trailing) {
    size_t expected_len;
    int base;

    switch (tolower((unsigned char) escape[0])) {
    case 'x':
        expected_len = 4;
        base = 16;
        break;
    case 'o':
        expected_len = 6;
        base = 8;
        break;
    case 'd':
        expected_len = 5;
        base = 10;
        break;
    default:
        // Not a numeric escape (this also covers the empty string), so the
        // length and base are never read uninitialised.
        return 0;
    }

    // escape[0] is a prefix letter here, so strlen() is at least 1.
    size_t len = strlen(escape) - 1;
    if (len < expected_len || (!allow_trailing && len > expected_len)) {
        return 0;
    }

    char numbuf[7]; // longest digit run (6, octal) plus the terminator
    memcpy(numbuf, escape + 1, expected_len);
    numbuf[expected_len] = '\0';

    // strtoumax() would also accept leading whitespace, a sign or a "0x"
    // prefix; none of those belong in an escape, so require plain digits.
    // Digits that are invalid for the base are caught by the endptr check.
    for (size_t i = 0; i < expected_len; ++i) {
        if (!isxdigit((unsigned char) numbuf[i])) {
            return 0;
        }
    }

    char *endptr;
    uintmax_t num = strtoumax(numbuf, &endptr, base);
    if (*endptr) {
        return 0;
    }
    *out = (wchar_t) num;
    return (int) expected_len + 1;
}
|
||
|
|
||
|
// Single-letter escapes and the character each one stands for. The table is
// searched in both directions (escape -> value and value -> escape).
static const struct {
    char escape;
    char value;
} C_STYLE_ESCAPE_MAP[] = {
    {.escape = 'n', .value = '\n'},
    {.escape = 't', .value = '\t'},
    {.escape = 'r', .value = '\r'},
    {.escape = 'v', .value = '\v'},
    {.escape = 'f', .value = '\f'},
    {.escape = 'b', .value = '\b'},
    {.escape = 'a', .value = '\a'},
    {.escape = '0', .value = '\0'},
    {.escape = '\\', .value = '\\'},
};

// Number of entries in C_STYLE_ESCAPE_MAP.
const size_t C_STYLE_ESCAPE_COUNT =
    sizeof(C_STYLE_ESCAPE_MAP) / sizeof(C_STYLE_ESCAPE_MAP[0]);
|
||
|
|
||
|
// true on success, false on failure
|
||
|
static bool convert_c_style_char_escape(const char *escape, wchar_t *out) {
|
||
|
for (size_t i = 0; i < C_STYLE_ESCAPE_COUNT; ++i) {
|
||
|
if (tolower(escape[0]) == C_STYLE_ESCAPE_MAP[i].escape) {
|
||
|
*out = C_STYLE_ESCAPE_MAP[i].value;
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
// null byte on failure
|
||
|
static char escape_for_char(char to_escape) {
|
||
|
for (size_t i = 0; i < C_STYLE_ESCAPE_COUNT; ++i) {
|
||
|
if (to_escape == C_STYLE_ESCAPE_MAP[i].value) {
|
||
|
return C_STYLE_ESCAPE_MAP[i].escape;
|
||
|
}
|
||
|
}
|
||
|
return '\0';
|
||
|
}
|
||
|
|
||
|
/*
 * Build a printable copy of `input` (which may contain NUL bytes) in which
 * every character that has a C-style escape, and every double quote, is
 * written as a backslash followed by the escape character.
 *
 * input_len: number of bytes to read from `input`
 * out_len:   receives the length of the result, excluding its terminator
 *
 * Returns a malloc'd, NUL-terminated buffer the caller must free, or NULL
 * (with *out_len == 0) if the allocation fails.
 */
static char *escape_string(const char *input, size_t input_len, size_t *out_len) {
    *out_len = 0;

    // First pass: each escaped character needs one extra byte. Counting them
    // up front lets the buffer be allocated once at its final size.
    size_t extra = 0;
    for (size_t i = 0; i < input_len; ++i) {
        if (escape_for_char(input[i]) || input[i] == '"') {
            ++extra;
        }
    }

    char *out = malloc(input_len + extra + 1);
    if (!out) {
        return NULL;
    }

    size_t used = 0;
    for (size_t i = 0; i < input_len; ++i) {
        char escape = escape_for_char(input[i]);
        if (!escape && input[i] == '"') {
            escape = '"';
        }
        if (escape) {
            out[used++] = '\\';
            out[used++] = escape;
        } else {
            out[used++] = input[i];
        }
    }
    out[used] = '\0';
    *out_len = used;
    return out;
}
|
||
|
|
||
|
// Spelled-out character names and the character each one stands for.
static const struct {
    const char *escape;
    char value;
} NAMED_CHAR_ESCAPE_MAP[] = {
    {.escape = "newline", .value = '\n'},
    {.escape = "tab", .value = '\t'},
    {.escape = "return", .value = '\r'},
    {.escape = "vtab", .value = '\v'},
    {.escape = "page_break", .value = '\f'},
    {.escape = "backspace", .value = '\b'},
    {.escape = "alert", .value = '\a'},
    {.escape = "null", .value = '\0'},
    {.escape = "backslash", .value = '\\'},
};

// Number of entries in NAMED_CHAR_ESCAPE_MAP.
static const size_t NAMED_CHAR_COUNT =
    sizeof(NAMED_CHAR_ESCAPE_MAP) / sizeof(NAMED_CHAR_ESCAPE_MAP[0]);
|
||
|
|
||
|
// true on success, false on failure
|
||
|
static bool convert_named_char_escape(const char *escape, wchar_t *out) {
|
||
|
for (size_t i = 0; i < NAMED_CHAR_COUNT; ++i) {
|
||
|
if (compat_strcasecmp(NAMED_CHAR_ESCAPE_MAP[i].escape, escape) == 0) {
|
||
|
*out = NAMED_CHAR_ESCAPE_MAP[i].value;
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/*
 * Build an integer node from a character token.
 *
 * The token text starts with '#'. A following backslash introduces an
 * escape, tried in this order: a named escape, a C-style escape, then a
 * numeric escape that must span the rest of the token. Without a backslash
 * the character after '#' is the value.
 *
 * Returns NULL after recording a build error when the escape is missing or
 * not recognised.
 */
static AstIntNode *process_char_token(Token *token, AstErrorList **err) {
    // remove the # sign
    const char *sym = token->text + 1;

    // plain character
    if (sym[0] != '\\') {
        AstIntNode *node = make_ast_node(sizeof(AstIntNode), AST_TYPE_INT,
                                         token->line, token->col);
        node->value = sym[0];
        return node;
    }

    // special character; account for '#' in token->len
    if (token->len < 3) {
        push_build_error(err, token, 1, "expected escape sequence");
        return NULL;
    }

    wchar_t c;
    if (!convert_named_char_escape(sym + 1, &c) &&
        !convert_c_style_char_escape(sym + 1, &c) &&
        !convert_numeric_char_escape(sym + 1, &c, false)) {
        // token->text must not be freed here: push_build_error() copies the
        // token, text pointer included, into the error entry, which is later
        // printed and released. Freeing first would leave that entry with a
        // dangling pointer.
        push_build_error(err, token, 0,
                         "invalid escape sequence in character literal");
        return NULL;
    }

    // Allocate only once the value is known, so the error paths have
    // nothing to clean up.
    AstIntNode *node = make_ast_node(sizeof(AstIntNode), AST_TYPE_INT,
                                     token->line, token->col);
    node->value = c;
    return node;
}
|
||
|
|
||
|
static char *process_string_escapes(Token *token, size_t *out_len,
|
||
|
AstErrorList **err) {
|
||
|
const char *text = token->text;
|
||
|
size_t out_size = token->len + 1;
|
||
|
char *out = malloc(out_size);
|
||
|
*out_len = 0;
|
||
|
bool backslash = 0;
|
||
|
for (size_t i = 1; i < token->len - 1; ++i) {
|
||
|
if (!backslash && text[i] == '\\') {
|
||
|
backslash = true;
|
||
|
continue;
|
||
|
} else if (backslash && text[i] == '\n') {
|
||
|
// backslash can escape a newline
|
||
|
} else if (backslash) {
|
||
|
size_t count = 1;
|
||
|
wchar_t c;
|
||
|
if (!convert_c_style_char_escape(&text[i], &c) &&
|
||
|
!(count = convert_numeric_char_escape(&text[i], &c, true))) {
|
||
|
push_build_error(err, token, i, "invalid escape sequence");
|
||
|
return NULL;
|
||
|
}
|
||
|
if (out_size - *out_len - 1 < MB_CUR_MAX) {
|
||
|
out_size = out_size + MB_CUR_MAX - (out_size - *out_len - 1);
|
||
|
out = realloc(out, out_size);
|
||
|
}
|
||
|
*out_len += wctomb(out + *out_len, c);
|
||
|
i += count - 1;
|
||
|
} else {
|
||
|
if (*out_len >= out_size) {
|
||
|
out = realloc(out, out_size + token->len - i + 1);
|
||
|
}
|
||
|
out[(*out_len)++] = text[i];
|
||
|
}
|
||
|
backslash = false;
|
||
|
}
|
||
|
out = realloc(out, *out_len + 1);
|
||
|
out[*out_len] = '\0';
|
||
|
return out;
|
||
|
}
|
||
|
|
||
|
// Build a string node from a string token. Returns NULL when the token's
// escapes could not be decoded (process_string_escapes returned NULL).
static AstStringNode *process_string_token(Token *token, AstErrorList **err) {
    AstStringNode *str = make_ast_node(sizeof(AstStringNode), AST_TYPE_STRING,
                                       token->line, token->col);
    str->value = process_string_escapes(token, &str->length, err);
    if (str->value) {
        return str;
    }
    free(str);
    return NULL;
}
|
||
|
|
||
|
static AstNode *make_null_node(size_t line, size_t col) {
|
||
|
return make_ast_node(sizeof(AstNode), AST_TYPE_NULL, line, col);
|
||
|
}
|
||
|
|
||
|
/*
 * Build a node from a symbol or property token.
 *
 * The symbol `nil` becomes a null node and leaves the token untouched. Any
 * other symbol takes over the token's text as its name (the token's text is
 * set to NULL and buf_len to 0); a leading ':' marks it as a property.
 */
static AstNode *process_symbol_token(Token *token) {
    const bool is_nil = strcmp(token->text, "nil") == 0;
    if (is_nil) {
        return (AstNode *) make_null_node(token->line, token->col);
    }

    AstSymbolNode *sym = make_ast_node(sizeof(AstSymbolNode), AST_TYPE_SYMBOL,
                                       token->line, token->col);
    sym->skip_free = false;
    sym->is_property = token->text[0] == ':';
    sym->name_length = token->len;
    sym->name = token->text;

    // The node now holds the text pointer.
    token->buf_len = 0;
    token->text = NULL;
    return (AstNode *) sym;
}
|
||
|
|
||
|
// Returns -1 when the capture group `match` is non-empty and starts with
// '-' in the token text, and 1 in every other case.
static int sign_for_match(Token *token, regmatch_t *match) {
    const bool has_text = match->rm_so != match->rm_eo;
    return (has_text && token->text[match->rm_so] == '-') ? -1 : 1;
}
|
||
|
|
||
|
/*
 * Parse two capture groups of the token text as base-10 unsigned numbers.
 *
 * main / dec:          indices into `matches` of the two groups
 * main_out / dec_out:  receive the parsed values; an empty group yields 0
 */
static void break_number_for_matches(Token *token, regmatch_t *matches,
                                     int main, int dec, uintmax_t *main_out,
                                     uintmax_t *dec_out) {
    const char *text = token->text;
    const regmatch_t *groups[2] = {&matches[main], &matches[dec]};
    uintmax_t *results[2] = {main_out, dec_out};

    for (int k = 0; k < 2; ++k) {
        *results[k] = 0;
        // only parse when the group has at least 1 char
        if (groups[k]->rm_eo != groups[k]->rm_so) {
            *results[k] = strtoumax(text + groups[k]->rm_so, NULL, 10);
        }
    }
}
|
||
|
|
||
|
/*
 * Build a number node from a successful DECIMAL_NUM_REGEX match.
 *
 * When the fraction and the exponent's fraction are both zero and the
 * exponent is not negative, the result is an integer node, provided the
 * value fits in a signed 64-bit integer. Everything else, including integers
 * that are too large, becomes a float node.
 *
 * NOTE(review): digit runs are parsed with strtoumax(), so a run longer than
 * uintmax_t can hold presumably saturates before it gets here.
 */
static AstNode *process_decimal_matches(Token *token, regmatch_t *matches) {
    int main_sign = sign_for_match(token, &matches[1]);
    int exp_sign = sign_for_match(token, &matches[5]);
    uintmax_t main_main, main_dec;
    break_number_for_matches(token, matches, 2, 3, &main_main, &main_dec);
    uintmax_t exp_main, exp_dec;
    break_number_for_matches(token, matches, 6, 7, &exp_main, &exp_dec);

    if (main_dec == 0 && exp_dec == 0 && exp_sign == 1) {
        // Work on the magnitude with integer arithmetic. Multiplying the
        // sign into an unsigned value wraps, and going through pow() loses
        // precision above 2^53.
        const uintmax_t limit = main_sign < 0 ? (uintmax_t) INT64_MAX + 1
                                              : (uintmax_t) INT64_MAX;
        uintmax_t magnitude = main_main;
        bool fits = magnitude <= limit;
        for (uintmax_t e = 0; fits && magnitude != 0 && e < exp_main; ++e) {
            if (magnitude > limit / 10) {
                fits = false;
            } else {
                magnitude *= 10;
            }
        }
        if (fits) {
            // return an integer
            AstIntNode *node = make_ast_node(sizeof(AstIntNode), AST_TYPE_INT,
                                             token->line, token->col);
            if (main_sign < 0) {
                // 2^63 itself is only representable as INT64_MIN.
                node->value = magnitude == limit ? INT64_MIN
                                                 : -(int64_t) magnitude;
            } else {
                node->value = (int64_t) magnitude;
            }
            return (AstNode *) node;
        }
        // Too large for an integer node: fall through and return a float.
    }

    // return a float
    AstFloatNode *node = make_ast_node(sizeof(AstFloatNode), AST_TYPE_FLOAT,
                                       token->line, token->col);
    // Scale each fraction by the number of digits that were written, not by
    // the magnitude of the parsed value: "05" is 0.05, and an empty or
    // all-zero run must not reach log10(0).
    double main_dec_len = (double) (matches[3].rm_eo - matches[3].rm_so);
    double exp_dec_len = (double) (matches[7].rm_eo - matches[7].rm_so);
    double main_dec_f = (double) main_dec * pow(10, -main_dec_len);
    double exp_dec_f = (double) exp_dec * pow(10, -exp_dec_len);
    node->value = main_sign * ((double) main_main + main_dec_f) *
                  pow(10, exp_sign * ((double) exp_main + exp_dec_f));
    return (AstNode *) node;
}
|
||
|
|
||
|
static AstNode *process_non_decimal_matches(Token *token, regmatch_t *matches,
|
||
|
AstErrorList **err) {
|
||
|
// get the base
|
||
|
int base;
|
||
|
if (token->text[0] == '2' || token->text[0] == '8') {
|
||
|
base = token->text[0] - '0';
|
||
|
} else {
|
||
|
base = 10 + token->text[1] - '0';
|
||
|
}
|
||
|
int sign = sign_for_match(token, &matches[2]);
|
||
|
char *endptr;
|
||
|
uintmax_t num = strtoumax(&token->text[matches[3].rm_so], &endptr, base);
|
||
|
// num is the abs of our target, so only check against positive max
|
||
|
if (*endptr || num > INT64_MAX) {
|
||
|
push_build_error(err, token, 0, "invalid numeric literal");
|
||
|
return NULL;
|
||
|
}
|
||
|
AstIntNode *node = make_ast_node(sizeof(AstIntNode), AST_TYPE_INT,
|
||
|
token->line, token->col);
|
||
|
node->value = sign * (intmax_t) num;
|
||
|
return (AstNode *) node;
|
||
|
}
|
||
|
|
||
|
// Build a number node from a number token: the decimal pattern is tried
// first, then the radix-prefixed pattern. Records "invalid numeric literal"
// and returns NULL when neither matches.
static AstNode *parse_number_token(Token *token, AstErrorList **err) {
    regmatch_t matches[REGEX_NMATCH];
    const char *text = token->text;

    const bool is_decimal =
        regexec(&DECIMAL_NUM_REGEX, text, REGEX_NMATCH, matches, 0) == 0;
    if (is_decimal) {
        return process_decimal_matches(token, matches);
    }

    const bool is_non_decimal =
        regexec(&NON_DECIMAL_NUM_REGEX, text, REGEX_NMATCH, matches, 0) == 0;
    if (is_non_decimal) {
        return process_non_decimal_matches(token, matches, err);
    }

    push_build_error(err, token, 0, "invalid numeric literal");
    return NULL;
}
|
||
|
|
||
|
// True when `node` is a symbol node whose name is exactly "t".
static bool is_node_symbol_t(AstNode *node) {
    if (node->type != AST_TYPE_SYMBOL) {
        return false;
    }
    const char *name = ((AstSymbolNode *) node)->name;
    return strcmp("t", name) == 0;
}
|
||
|
|
||
|
/*
 * Strip one quote level when the innermost quoted form is nil, a vector or
 * the symbol t.
 *
 * The chain of nested quote nodes is followed to its innermost form. If that
 * form is one of the three kinds above, `node` itself is destroyed and the
 * form it directly wrapped is returned; otherwise `node` is returned
 * unchanged.
 */
static AstNode *simplify_quote_node(AstQuoteNode *node) {
    AstNode *innermost = (AstNode *) node;
    for (; innermost->type == AST_TYPE_QUOTE;
         innermost = ((AstQuoteNode *) innermost)->form) {
        // descend through nested quotes
    }

    const bool strip = innermost->type == AST_TYPE_NULL
                       || innermost->type == AST_TYPE_VECTOR
                       || is_node_symbol_t(innermost);
    if (!strip) {
        return (AstNode *) node;
    }

    // Detach the wrapped form first so destroying the quote leaves it alive.
    AstNode *unwrapped = node->form;
    node->form = NULL;
    destroy_ast_node(node);
    return unwrapped;
}
|
||
|
|
||
|
// Wrap `form` in a quote node of the given quote type at (line, col). When
// `in_quote` is zero the new node is passed through simplify_quote_node()
// before being returned.
static AstNode *quote_ast_form(AstQuoteType type,
                               AstNode *form, size_t line, size_t col,
                               AstQuoteType in_quote) {
    AstQuoteNode *quoted = make_ast_node(sizeof(AstQuoteNode), AST_TYPE_QUOTE,
                                         line, col);
    quoted->form = form;
    quoted->type = type;
    return in_quote ? (AstNode *) quoted : simplify_quote_node(quoted);
}
|
||
|
|
||
|
static AstNode *quote_next_toplevel(Token *token, TokenStream *stream,
|
||
|
AstQuoteType in_quote, AstErrorList **err) {
|
||
|
AstQuoteType my_type;
|
||
|
switch (token->type) {
|
||
|
case TOKEN_TYPE_QUOTE:
|
||
|
my_type = AST_QUOTE_NORM;
|
||
|
break;
|
||
|
case TOKEN_TYPE_BACKQUOTE:
|
||
|
my_type = AST_QUOTE_BACK;
|
||
|
break;
|
||
|
case TOKEN_TYPE_COMMA:
|
||
|
my_type = AST_QUOTE_COMMA;
|
||
|
break;
|
||
|
case TOKEN_TYPE_SPLICE:
|
||
|
my_type = AST_QUOTE_SPLICE;
|
||
|
break;
|
||
|
default:
|
||
|
// shouldn't happen
|
||
|
abort();
|
||
|
break;
|
||
|
}
|
||
|
if (in_quote != AST_QUOTE_BACK &&
|
||
|
(my_type == AST_QUOTE_COMMA || my_type == AST_QUOTE_SPLICE)) {
|
||
|
push_build_error(err, token, 0, "comma or splice not inside a backquote");
|
||
|
return NULL;
|
||
|
}
|
||
|
if (my_type > in_quote) {
|
||
|
in_quote = my_type;
|
||
|
}
|
||
|
AstNode *internal = ast_next_toplevel_internal(stream, in_quote, err);
|
||
|
if (!internal) {
|
||
|
// error already reported
|
||
|
return NULL;
|
||
|
}
|
||
|
return quote_ast_form(my_type, internal, token->line, token->col, in_quote);
|
||
|
}
|
||
|
|
||
|
// True when `token` is a paren or bracket token whose text starts with a
// closing ')' or ']'.
static bool is_close_delim(Token *token) {
    if (token->type != TOKEN_TYPE_PAREN && token->type != TOKEN_TYPE_BRACKET) {
        return false;
    }
    const char first = token->text[0];
    return first == ')' || first == ']';
}
|
||
|
|
||
|
// True when `child` closes the delimiter opened by `token`: both tokens have
// the same type and `child` starts with ')' for parens or ']' for brackets.
static bool is_close_delim_for(Token *token, Token *child) {
    if (token->type != child->type) {
        return false;
    }
    if (token->type == TOKEN_TYPE_PAREN) {
        return child->text[0] == ')';
    }
    if (token->type == TOKEN_TYPE_BRACKET) {
        return child->text[0] == ']';
    }
    return false;
}
|
||
|
|
||
|
|
||
|
static AstNode *process_next_list_or_vector(Token *token, TokenStream *stream,
|
||
|
size_t size, AstType type,
|
||
|
off_t child_arr_off,
|
||
|
off_t child_count_off,
|
||
|
AstQuoteType in_quote,
|
||
|
AstErrorList **err) {
|
||
|
if (is_close_delim(token)) {
|
||
|
push_build_error(err, token, 0, "unmatched closing delimiter");
|
||
|
return NULL;
|
||
|
}
|
||
|
AstNode *node = make_ast_node(size, type, token->line, token->col);
|
||
|
AstNode ***child_arr_ptr = (void *) node + child_arr_off;
|
||
|
size_t *child_count_ptr = (void *) node + child_count_off;
|
||
|
*child_arr_ptr = NULL;
|
||
|
*child_count_ptr = 0;
|
||
|
bool error = false;
|
||
|
Token ctok;
|
||
|
ctok.text = NULL;
|
||
|
ctok.buf_len = 0;
|
||
|
while (true) {
|
||
|
if (!next_token(stream, &ctok, err)) {
|
||
|
// node MUST be valid for this to work
|
||
|
destroy_ast_node(node);
|
||
|
node = NULL;
|
||
|
break;
|
||
|
}
|
||
|
if (is_close_delim_for(token, &ctok)) {
|
||
|
break;
|
||
|
}
|
||
|
AstNode *cnode = process_token(&ctok, stream, in_quote, err);
|
||
|
if (!cnode) {
|
||
|
error = true;
|
||
|
if (token_stream_is_eof(stream)) {
|
||
|
push_build_error(err, token, 0, "unmatched opening delimiter");
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
*child_arr_ptr = realloc(*child_arr_ptr, sizeof(AstNode *) *
|
||
|
++(*child_count_ptr));
|
||
|
(*child_arr_ptr)[(*child_count_ptr) - 1] = cnode;
|
||
|
}
|
||
|
free(ctok.text);
|
||
|
if (error) {
|
||
|
destroy_ast_node(node);
|
||
|
return NULL;
|
||
|
}
|
||
|
return node;
|
||
|
}
|
||
|
|
||
|
// True when `node` is a symbol node whose name is exactly "quote".
static bool is_quote_symbol_node(AstNode *node) {
    if (node->type != AST_TYPE_SYMBOL) {
        return false;
    }
    const char *name = ((AstSymbolNode *) node)->name;
    return strcmp(name, "quote") == 0;
}
|
||
|
|
||
|
static AstNode *process_next_list(Token *token, TokenStream *stream,
|
||
|
AstQuoteType in_quote, AstErrorList **err) {
|
||
|
AstListNode *node = (AstListNode *)
|
||
|
process_next_list_or_vector(token, stream, sizeof(AstListNode),
|
||
|
AST_TYPE_LIST, offsetof(AstListNode, children),
|
||
|
offsetof(AstListNode, nchildren), in_quote, err);
|
||
|
if (!node) {
|
||
|
return NULL;
|
||
|
} else if (node->nchildren == 0) {
|
||
|
destroy_ast_node(node);
|
||
|
return (AstNode *) make_null_node(token->line, token->col);
|
||
|
}
|
||
|
bool is_quote = is_quote_symbol_node(node->children[0]);
|
||
|
if (is_quote && node->nchildren != 2) {
|
||
|
push_build_error(err, token, 0, "quote expects one argument, got %zu",
|
||
|
node->nchildren);
|
||
|
destroy_ast_node(node);
|
||
|
return NULL;
|
||
|
} else if (is_quote) {
|
||
|
AstNode *internal = node->children[1];
|
||
|
node->nchildren = 1;
|
||
|
destroy_ast_node(node);
|
||
|
return (AstNode *)quote_ast_form(AST_QUOTE_NORM, internal,
|
||
|
token->line, token->col,
|
||
|
in_quote);
|
||
|
}
|
||
|
return (AstNode *) node;
|
||
|
}
|
||
|
|
||
|
// true on success, false on error
|
||
|
static bool next_token(TokenStream *stream, Token *out, AstErrorList **err) {
|
||
|
out->text = NULL;
|
||
|
out->buf_len = 0;
|
||
|
do {
|
||
|
token_stream_next(stream, out);
|
||
|
ParseError *parse_err;
|
||
|
bool had_error = false;
|
||
|
while ((parse_err = token_stream_error(stream))) {
|
||
|
push_parse_error(err, parse_err);
|
||
|
had_error = true;
|
||
|
}
|
||
|
if (had_error) {
|
||
|
free(out->text);
|
||
|
out->text = NULL;
|
||
|
out->buf_len = 0;
|
||
|
return false;
|
||
|
}
|
||
|
} while (out->type == TOKEN_TYPE_COMMENT);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
static AstNode *process_token(Token *token, TokenStream *stream,
|
||
|
AstQuoteType in_quote, AstErrorList **err) {
|
||
|
AstNode *retval = NULL;
|
||
|
switch (token->type) {
|
||
|
case TOKEN_TYPE_CHAR:
|
||
|
retval = (AstNode *) process_char_token(token, err);
|
||
|
break;
|
||
|
case TOKEN_TYPE_NUMBER:
|
||
|
retval = parse_number_token(token, err);
|
||
|
break;
|
||
|
case TOKEN_TYPE_STRING:
|
||
|
retval = (AstNode *) process_string_token(token, err);
|
||
|
break;
|
||
|
case TOKEN_TYPE_SYMBOL:
|
||
|
case TOKEN_TYPE_PROPERTY:
|
||
|
retval = (AstNode *) process_symbol_token(token);
|
||
|
break;
|
||
|
case TOKEN_TYPE_BACKQUOTE:
|
||
|
case TOKEN_TYPE_COMMA:
|
||
|
case TOKEN_TYPE_SPLICE:
|
||
|
case TOKEN_TYPE_QUOTE:
|
||
|
retval = (AstNode *) quote_next_toplevel(token, stream, in_quote, err);
|
||
|
break;
|
||
|
case TOKEN_TYPE_PAREN:
|
||
|
retval = process_next_list(token, stream, in_quote, err);
|
||
|
break;
|
||
|
case TOKEN_TYPE_BRACKET:
|
||
|
retval = process_next_list_or_vector(token, stream, sizeof(AstVectorNode),
|
||
|
AST_TYPE_VECTOR,
|
||
|
offsetof(AstVectorNode, children),
|
||
|
offsetof(AstVectorNode, nchildren),
|
||
|
AST_QUOTE_NORM, err);
|
||
|
break;
|
||
|
case TOKEN_TYPE_UNKNOWN:
|
||
|
push_build_error(err, token, 0, "unknown token");
|
||
|
break;
|
||
|
case TOKEN_TYPE_EOF:
|
||
|
// do nothing
|
||
|
break;
|
||
|
case TOKEN_TYPE_COMMENT:
|
||
|
// shouldn't happen
|
||
|
abort();
|
||
|
break;
|
||
|
}
|
||
|
return retval;
|
||
|
}
|
||
|
|
||
|
static AstNode *ast_next_toplevel_internal(TokenStream *stream,
|
||
|
AstQuoteType in_quote,
|
||
|
AstErrorList **err) {
|
||
|
Token token;
|
||
|
token.text = NULL;
|
||
|
token.buf_len = 0;
|
||
|
if (!next_token(stream, &token, err)) {
|
||
|
return NULL;
|
||
|
}
|
||
|
return process_token(&token, stream, in_quote, err);
|
||
|
}
|
||
|
|
||
|
// Read the next form from `stream`, starting outside any quote
// (AST_QUOTE_NONE). Errors are appended to `*err`.
AstNode *ast_next_toplevel(TokenStream *stream, AstErrorList **err) {
    AstNode *form = ast_next_toplevel_internal(stream, AST_QUOTE_NONE, err);
    return form;
}
|
||
|
|
||
|
void destroy_ast_node(void *node) {
|
||
|
if (!node) {
|
||
|
return;
|
||
|
}
|
||
|
switch (((AstNode *)node)->type) {
|
||
|
case AST_TYPE_LIST:
|
||
|
for (size_t i = 0; i < ((AstListNode *) node)->nchildren; ++i) {
|
||
|
destroy_ast_node(((AstListNode *) node)->children[i]);
|
||
|
}
|
||
|
free(((AstListNode *) node)->children);
|
||
|
break;
|
||
|
case AST_TYPE_VECTOR:
|
||
|
for (size_t i = 0; i < ((AstVectorNode *) node)->nchildren; ++i) {
|
||
|
destroy_ast_node(((AstVectorNode *) node)->children[i]);
|
||
|
}
|
||
|
free(((AstVectorNode *) node)->children);
|
||
|
break;
|
||
|
case AST_TYPE_STRING:
|
||
|
free(((AstStringNode *) node)->value);
|
||
|
break;
|
||
|
case AST_TYPE_SYMBOL:
|
||
|
if (!((AstSymbolNode *)node)->skip_free) {
|
||
|
free(((AstSymbolNode *) node)->name);
|
||
|
}
|
||
|
break;
|
||
|
case AST_TYPE_QUOTE:
|
||
|
destroy_ast_node(((AstQuoteNode *) node)->form);
|
||
|
break;
|
||
|
case AST_TYPE_INT:
|
||
|
case AST_TYPE_FLOAT:
|
||
|
case AST_TYPE_NULL:
|
||
|
break;
|
||
|
}
|
||
|
free(node);
|
||
|
}
|
||
|
|
||
|
// Source-syntax prefix for a quote type: "" for none, "'" for a normal
// quote, "`" for a backquote, "," for a comma and ",@" for a splice.
// Any other value yields "".
static const char *str_for_ast_quote_type(AstQuoteType type) {
    switch (type) {
    case AST_QUOTE_NONE:
        return "";
    case AST_QUOTE_NORM:
        return "'";
    case AST_QUOTE_BACK:
        return "`";
    case AST_QUOTE_COMMA:
        return ",";
    case AST_QUOTE_SPLICE:
        return ",@";
    }
    // An out-of-range value must not fall off the end of a non-void
    // function; callers pass the result straight to fprintf and strlen.
    return "";
}
|
||
|
|
||
|
/*
 * Print `node` to `stream`.
 *
 * padding:        number of spaces of indentation for this node
 * skip_print_pad: when true the indentation is not printed (the node
 *                 continues a line that has already been started)
 *
 * Integers print as decimal followed by a parenthesised character form
 * (only for values in the single-byte range) and hexadecimal form. Strings
 * print double-quoted with escapes, lists and vectors one child per line.
 */
static void ast_prin1_node_internal(AstNode *node, FILE *stream, int padding,
                                    bool skip_print_pad) {
    if (!skip_print_pad) {
        for (int i = 0; i < padding; ++i) {
            fputc(' ', stream);
        }
    }
    switch (node->type) {
    case AST_TYPE_INT: {
        int64_t value = ((AstIntNode *) node)->value;
        fprintf(stream, "%" PRId64 " (", value);
        // Only single-byte values have a character form: isprint() is
        // undefined outside the unsigned char range, and truncating a larger
        // value to char would print the escape of an unrelated character.
        if (value >= 0 && value <= 0xff) {
            char escape = escape_for_char((char) value);
            if (escape) {
                fprintf(stream, "#\\%c, ", escape);
            } else if (isprint((int) value)) {
                fprintf(stream, "#%c, ", (char) value);
            }
        }
        // Negate as unsigned so INT64_MIN does not overflow; this also gives
        // PRIx64 the unsigned argument it expects.
        uint64_t magnitude = (uint64_t) value;
        if (value < 0) {
            fputc('-', stream);
            magnitude = 0 - magnitude;
        }
        fprintf(stream, "0x%" PRIx64 ")", magnitude);
    }
        break;
    case AST_TYPE_STRING: {
        size_t escaped_len;
        char *escaped_string = escape_string(((AstStringNode *) node)->value,
                                             ((AstStringNode *) node)->length,
                                             &escaped_len);
        fputc('"', stream);
        if (escaped_string) {
            fwrite(escaped_string, 1, escaped_len, stream);
        }
        fputc('"', stream);
        // escape_string() returns a heap buffer that belongs to us.
        free(escaped_string);
    }
        break;
    case AST_TYPE_SYMBOL:
        fwrite(((AstSymbolNode *) node)->name, 1,
               ((AstSymbolNode *) node)->name_length, stream);
        break;
    case AST_TYPE_FLOAT:
        fprintf(stream, "%g", ((AstFloatNode *) node)->value);
        break;
    case AST_TYPE_LIST:
        dump_node_list(((AstListNode *) node)->children,
                       ((AstListNode *) node)->nchildren,
                       '(', ')', padding, stream);
        break;
    case AST_TYPE_VECTOR:
        dump_node_list(((AstVectorNode *) node)->children,
                       ((AstVectorNode *) node)->nchildren,
                       '[', ']', padding, stream);
        break;
    case AST_TYPE_QUOTE: {
        const char *quote_str =
            str_for_ast_quote_type(((AstQuoteNode *) node)->type);
        fprintf(stream, "%s", quote_str);
        // The quoted form's continuation lines are indented past the prefix.
        padding += strlen(quote_str);
        ast_prin1_node_internal(((AstQuoteNode *) node)->form, stream,
                                padding, true);
    }
        break;
    case AST_TYPE_NULL:
        fwrite("nil", 1, 3, stream);
        break;
    }
}
|
||
|
|
||
|
// Print `count` nodes between `sdelim` and `edelim`. The first node follows
// the opening delimiter directly; each later node starts on its own line,
// indented by padding + 1.
static void dump_node_list(AstNode **list, size_t count, char sdelim,
                           char edelim, int padding, FILE *stream) {
    fputc(sdelim, stream);
    for (size_t i = 0; i < count; ++i) {
        const bool is_first = (i == 0);
        if (!is_first) {
            fputc('\n', stream);
        }
        ast_prin1_node_internal(list[i], stream, padding + 1, is_first);
    }
    fputc(edelim, stream);
}
|
||
|
|
||
|
// Print `node` to `stream` with no indentation, followed by a newline.
void ast_prin1_node(AstNode *node, FILE *stream) {
    const int no_padding = 0;
    ast_prin1_node_internal(node, stream, no_padding, false);
    fputc('\n', stream);
}
|
||
|
|
||
|
// Detach and return the first entry of `*list`, advancing `*list` to the
// entry after it. Returns NULL when the list is empty.
AstErrorList *ast_error_list_pop(AstErrorList **list) {
    AstErrorList *head = *list;
    if (head) {
        *list = head->next;
    }
    return head;
}
|
||
|
|
||
|
// Free one error entry together with its payload: the ParseError of a parse
// error, or the message and token of a build error. The `next` link is not
// followed. NULL is accepted and ignored.
void ast_error_list_free_one(AstErrorList *list) {
    if (!list) {
        return;
    }
    switch (list->type) {
    case AST_ERROR_BUILD:
        free(list->build.msg);
        token_free(&list->build.token);
        break;
    case AST_ERROR_PARSE:
        parse_error_free(list->parse);
        break;
    }
    free(list);
}
|
||
|
|
||
|
// Free every entry of the list starting at `list`.
void ast_error_list_free_all(AstErrorList *list) {
    for (AstErrorList *entry = list, *following; entry; entry = following) {
        // read the link before the entry is freed
        following = entry->next;
        ast_error_list_free_one(entry);
    }
}
|
||
|
|
||
|
/*
 * Locate the last non-empty line in the first `len` bytes of `str`.
 *
 * Trailing newlines are skipped. `*line_len` receives the number of
 * non-newline bytes in that line, and `*num_passed` the number of newlines
 * before it. Returns a pointer to the line's first byte, or `str` when no
 * newline precedes the line (including when there is no line at all).
 */
static const char *start_of_last_line(const char *str, size_t len,
                                      size_t *line_len, size_t *num_passed) {
    const char *line_start = str;
    size_t pos = len;

    *num_passed = 0;
    *line_len = 0;

    // Scan backwards: count the line's bytes, and stop at the first newline
    // found once at least one byte has been counted.
    while (pos > 0) {
        const char ch = str[pos - 1];
        if (ch != '\n') {
            ++(*line_len);
        } else if (*line_len) {
            line_start = str + pos;
            break;
        }
        --pos;
    }

    // Count every newline from that position back to the start, including
    // the one that ended the scan above.
    while (pos > 0) {
        if (str[pos - 1] == '\n') {
            ++(*num_passed);
        }
        --pos;
    }
    return line_start;
}
|
||
|
|
||
|
/*
 * Write a human-readable description of one error entry to `stream`.
 *
 * The output is "error: ", then "<file_name>: " when file_name is not NULL,
 * then "<line>:<col>: <message>", the offending source text on its own line
 * and a '^' marker on the line below it. Does nothing when `err` is NULL.
 */
void ast_format_error(AstErrorList *err, const char *file_name, FILE *stream) {
    if (!err) {
        return;
    }
    fprintf(stream, "error: ");
    if (file_name) {
        fprintf(stream, "%s: ", file_name);
    }
    switch (err->type) {
    case AST_ERROR_PARSE: {
        size_t line_len;
        size_t num_passed;
        const char *last_line = start_of_last_line(err->parse->context,
                                                   strlen(err->parse->context),
                                                   &line_len, &num_passed);
        // When the error is at the end of the context, the reported column
        // and the marker both point at the last character of the line.
        fprintf(stream, "%zu:%zu: %s\n ", err->parse->line + num_passed,
                err->parse->at_end ? err->parse->col + line_len - 1 :
                err->parse->col, err->parse->desc);
        fwrite(last_line, 1, line_len, stream);
        // fputs() stops at the terminator; writing 3 bytes of this
        // 2-character literal would also emit its NUL byte.
        fputs("\n ", stream);
        if (err->parse->at_end) {
            for (size_t i = 1; i < line_len; ++i) {
                fputc(' ', stream);
            }
        }
        fputs("^\n", stream);
    }
        break;
    case AST_ERROR_BUILD: {
        // %s must never be given NULL; either pointer can be NULL if the
        // token had no text or the message could not be allocated.
        const char *msg = err->build.msg ? err->build.msg : "";
        const char *text = err->build.token.text ? err->build.token.text : "";
        fprintf(stream, "%zu:%zu: %s\n %s\n ", err->build.token.line,
                err->build.token.col + err->build.off,
                msg, text);
        // one space per character before the offending offset
        for (size_t i = 1; i <= err->build.off; ++i) {
            fputc(' ', stream);
        }
        fputs("^\n", stream);
    }
        break;
    }
}
|