simple-lisp/bootstrap/ast.c
2024-09-24 07:03:52 -07:00

938 lines
30 KiB
C

#include "ast.h"
#include <stdarg.h>
#include <ctype.h>
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <regex.h>
#include <math.h>
// Forward declarations for static helpers that are defined later in this file
// but used before their definitions.
// Reads the next non-comment token into `out`; true on success, false on error.
static bool next_token(TokenStream *stream, Token *out, AstErrorList **err);
// Builds the AST node for an already-read token (may read further tokens).
static AstNode *process_token(Token *token, TokenStream *stream,
AstQuoteType in_quote, AstErrorList **err);
// Reads one token and builds the form that starts with it.
static AstNode *ast_next_toplevel_internal(TokenStream *stream,
AstQuoteType in_quote,
AstErrorList **err);
// Prints `count` nodes between the delimiters `sdelim` and `edelim`.
static void dump_node_list(AstNode **list, size_t count, char sdelim,
char edelim, int padding, FILE *stream);
// POSIX extended regex for decimal literals. Capture groups, as consumed by
// process_decimal_matches: 1 = sign, 2 = integer digits, 3 = fraction digits,
// 4 = whole exponent part, 5 = exponent sign, 6 = exponent integer digits,
// 7 = exponent fraction digits.
static const char *DECIMAL_NUM_PAT =
"^([+-])?([0-9]*)\\.?([0-9]*)(e([+-]?)([0-9]*)\\.?([0-9]*))?$";
// Compiled form of DECIMAL_NUM_PAT; set up by ast_init_parser.
static regex_t DECIMAL_NUM_REGEX;
// POSIX extended regex for "<base>#<digits>" literals. Capture groups, as
// consumed by process_non_decimal_matches: 1 = base, 2 = sign, 3 = digits.
static const char *NON_DECIMAL_NUM_PAT =
"^(2|8|10|16)#([+-])?([0-9a-f]+)$";
// Compiled form of NON_DECIMAL_NUM_PAT; set up by ast_init_parser.
static regex_t NON_DECIMAL_NUM_REGEX;
// Number of regmatch_t slots needed for either regex (largest re_nsub + 1);
// computed by ast_init_parser.
static size_t REGEX_NMATCH;
/*
 * Compile the two numeric-literal regexes and compute REGEX_NMATCH.
 *
 * Both patterns are always compiled so that every failure is reported on
 * stderr; if either one fails to compile the process exits with status 1.
 */
void ast_init_parser() {
    bool had_error = false;
    char msg[1024];

    int code = regcomp(&DECIMAL_NUM_REGEX, DECIMAL_NUM_PAT, REG_EXTENDED);
    if (code != 0) {
        fprintf(stderr, "Failed to compile decimal number regex:\n%s\n",
                DECIMAL_NUM_PAT);
        regerror(code, &DECIMAL_NUM_REGEX, msg, sizeof(msg));
        fprintf(stderr, " %s\n", msg);
        had_error = true;
    }

    // Keep this regcomp's own return value: regerror needs the code that
    // belongs to the regex being described.
    code = regcomp(&NON_DECIMAL_NUM_REGEX, NON_DECIMAL_NUM_PAT, REG_EXTENDED);
    if (code != 0) {
        if (had_error) {
            // blank line between the two reports
            fputc('\n', stderr);
        }
        fprintf(stderr, "Failed to compile non-decimal number regex:\n%s\n",
                NON_DECIMAL_NUM_PAT);
        regerror(code, &NON_DECIMAL_NUM_REGEX, msg, sizeof(msg));
        fprintf(stderr, " %s\n", msg);
        had_error = true;
    }

    if (had_error) {
        exit(1);
    }

    // One slot per capture group plus one for the whole match.
    size_t max_groups = DECIMAL_NUM_REGEX.re_nsub;
    if (NON_DECIMAL_NUM_REGEX.re_nsub > max_groups) {
        max_groups = NON_DECIMAL_NUM_REGEX.re_nsub;
    }
    REGEX_NMATCH = max_groups + 1;
}
/* Release both regexes compiled by ast_init_parser. */
void ast_deinit_parser() {
    regex_t *compiled[] = { &DECIMAL_NUM_REGEX, &NON_DECIMAL_NUM_REGEX };
    for (size_t i = 0; i < sizeof(compiled) / sizeof(compiled[0]); ++i) {
        regfree(compiled[i]);
    }
}
/* Append `err` to the end of the singly linked list `*list`. */
static void push_error_list_end(AstErrorList **list, AstErrorList *err) {
    err->next = NULL;
    // Walk the chain of `next` links until the empty slot at the end.
    AstErrorList **slot = list;
    while (*slot) {
        slot = &(*slot)->next;
    }
    *slot = err;
}
/*
 * Record a build error for `token` at byte offset `off`, with a printf-style
 * message.
 *
 * When `list` is non-NULL the error entry takes over the token's text buffer:
 * the token is copied into the entry and the caller's `token->text` and
 * `token->buf_len` are cleared. When `list` is NULL nothing happens and the
 * token is left untouched.
 */
__attribute__((format(printf, 4, 5)))
static void push_build_error(AstErrorList **list, Token *token, size_t off,
                             const char *fmt, ...) {
    if (!list) {
        return;
    }
    AstErrorList *n = malloc(sizeof(AstErrorList));
    n->type = AST_ERROR_BUILD;
    n->build.off = off;
    n->build.token = *token;
    // ownership of the text buffer moved into the error entry
    token->text = NULL;
    token->buf_len = 0;
    n->build.msg = NULL;
    va_list args;
    va_start(args, fmt);
    // On failure the contents of the output pointer are not guaranteed, so
    // reset it to NULL to keep the later free() of the message well-defined.
    if (vasprintf(&n->build.msg, fmt, args) < 0) {
        n->build.msg = NULL;
    }
    va_end(args);
    push_error_list_end(list, n);
}
/*
 * Wrap the tokenizer error `err` in a new list entry and append it to
 * `*list`. Does nothing when `list` is NULL.
 */
static void push_parse_error(AstErrorList **list, ParseError *err) {
    if (!list) {
        return;
    }
    AstErrorList *entry = malloc(sizeof(AstErrorList));
    entry->parse = err;
    entry->type = AST_ERROR_PARSE;
    push_error_list_end(list, entry);
}
/*
 * Allocate `size` bytes for a node and fill in the common AstNode header
 * (type and source position). Other fields are left uninitialized.
 */
static void *make_ast_node(size_t size, AstType type, size_t line,
                           size_t col) {
    AstNode *fresh = malloc(size);
    fresh->col = col;
    fresh->line = line;
    fresh->type = type;
    return fresh;
}
/*
 * Case-insensitive comparison of two NUL-terminated strings.
 *
 * Returns 0 when the strings are equal ignoring case, otherwise the
 * difference between the first pair of lower-cased characters that differ.
 * Bytes are read as unsigned char because passing a negative char value to
 * tolower() is undefined behavior.
 */
static int compat_strcasecmp(const char *str1, const char *str2) {
    const unsigned char *a = (const unsigned char *) str1;
    const unsigned char *b = (const unsigned char *) str2;
    for (;; ++a, ++b) {
        int ca = tolower(*a);
        int cb = tolower(*b);
        // stop at the first difference or when both strings end together
        if (ca != cb || ca == '\0') {
            return ca - cb;
        }
    }
}
// Parse a numeric character escape: 'x' + 4 hex digits, 'o' + 6 octal digits
// or 'd' + 5 decimal digits (the prefix letter is case-insensitive).
// `escape` points at the prefix letter. With `allow_trailing` set, text after
// the digits is ignored; otherwise the escape must end right after them.
// Returns the number of chars converted on success (prefix included), 0 on
// failure. `*out` is written only on success.
static int convert_numeric_char_escape(const char *escape, wchar_t *out,
                                       bool allow_trailing) {
    size_t expected_len;
    int base;
    switch (tolower((unsigned char) escape[0])) {
    case 'x':
        expected_len = 4;
        base = 16;
        break;
    case 'o':
        expected_len = 6;
        base = 8;
        break;
    case 'd':
        expected_len = 5;
        base = 10;
        break;
    default:
        // unknown prefix (this also covers the empty string)
        return 0;
    }
    size_t len = strlen(escape + 1);
    if (len < expected_len || (!allow_trailing && len > expected_len)) {
        return 0;
    }
    // Large enough for the longest form (6 octal digits) plus the terminator.
    char numbuf[7];
    for (size_t i = 0; i < expected_len; ++i) {
        unsigned char ch = (unsigned char) escape[i + 1];
        bool is_digit;
        if (base == 16) {
            is_digit = isxdigit(ch) != 0;
        } else if (base == 10) {
            is_digit = isdigit(ch) != 0;
        } else {
            is_digit = ch >= '0' && ch <= '7';
        }
        // Validate every digit here: strtoumax on its own would also accept
        // leading whitespace, a sign or a "0x" prefix.
        if (!is_digit) {
            return 0;
        }
        numbuf[i] = (char) ch;
    }
    numbuf[expected_len] = '\0';
    *out = (wchar_t) strtoumax(numbuf, NULL, base);
    return (int) expected_len + 1;
}
// Single-character backslash escapes and the character each one stands for.
// Searched by escape letter in convert_c_style_char_escape and by value in
// escape_for_char.
static const struct {
char escape;
char value;
} C_STYLE_ESCAPE_MAP[] = {
{'n', '\n'},
{'t', '\t'},
{'r', '\r'},
{'v', '\v'},
{'f', '\f'},
{'b', '\b'},
{'a', '\a'},
{'0', '\0'},
{'\\', '\\'},
};
// Number of entries in C_STYLE_ESCAPE_MAP.
const size_t C_STYLE_ESCAPE_COUNT = sizeof(C_STYLE_ESCAPE_MAP) /
sizeof(C_STYLE_ESCAPE_MAP[0]);
// Look up the first character of `escape` (case-insensitively) in
// C_STYLE_ESCAPE_MAP and store the character it stands for in `*out`.
// true on success, false on failure
static bool convert_c_style_char_escape(const char *escape, wchar_t *out) {
    // tolower() requires a value representable as unsigned char (or EOF);
    // a plain char may be negative.
    int letter = tolower((unsigned char) escape[0]);
    for (size_t i = 0; i < C_STYLE_ESCAPE_COUNT; ++i) {
        if (letter == C_STYLE_ESCAPE_MAP[i].escape) {
            *out = C_STYLE_ESCAPE_MAP[i].value;
            return true;
        }
    }
    return false;
}
// Reverse lookup in C_STYLE_ESCAPE_MAP: the escape letter for `to_escape`.
// null byte on failure
static char escape_for_char(char to_escape) {
    size_t idx = 0;
    while (idx < C_STYLE_ESCAPE_COUNT) {
        if (C_STYLE_ESCAPE_MAP[idx].value == to_escape) {
            return C_STYLE_ESCAPE_MAP[idx].escape;
        }
        ++idx;
    }
    return '\0';
}
/*
 * Return a newly allocated, NUL-terminated copy of the first `input_len`
 * bytes of `input` in which every character that has a C-style escape, and
 * every double quote, is replaced by a backslash followed by its escape
 * character.
 *
 * If `out_len` is non-NULL it receives the length of the result, excluding
 * the terminator. The caller owns the result and must free() it. Returns
 * NULL (with `*out_len` set to 0) if the allocation fails.
 */
char *ast_escape_string(const char *input, size_t input_len, size_t *out_len) {
    size_t backup_len;
    if (!out_len) {
        out_len = &backup_len;
    }
    *out_len = 0;

    // First pass: every escaped character needs one extra byte.
    size_t needed = input_len + 1;
    for (size_t i = 0; i < input_len; ++i) {
        if (input[i] == '"' || escape_for_char(input[i])) {
            ++needed;
        }
    }

    char *out = malloc(needed);
    if (!out) {
        return NULL;
    }

    // Second pass: emit the escaped text.
    size_t pos = 0;
    for (size_t i = 0; i < input_len; ++i) {
        char escape = input[i] == '"' ? '"' : escape_for_char(input[i]);
        if (escape) {
            out[pos++] = '\\';
            out[pos++] = escape;
        } else {
            out[pos++] = input[i];
        }
    }
    out[pos] = '\0';
    *out_len = pos;
    return out;
}
// Names that convert_named_char_escape accepts (compared case-insensitively)
// and the character each name stands for.
static const struct {
const char *escape;
char value;
} NAMED_CHAR_ESCAPE_MAP[] = {
{"newline", '\n'},
{"tab", '\t'},
{"return", '\r'},
{"vtab", '\v'},
{"page_break", '\f'},
{"backspace", '\b'},
{"alert", '\a'},
{"null", '\0'},
{"backslash", '\\'},
};
// Number of entries in NAMED_CHAR_ESCAPE_MAP.
static const size_t NAMED_CHAR_COUNT = sizeof(NAMED_CHAR_ESCAPE_MAP) /
sizeof(NAMED_CHAR_ESCAPE_MAP[0]);
// Match the whole of `escape` against the names in NAMED_CHAR_ESCAPE_MAP,
// ignoring case, and store the matching character in `*out`.
// true on success, false on failure
static bool convert_named_char_escape(const char *escape, wchar_t *out) {
    size_t idx = 0;
    while (idx < NAMED_CHAR_COUNT) {
        const char *name = NAMED_CHAR_ESCAPE_MAP[idx].escape;
        if (compat_strcasecmp(name, escape) == 0) {
            *out = NAMED_CHAR_ESCAPE_MAP[idx].value;
            return true;
        }
        ++idx;
    }
    return false;
}
/*
 * Build an integer node holding the value of a character literal token.
 *
 * The token text starts with '#'. If the next character is a backslash, the
 * rest is tried as a named escape, then a C-style escape, then a numeric
 * escape; otherwise the character after '#' is the value.
 *
 * Returns NULL after pushing a build error when the escape is missing or
 * invalid. The token's text is never freed here: push_build_error copies the
 * token into the error entry and takes over the text buffer, so freeing it
 * first would leave the entry with a dangling pointer.
 */
static AstIntNode *process_char_token(Token *token, AstErrorList **err) {
    // skip the leading '#'
    const char *sym = token->text + 1;
    bool is_escape = sym[0] == '\\';
    wchar_t c = 0;
    if (is_escape) {
        // token->len counts the '#' too, so "#\" on its own has length 2
        if (token->len < 3) {
            push_build_error(err, token, 1, "expected escape sequence");
            return NULL;
        }
        if (!convert_named_char_escape(sym + 1, &c) &&
            !convert_c_style_char_escape(sym + 1, &c) &&
            !convert_numeric_char_escape(sym + 1, &c, false)) {
            push_build_error(err, token, 0,
                             "invalid escape sequence in character literal");
            return NULL;
        }
    }
    AstIntNode *node = make_ast_node(sizeof(AstIntNode), AST_TYPE_INT,
                                     token->line, token->col);
    if (is_escape) {
        node->value = c;
    } else {
        node->value = sym[0];
    }
    return node;
}
// Make sure *buf can hold at least `needed` bytes, growing it if required.
// Returns false after freeing *buf (and setting it to NULL) if that fails.
static bool reserve_string_buffer(char **buf, size_t *size, size_t needed) {
    if (needed <= *size) {
        return true;
    }
    char *grown = realloc(*buf, needed);
    if (!grown) {
        free(*buf);
        *buf = NULL;
        return false;
    }
    *buf = grown;
    *size = needed;
    return true;
}

/*
 * Decode the body of a string literal token into a newly allocated,
 * NUL-terminated buffer.
 *
 * The first and last characters of the token text are skipped as the
 * delimiters. A backslash followed by a newline produces nothing; any other
 * backslash sequence must be a C-style or numeric escape and is stored as a
 * multibyte sequence via wctomb(). `*out_len` receives the decoded length,
 * excluding the terminator.
 *
 * Returns NULL after pushing a build error if an escape is invalid, cannot be
 * represented in the current locale, or memory runs out. The partially built
 * buffer is released in those cases.
 */
static char *process_string_escapes(Token *token, size_t *out_len,
                                    AstErrorList **err) {
    static const char *const OOM_MSG =
        "out of memory while processing string literal";
    const char *text = token->text;
    const size_t len = token->len;
    size_t out_size = len + 1;
    char *out = malloc(out_size);
    *out_len = 0;
    if (!out) {
        push_build_error(err, token, 0, "%s", OOM_MSG);
        return NULL;
    }
    bool backslash = false;
    // "i + 1 < len" rather than "i < len - 1": the latter wraps when len is 0
    for (size_t i = 1; i + 1 < len; ++i) {
        if (!backslash && text[i] == '\\') {
            backslash = true;
            continue;
        }
        if (backslash && text[i] == '\n') {
            // backslash can escape a newline
        } else if (backslash) {
            size_t count = 1;
            wchar_t c;
            if (!convert_c_style_char_escape(&text[i], &c)) {
                count = (size_t) convert_numeric_char_escape(&text[i], &c,
                                                             true);
            }
            if (count == 0) {
                free(out);
                push_build_error(err, token, i, "invalid escape sequence");
                return NULL;
            }
            // room for the longest multibyte sequence plus the terminator
            if (!reserve_string_buffer(&out, &out_size,
                                       *out_len + MB_CUR_MAX + 1)) {
                push_build_error(err, token, i, "%s", OOM_MSG);
                return NULL;
            }
            int written = wctomb(out + *out_len, c);
            if (written < 0) {
                // reset the conversion state after the failed conversion
                (void) wctomb(NULL, L'\0');
                free(out);
                push_build_error(err, token, i,
                                 "escape sequence cannot be represented in "
                                 "the current locale");
                return NULL;
            }
            *out_len += (size_t) written;
            // skip the remaining characters of a multi-character escape
            i += count - 1;
        } else {
            if (!reserve_string_buffer(&out, &out_size, *out_len + 2)) {
                push_build_error(err, token, i, "%s", OOM_MSG);
                return NULL;
            }
            out[(*out_len)++] = text[i];
        }
        backslash = false;
    }
    if (!reserve_string_buffer(&out, &out_size, *out_len + 1)) {
        push_build_error(err, token, 0, "%s", OOM_MSG);
        return NULL;
    }
    out[*out_len] = '\0';
    // Trim to the exact size; if shrinking fails the larger buffer is still
    // valid and is returned instead.
    char *trimmed = realloc(out, *out_len + 1);
    return trimmed ? trimmed : out;
}
/*
 * Build a string node from a string literal token. Returns NULL if decoding
 * the escapes failed (process_string_escapes returned NULL).
 */
static AstStringNode *process_string_token(Token *token, AstErrorList **err) {
    AstStringNode *str = make_ast_node(sizeof(AstStringNode), AST_TYPE_STRING,
                                       token->line, token->col);
    str->value = process_string_escapes(token, &str->length, err);
    if (str->value) {
        return str;
    }
    free(str);
    return NULL;
}
/* Create a node of type AST_TYPE_NULL at the given source position. */
static AstNode *make_null_node(size_t line, size_t col) {
    AstNode *nil_node = make_ast_node(sizeof(AstNode), AST_TYPE_NULL,
                                      line, col);
    return nil_node;
}
/*
 * Build the node for a symbol or property token.
 *
 * The text "nil" yields a null node and leaves the token untouched. Anything
 * else yields a symbol node that takes over the token's text buffer (the
 * token's text pointer and buffer length are cleared).
 */
static AstNode *process_symbol_token(Token *token) {
    bool is_nil = strcmp(token->text, "nil") == 0;
    if (is_nil) {
        return (AstNode *) make_null_node(token->line, token->col);
    }
    AstSymbolNode *sym = make_ast_node(sizeof(AstSymbolNode), AST_TYPE_SYMBOL,
                                       token->line, token->col);
    sym->skip_free = false;
    sym->is_property = token->text[0] == ':';
    sym->name_length = token->len;
    sym->name = token->text;
    // the node now owns the text
    token->buf_len = 0;
    token->text = NULL;
    return (AstNode *) sym;
}
/*
 * Sign denoted by a sign capture group: -1 if the group is non-empty and
 * starts at a '-', otherwise 1.
 */
static int sign_for_match(Token *token, regmatch_t *match) {
    bool is_empty = match->rm_so == match->rm_eo;
    if (is_empty) {
        return 1;
    }
    return token->text[match->rm_so] == '-' ? -1 : 1;
}
/*
 * Convert the digit runs of capture groups `main` and `dec` to base-10
 * unsigned integers, stored in `*main_out` and `*dec_out`. An empty group
 * yields 0.
 */
static void break_number_for_matches(Token *token, regmatch_t *matches,
                                     int main, int dec, uintmax_t *main_out,
                                     uintmax_t *dec_out) {
    const int groups[2] = { main, dec };
    uintmax_t *const results[2] = { main_out, dec_out };
    for (int k = 0; k < 2; ++k) {
        const regmatch_t *m = &matches[groups[k]];
        uintmax_t parsed = 0;
        // only parse when the group covers at least one character
        if (m->rm_eo != m->rm_so) {
            parsed = strtoumax(token->text + m->rm_so, NULL, 10);
        }
        *results[k] = parsed;
    }
}
// Value of the decimal digits covered by `match`. With `as_fraction` set the
// digits are read as the part after a decimal point, so "05" gives 0.05;
// otherwise they are read as a whole number. An empty or unmatched group
// gives 0.
static double decimal_digits_value(const char *text, const regmatch_t *match,
                                   bool as_fraction) {
    double value = 0.0;
    double scale = 1.0;
    for (regoff_t i = match->rm_so; i < match->rm_eo; ++i) {
        // Fraction digits this far out cannot change the result any more;
        // stopping keeps `scale` finite.
        if (as_fraction && scale >= 1e300) {
            break;
        }
        value = value * 10.0 + (double) (text[i] - '0');
        scale *= 10.0;
    }
    return as_fraction ? value / scale : value;
}

/*
 * Build a node from a token matched by DECIMAL_NUM_REGEX.
 *
 * The literal becomes an integer node when it has no non-zero fraction, its
 * exponent is non-negative with no non-zero fraction, and the result fits in
 * int64_t. In every other case it becomes a float node.
 */
static AstNode *process_decimal_matches(Token *token, regmatch_t *matches) {
    int main_sign = sign_for_match(token, &matches[1]);
    int exp_sign = sign_for_match(token, &matches[5]);
    uintmax_t main_main, main_dec;
    break_number_for_matches(token, matches, 2, 3, &main_main, &main_dec);
    uintmax_t exp_main, exp_dec;
    break_number_for_matches(token, matches, 6, 7, &exp_main, &exp_dec);

    if (main_dec == 0 && exp_dec == 0 && exp_sign == 1) {
        // Scale by the exponent in integer arithmetic. The sign is applied
        // last, in signed arithmetic, so negative literals do not wrap.
        uintmax_t magnitude = main_main;
        bool fits = magnitude <= (uintmax_t) INT64_MAX;
        for (uintmax_t i = 0; fits && magnitude != 0 && i < exp_main; ++i) {
            if (magnitude > (uintmax_t) INT64_MAX / 10) {
                fits = false;
            } else {
                magnitude *= 10;
            }
        }
        if (fits) {
            AstIntNode *node = make_ast_node(sizeof(AstIntNode), AST_TYPE_INT,
                                             token->line, token->col);
            node->value = main_sign < 0 ? -(int64_t) magnitude
                                        : (int64_t) magnitude;
            return (AstNode *) node;
        }
        // too large for an integer node: fall through and return a float
    }

    AstFloatNode *node = make_ast_node(sizeof(AstFloatNode), AST_TYPE_FLOAT,
                                       token->line, token->col);
    // UINTMAX_MAX is what strtoumax returns when the digits overflowed; in
    // that case recompute the whole part from the digits themselves.
    double main_whole = main_main == UINTMAX_MAX
        ? decimal_digits_value(token->text, &matches[2], false)
        : (double) main_main;
    double exp_whole = exp_main == UINTMAX_MAX
        ? decimal_digits_value(token->text, &matches[6], false)
        : (double) exp_main;
    // Fractions come straight from the digit text so that leading zeros
    // ("1.05") keep their place value.
    double mantissa = main_whole +
        decimal_digits_value(token->text, &matches[3], true);
    double exponent = exp_whole +
        decimal_digits_value(token->text, &matches[7], true);
    node->value = main_sign * mantissa * pow(10, exp_sign * exponent);
    return (AstNode *) node;
}
/*
 * Build an integer node from a token matched by NON_DECIMAL_NUM_REGEX
 * ("<base>#<sign><digits>"). Pushes "invalid numeric literal" and returns
 * NULL if the digits are not all valid in the base or the magnitude exceeds
 * INT64_MAX.
 */
static AstNode *process_non_decimal_matches(Token *token, regmatch_t *matches,
                                            AstErrorList **err) {
    // Bases 2 and 8 are a single digit; otherwise the text starts with "10"
    // or "16", and the second digit tells them apart.
    const char first = token->text[0];
    int base = (first == '2' || first == '8')
        ? first - '0'
        : 10 + token->text[1] - '0';
    int sign = sign_for_match(token, &matches[2]);
    char *endptr;
    const char *digits = &token->text[matches[3].rm_so];
    uintmax_t magnitude = strtoumax(digits, &endptr, base);
    // magnitude is the absolute value, so only the positive limit matters
    bool has_bad_digit = *endptr != '\0';
    if (has_bad_digit || magnitude > INT64_MAX) {
        push_build_error(err, token, 0, "invalid numeric literal");
        return NULL;
    }
    AstIntNode *result = make_ast_node(sizeof(AstIntNode), AST_TYPE_INT,
                                       token->line, token->col);
    result->value = sign * (intmax_t) magnitude;
    return (AstNode *) result;
}
/*
 * Build a number node from a number token. The decimal pattern is tried
 * first, then the non-decimal one; if neither matches, "invalid numeric
 * literal" is pushed and NULL is returned.
 */
static AstNode *parse_number_token(Token *token, AstErrorList **err) {
    regmatch_t groups[REGEX_NMATCH];
    const char *literal = token->text;
    if (!regexec(&DECIMAL_NUM_REGEX, literal, REGEX_NMATCH, groups, 0)) {
        return process_decimal_matches(token, groups);
    }
    if (!regexec(&NON_DECIMAL_NUM_REGEX, literal, REGEX_NMATCH, groups, 0)) {
        return process_non_decimal_matches(token, groups, err);
    }
    push_build_error(err, token, 0, "invalid numeric literal");
    return NULL;
}
/* True if `node` is a symbol node whose name is exactly "t". */
static bool is_node_symbol_t(AstNode *node) {
    if (node->type != AST_TYPE_SYMBOL) {
        return false;
    }
    const AstSymbolNode *sym = (AstSymbolNode *) node;
    return strcmp("t", sym->name) == 0;
}
/*
 * Drop a quote that wraps a null node, a vector or the symbol "t".
 *
 * The form reached by following nested quote nodes decides the outcome. If
 * it is one of those three, `node` is destroyed and its direct inner form is
 * returned; otherwise `node` is returned unchanged.
 */
static AstNode *simplify_quote_node(AstQuoteNode *node) {
    AstNode *innermost;
    for (innermost = (AstNode *) node;
         innermost->type == AST_TYPE_QUOTE;
         innermost = ((AstQuoteNode *) innermost)->form) {
        // follow the chain of quotes
    }
    bool strip = innermost->type == AST_TYPE_NULL
        || innermost->type == AST_TYPE_VECTOR
        || is_node_symbol_t(innermost);
    if (!strip) {
        return (AstNode *) node;
    }
    AstNode *unwrapped = node->form;
    // detach the form so destroying the quote node does not free it
    node->form = NULL;
    destroy_ast_node(node);
    return unwrapped;
}
/*
 * Wrap `form` in a new quote node of the given quote type. If `in_quote` is
 * zero the result is passed through simplify_quote_node; otherwise the quote
 * node is returned as built.
 */
static AstNode *quote_ast_form(AstQuoteType type,
                               AstNode *form, size_t line, size_t col,
                               AstQuoteType in_quote) {
    AstQuoteNode *wrapper = make_ast_node(sizeof(AstQuoteNode), AST_TYPE_QUOTE,
                                          line, col);
    wrapper->type = type;
    wrapper->form = form;
    if (in_quote) {
        return (AstNode *) wrapper;
    }
    return simplify_quote_node(wrapper);
}
/*
 * Handle a quote-like token (quote, backquote, comma or splice): read the
 * form that follows it and wrap that form in a quote node.
 *
 * A comma or splice is rejected with a build error unless `in_quote` is
 * AST_QUOTE_BACK. Returns NULL on any error.
 */
static AstNode *quote_next_toplevel(Token *token, TokenStream *stream,
                                    AstQuoteType in_quote, AstErrorList **err) {
    AstQuoteType my_type;
    switch (token->type) {
    case TOKEN_TYPE_SPLICE:
        my_type = AST_QUOTE_SPLICE;
        break;
    case TOKEN_TYPE_COMMA:
        my_type = AST_QUOTE_COMMA;
        break;
    case TOKEN_TYPE_BACKQUOTE:
        my_type = AST_QUOTE_BACK;
        break;
    case TOKEN_TYPE_QUOTE:
        my_type = AST_QUOTE_NORM;
        break;
    default:
        // shouldn't happen
        abort();
        break;
    }
    bool needs_backquote =
        my_type == AST_QUOTE_COMMA || my_type == AST_QUOTE_SPLICE;
    if (needs_backquote && in_quote != AST_QUOTE_BACK) {
        push_build_error(err, token, 0, "comma or splice not inside a backquote");
        return NULL;
    }
    // the quoted form is read with the larger of the two quote types
    AstQuoteType inner_quote = my_type > in_quote ? my_type : in_quote;
    AstNode *form = ast_next_toplevel_internal(stream, inner_quote, err);
    if (!form) {
        // error already reported
        return NULL;
    }
    // NOTE(review): inner_quote is at least my_type here; if AST_QUOTE_NONE is
    // the smallest enum value, quote_ast_form never simplifies on this path.
    // Confirm that is intended.
    return quote_ast_form(my_type, form, token->line, token->col, inner_quote);
}
/* True if `token` is a paren or bracket token whose text starts with ')' or ']'. */
static bool is_close_delim(Token *token) {
    bool is_delim = token->type == TOKEN_TYPE_PAREN
        || token->type == TOKEN_TYPE_BRACKET;
    if (!is_delim) {
        return false;
    }
    const char first = token->text[0];
    return first == ')' || first == ']';
}
/*
 * True if `child` closes the delimiter opened by `token`: both have the same
 * token type, and `child` starts with ')' for a paren or ']' for a bracket.
 */
static bool is_close_delim_for(Token *token, Token *child) {
    if (token->type != child->type) {
        return false;
    }
    if (token->type == TOKEN_TYPE_PAREN) {
        return child->text[0] == ')';
    }
    if (token->type == TOKEN_TYPE_BRACKET) {
        return child->text[0] == ']';
    }
    return false;
}
/*
 * Read child forms until the delimiter that closes `token` and collect them
 * in a new node of `size` bytes and type `type`.
 *
 * `child_arr_off` and `child_count_off` are the byte offsets, inside the
 * node, of its child array pointer and its child count.
 *
 * Returns NULL if `token` is itself a closing delimiter, if reading a token
 * fails, or if any child fails to parse. When input ends before the closing
 * delimiter, "unmatched opening delimiter" is reported against `token`.
 */
static AstNode *process_next_list_or_vector(Token *token, TokenStream *stream,
                                            size_t size, AstType type,
                                            off_t child_arr_off,
                                            off_t child_count_off,
                                            AstQuoteType in_quote,
                                            AstErrorList **err) {
    if (is_close_delim(token)) {
        push_build_error(err, token, 0, "unmatched closing delimiter");
        return NULL;
    }
    AstNode *node = make_ast_node(size, type, token->line, token->col);
    // byte offsets are applied through char *; arithmetic on void * is a
    // compiler extension
    AstNode ***child_arr_ptr = (AstNode ***) ((char *) node + child_arr_off);
    size_t *child_count_ptr = (size_t *) ((char *) node + child_count_off);
    *child_arr_ptr = NULL;
    *child_count_ptr = 0;
    bool error = false;
    Token ctok;
    ctok.text = NULL;
    ctok.buf_len = 0;
    while (true) {
        if (!next_token(stream, &ctok, err)) {
            // node MUST be valid for this to work
            destroy_ast_node(node);
            node = NULL;
            break;
        }
        if (is_close_delim_for(token, &ctok)) {
            break;
        }
        AstNode *cnode = process_token(&ctok, stream, in_quote, err);
        if (cnode) {
            // next_token clears ctok.text before reading, so release the
            // text here or it is lost. This is safe on success: a handler
            // that keeps the text (symbols) sets ctok.text to NULL, and none
            // frees it. On failure the text may already have been handed to
            // the error list or released by the handler, so it is left alone.
            free(ctok.text);
            ctok.text = NULL;
            ctok.buf_len = 0;
        } else {
            error = true;
            if (token_stream_is_eof(stream)) {
                push_build_error(err, token, 0, "unmatched opening delimiter");
                break;
            }
        }
        // failed children are stored as NULL; destroy_ast_node accepts NULL
        *child_arr_ptr = realloc(*child_arr_ptr, sizeof(AstNode *) *
                                 ++(*child_count_ptr));
        (*child_arr_ptr)[(*child_count_ptr) - 1] = cnode;
    }
    free(ctok.text);
    if (error) {
        destroy_ast_node(node);
        return NULL;
    }
    return node;
}
/* True if `node` is a symbol node whose name is exactly "quote". */
static bool is_quote_symbol_node(AstNode *node) {
    if (node->type != AST_TYPE_SYMBOL) {
        return false;
    }
    const AstSymbolNode *sym = (AstSymbolNode *) node;
    return strcmp(sym->name, "quote") == 0;
}
/*
 * Read a parenthesised list that starts at `token`.
 *
 * An empty list becomes a null node. A list whose first element is the
 * symbol "quote" must have exactly one further element, and becomes a quote
 * node around that element. Any other list is returned as a list node.
 * Returns NULL on error.
 */
static AstNode *process_next_list(Token *token, TokenStream *stream,
                                  AstQuoteType in_quote, AstErrorList **err) {
    AstNode *raw =
        process_next_list_or_vector(token, stream, sizeof(AstListNode),
                                    AST_TYPE_LIST,
                                    offsetof(AstListNode, children),
                                    offsetof(AstListNode, nchildren),
                                    in_quote, err);
    AstListNode *list = (AstListNode *) raw;
    if (!list) {
        return NULL;
    }
    if (list->nchildren == 0) {
        destroy_ast_node(list);
        return (AstNode *) make_null_node(token->line, token->col);
    }
    if (!is_quote_symbol_node(list->children[0])) {
        return (AstNode *) list;
    }
    if (list->nchildren != 2) {
        push_build_error(err, token, 0, "quote expects one argument, got %zu",
                         list->nchildren);
        destroy_ast_node(list);
        return NULL;
    }
    AstNode *quoted = list->children[1];
    // shrink the count so destroying the list leaves the quoted form alive
    list->nchildren = 1;
    destroy_ast_node(list);
    return (AstNode *) quote_ast_form(AST_QUOTE_NORM, quoted,
                                      token->line, token->col,
                                      in_quote);
}
// Read tokens from `stream` into `out`, skipping comment tokens. Every error
// the stream reports is moved onto `err`; if there were any, the token text
// is freed and cleared.
// true on success, false on error
static bool next_token(TokenStream *stream, Token *out, AstErrorList **err) {
    out->text = NULL;
    out->buf_len = 0;
    for (;;) {
        token_stream_next(stream, out);
        bool failed = false;
        for (ParseError *pending = token_stream_error(stream);
             pending;
             pending = token_stream_error(stream)) {
            push_parse_error(err, pending);
            failed = true;
        }
        if (failed) {
            free(out->text);
            out->text = NULL;
            out->buf_len = 0;
            return false;
        }
        if (out->type != TOKEN_TYPE_COMMENT) {
            return true;
        }
    }
}
/*
 * Dispatch on the token type and build the matching AST node.
 *
 * Returns NULL for an end-of-file token, for an unknown token (after pushing
 * "unknown token"), and whenever the selected handler fails. A comment token
 * aborts the process, since next_token filters comments out.
 */
static AstNode *process_token(Token *token, TokenStream *stream,
                              AstQuoteType in_quote, AstErrorList **err) {
    switch (token->type) {
    case TOKEN_TYPE_CHAR:
        return (AstNode *) process_char_token(token, err);
    case TOKEN_TYPE_NUMBER:
        return parse_number_token(token, err);
    case TOKEN_TYPE_STRING:
        return (AstNode *) process_string_token(token, err);
    case TOKEN_TYPE_SYMBOL:
    case TOKEN_TYPE_PROPERTY:
        return (AstNode *) process_symbol_token(token);
    case TOKEN_TYPE_BACKQUOTE:
    case TOKEN_TYPE_COMMA:
    case TOKEN_TYPE_SPLICE:
    case TOKEN_TYPE_QUOTE:
        return (AstNode *) quote_next_toplevel(token, stream, in_quote, err);
    case TOKEN_TYPE_PAREN:
        return process_next_list(token, stream, in_quote, err);
    case TOKEN_TYPE_BRACKET:
        // vector contents are always read with AST_QUOTE_NORM
        return process_next_list_or_vector(token, stream,
                                           sizeof(AstVectorNode),
                                           AST_TYPE_VECTOR,
                                           offsetof(AstVectorNode, children),
                                           offsetof(AstVectorNode, nchildren),
                                           AST_QUOTE_NORM, err);
    case TOKEN_TYPE_UNKNOWN:
        push_build_error(err, token, 0, "unknown token");
        return NULL;
    case TOKEN_TYPE_EOF:
        // do nothing
        return NULL;
    case TOKEN_TYPE_COMMENT:
        // shouldn't happen
        abort();
        break;
    }
    return NULL;
}
/*
 * Read the next token from `stream` and build the form that starts with it.
 * Returns NULL if reading the token fails or process_token returns NULL.
 */
static AstNode *ast_next_toplevel_internal(TokenStream *stream,
                                           AstQuoteType in_quote,
                                           AstErrorList **err) {
    Token token;
    token.text = NULL;
    token.buf_len = 0;
    if (!next_token(stream, &token, err)) {
        return NULL;
    }
    AstNode *node = process_token(&token, stream, in_quote, err);
    if (node) {
        // The token is local to this call, so its text has to be released
        // here. This is safe on success: a handler that keeps the text
        // (symbols) sets token.text to NULL, and none frees it. On failure
        // the text may already have been handed to the error list or
        // released by the handler, so it is left alone.
        free(token.text);
    }
    return node;
}
/*
 * Read the next top-level form from `stream`, starting outside any quote.
 * Errors are appended to `*err`. Returns whatever
 * ast_next_toplevel_internal returns.
 */
AstNode *ast_next_toplevel(TokenStream *stream, AstErrorList **err) {
    AstNode *form = ast_next_toplevel_internal(stream, AST_QUOTE_NONE, err);
    return form;
}
void destroy_ast_node(void *node) {
if (!node) {
return;
}
switch (((AstNode *)node)->type) {
case AST_TYPE_LIST:
for (size_t i = 0; i < ((AstListNode *) node)->nchildren; ++i) {
destroy_ast_node(((AstListNode *) node)->children[i]);
}
free(((AstListNode *) node)->children);
break;
case AST_TYPE_VECTOR:
for (size_t i = 0; i < ((AstVectorNode *) node)->nchildren; ++i) {
destroy_ast_node(((AstVectorNode *) node)->children[i]);
}
free(((AstVectorNode *) node)->children);
break;
case AST_TYPE_STRING:
free(((AstStringNode *) node)->value);
break;
case AST_TYPE_SYMBOL:
if (!((AstSymbolNode *)node)->skip_free) {
free(((AstSymbolNode *) node)->name);
}
break;
case AST_TYPE_QUOTE:
destroy_ast_node(((AstQuoteNode *) node)->form);
break;
case AST_TYPE_INT:
case AST_TYPE_FLOAT:
case AST_TYPE_NULL:
break;
}
free(node);
}
/*
 * Source-text prefix for a quote type: "" for none, "'" for a normal quote,
 * "`" for a backquote, "," for a comma and ",@" for a splice. A value outside
 * the enum yields "" so that callers always get a valid string.
 */
static const char *str_for_ast_quote_type(AstQuoteType type) {
    switch (type) {
    case AST_QUOTE_NONE:
        return "";
    case AST_QUOTE_NORM:
        return "'";
    case AST_QUOTE_BACK:
        return "`";
    case AST_QUOTE_COMMA:
        return ",";
    case AST_QUOTE_SPLICE:
        return ",@";
    }
    // Reached only for a value outside the enum. Without this return, control
    // would fall off the end of a non-void function.
    return "";
}
/*
 * Print `node` to `stream`.
 *
 * `padding` is the indentation column for this node; the spaces are skipped
 * when `skip_print_pad` is set because the caller has already positioned the
 * output. Integers are printed as decimal followed, in parentheses, by a
 * character form (only for values in the unsigned char range that have an
 * escape or are printable) and a hexadecimal form.
 */
static void ast_prin1_node_internal(AstNode *node, FILE *stream, int padding,
                                    bool skip_print_pad) {
    if (!skip_print_pad) {
        for (int i = 0; i < padding; ++i) {
            fputc(' ', stream);
        }
    }
    switch (node->type) {
    case AST_TYPE_INT: {
        int64_t value = ((AstIntNode *) node)->value;
        fprintf(stream, "%" PRId64 " (", value);
        // Character forms only make sense for values that fit in an unsigned
        // char. Outside that range the cast to char would alias an unrelated
        // character, and isprint() is undefined.
        if (value >= 0 && value <= 0xFF) {
            char escape = escape_for_char((char) value);
            if (escape) {
                fprintf(stream, "#\\%c, ", escape);
            } else if (isprint((int) value)) {
                fprintf(stream, "#%c, ", (char) value);
            }
        }
        // Negate in unsigned arithmetic so INT64_MIN does not overflow.
        uint64_t magnitude = (uint64_t) value;
        if (value < 0) {
            fputc('-', stream);
            magnitude = 0 - magnitude;
        }
        fprintf(stream, "0x%" PRIx64 ")", magnitude);
    }
        break;
    case AST_TYPE_STRING: {
        size_t escaped_len = 0;
        char *escaped_string =
            ast_escape_string(((AstStringNode *) node)->value,
                              ((AstStringNode *) node)->length,
                              &escaped_len);
        fputc('"', stream);
        if (escaped_string) {
            fwrite(escaped_string, 1, escaped_len, stream);
        }
        fputc('"', stream);
        // ast_escape_string hands back an allocated buffer
        free(escaped_string);
    }
        break;
    case AST_TYPE_SYMBOL:
        fwrite(((AstSymbolNode *) node)->name, 1,
               ((AstSymbolNode *) node)->name_length, stream);
        break;
    case AST_TYPE_FLOAT:
        fprintf(stream, "%g", ((AstFloatNode *) node)->value);
        break;
    case AST_TYPE_LIST:
        dump_node_list(((AstListNode *) node)->children,
                       ((AstListNode *) node)->nchildren,
                       '(', ')', padding, stream);
        break;
    case AST_TYPE_VECTOR:
        dump_node_list(((AstVectorNode *) node)->children,
                       ((AstVectorNode *) node)->nchildren,
                       '[', ']', padding, stream);
        break;
    case AST_TYPE_QUOTE: {
        const char *quote_str =
            str_for_ast_quote_type(((AstQuoteNode *) node)->type);
        fprintf(stream, "%s", quote_str);
        // the quoted form starts after the quote prefix
        padding += strlen(quote_str);
        ast_prin1_node_internal(((AstQuoteNode *) node)->form, stream,
                                padding, true);
    }
        break;
    case AST_TYPE_NULL:
        fwrite("nil", 1, 3, stream);
        break;
    }
}
/*
 * Print `count` nodes between `sdelim` and `edelim`. The first node follows
 * the opening delimiter directly; each later node starts on a new line,
 * indented to `padding + 1`.
 */
static void dump_node_list(AstNode **list, size_t count, char sdelim,
                           char edelim, int padding, FILE *stream) {
    fputc(sdelim, stream);
    for (size_t idx = 0; idx < count; ++idx) {
        bool is_first = idx == 0;
        if (!is_first) {
            fputc('\n', stream);
        }
        // only the first element skips its own indentation
        ast_prin1_node_internal(list[idx], stream, padding + 1, is_first);
    }
    fputc(edelim, stream);
}
/* Print `node` to `stream` starting at column 0, followed by a newline. */
void ast_prin1_node(AstNode *node, FILE *stream) {
    const int start_padding = 0;
    ast_prin1_node_internal(node, stream, start_padding, false);
    fputc('\n', stream);
}
/*
 * Print `node` into a newly allocated string, using the same output as
 * ast_prin1_node (so it ends with a newline).
 *
 * If `out_len` is non-NULL it receives the length of the string. The caller
 * owns the result and must free() it. Returns NULL, with the length set to
 * 0, if the memory stream cannot be opened or closed.
 */
char *ast_prin1_node_to_string(AstNode *node, size_t *out_len) {
    size_t backup_outlen = 0;
    if (!out_len) {
        out_len = &backup_outlen;
    }
    char *outbuf = NULL;
    FILE *stream = open_memstream(&outbuf, out_len);
    if (!stream) {
        *out_len = 0;
        return NULL;
    }
    ast_prin1_node(node, stream);
    // the buffer and length are only valid once the stream has been closed
    if (fclose(stream) != 0) {
        free(outbuf);
        *out_len = 0;
        return NULL;
    }
    return outbuf;
}
/*
 * Remove the first entry from `*list` and return it, or return NULL if the
 * list is empty. The returned entry's `next` pointer is left as it was.
 */
AstErrorList *ast_error_list_pop(AstErrorList **list) {
    AstErrorList *head = *list;
    if (head != NULL) {
        *list = head->next;
    }
    return head;
}
/*
 * Free one error entry together with what it holds: the parse error for
 * AST_ERROR_PARSE, or the message and token for AST_ERROR_BUILD. The entries
 * linked after it are not touched. NULL is accepted and ignored.
 */
void ast_error_list_free_one(AstErrorList *list) {
    if (!list) {
        return;
    }
    if (list->type == AST_ERROR_PARSE) {
        parse_error_free(list->parse);
    } else if (list->type == AST_ERROR_BUILD) {
        free(list->build.msg);
        token_free(&list->build.token);
    }
    free(list);
}
/* Free every entry of the error list that starts at `list`. */
void ast_error_list_free_all(AstErrorList *list) {
    for (AstErrorList *following; list; list = following) {
        // read the link before the entry is freed
        following = list->next;
        ast_error_list_free_one(list);
    }
}
/*
 * Find the last non-empty line in the first `len` bytes of `str`, ignoring
 * trailing newlines.
 *
 * Returns a pointer to the start of that line. `*line_len` receives its
 * length without any newline, and `*num_passed` the number of newline
 * characters before it. If there is no non-empty line, `str` is returned
 * with both outputs set to 0.
 */
static const char *start_of_last_line(const char *str, size_t len,
                                      size_t *line_len, size_t *num_passed) {
    // drop trailing newlines
    size_t end = len;
    while (end > 0 && str[end - 1] == '\n') {
        --end;
    }
    // walk back to the character after the previous newline
    size_t start = end;
    while (start > 0 && str[start - 1] != '\n') {
        --start;
    }
    *line_len = end - start;
    // count the newlines in front of the line
    size_t newlines = 0;
    for (size_t i = 0; i < start; ++i) {
        if (str[i] == '\n') {
            ++newlines;
        }
    }
    *num_passed = newlines;
    return start ? &str[start] : str;
}
/*
 * Write a human-readable report for the single error entry `err` to `stream`.
 *
 * The report starts with "error: ", then `file_name` (if non-NULL), then the
 * line and column, the message, the offending text and a '^' marker line.
 * Does nothing when `err` is NULL. Only this entry is printed; entries linked
 * after it are not visited.
 */
void ast_format_error(AstErrorList *err, const char *file_name, FILE *stream) {
if (!err) {
return;
}
fprintf(stream, "error: ");
if (file_name) {
fprintf(stream, "%s: ", file_name);
}
switch (err->type) {
case AST_ERROR_PARSE: {
size_t line_len;
size_t num_passed;
// Only the last non-empty line of the context is shown; num_passed is the
// number of newlines before it and is added to the reported line number.
const char *last_line = start_of_last_line(err->parse->context,
strlen(err->parse->context),
&line_len, &num_passed);
// When the error is flagged at_end, the reported column is moved to the
// last character of the shown line.
fprintf(stream, "%zu:%zu: %s\n ", err->parse->line + num_passed,
err->parse->at_end ? err->parse->col + line_len - 1 :
err->parse->col, err->parse->desc);
fwrite(last_line, 1, line_len, stream);
// NOTE(review): this writes 3 bytes from the literal. As the literal reads
// here that is two characters plus its terminating NUL. Confirm the count
// matches the intended indentation string.
fwrite("\n ", 1, 3, stream);
if (err->parse->at_end) {
// pad so the caret lands under the last character of the line
for (size_t i = 1; i < line_len; ++i) {
fputc(' ', stream);
}
}
fwrite("^\n", 1, 2, stream);
}
break;
case AST_ERROR_BUILD:
// The column is the token's column plus the offset stored with the error.
fprintf(stream, "%zu:%zu: %s\n %s\n ", err->build.token.line,
err->build.token.col + err->build.off,
err->build.msg, err->build.token.text);
// one space per offset byte, so the caret lands `off` characters in
for (size_t i = 1; i <= err->build.off; ++i) {
fputc(' ', stream);
}
fwrite("^\n", 1, 2, stream);
break;
}
}