#include "token.h"
#include "util.h"

#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
 * Return the printable name of a token type.
 *
 * The result points into a static table and must not be freed.
 * The type must be one of the TokenType constants listed in the table;
 * this is checked with assert().
 */
const char *token_type_to_string(TokenType type)
{
    static const char *const TYPES[] = {
        [TOKEN_LET] = "LET",
        [TOKEN_EVAL] = "EVAL",
        [TOKEN_CONF] = "CONF",
        [TOKEN_EQUALS] = "EQUALS",
        [TOKEN_BACKSLASH] = "BACKSLASH",
        [TOKEN_COLON] = "COLON",
        [TOKEN_OPEN_PAREN] = "OPEN_PAREN",
        [TOKEN_CLOSE_PAREN] = "CLOSE_PAREN",
        [TOKEN_DEFINE] = "DEFINE",
        [TOKEN_IDENT] = "IDENT",
        [TOKEN_REDUCE] = "REDUCE",
        [TOKEN_EOF] = "EOF",
    };

    // Going through size_t rejects negative values too (they wrap to huge
    // indices) and avoids a signed/unsigned comparison.
    assert((size_t) type < sizeof(TYPES) / sizeof(TYPES[0]));
    return TYPES[type];
}

// Sentinel returned by peekc()/popc() once the input is exhausted. Real bytes
// are always returned in the range 0..UCHAR_MAX, so they never collide with it.
#define EOS (-1)

// Whitespace recognised between tokens: space, tab and newline.
static bool is_whitespace(int c)
{
    return c == ' ' || c == '\t' || c == '\n';
}

// Characters allowed in an identifier: lowercase ASCII letters and '_'.
static bool is_identifier(int c)
{
    return (c >= 'a' && c <= 'z') || c == '_';
}

// Characters allowed between '=' and '>' in a reduce token.
static bool is_reduce(int c)
{
    return (c >= 'a' && c <= 'z') || c == '*' || c == '~' || c == ':';
}

// Return the next byte without consuming it, or EOS at end of input.
static int peekc(TokenStream *stream)
{
    if (stream->pos.offset >= stream->src_len) {
        return EOS;
    }
    // Go through unsigned char so that a 0xFF byte is not mistaken for EOS
    // when plain char is signed.
    return (unsigned char) stream->src[stream->pos.offset];
}

// Consume and return the next byte (or EOS), keeping line/column up to date.
static int popc(TokenStream *stream)
{
    if (stream->pos.offset >= stream->src_len) {
        return EOS;
    }

    int c = (unsigned char) stream->src[stream->pos.offset++];
    if (c == '\n') {
        // The next line starts right after the newline that was just consumed.
        stream->pos.line_offset = stream->pos.offset;
        ++stream->pos.line;
        stream->pos.column = 0;
    } else {
        ++stream->pos.column;
    }
    return c;
}

// Consume bytes for as long as they are whitespace.
static void skip_whitespace(TokenStream *stream)
{
    while (is_whitespace(peekc(stream))) {
        popc(stream);
    }
}

// Consume everything up to and including the next newline, or up to EOS.
static void discard_until_newline(TokenStream *stream)
{
    int c;
    do {
        c = popc(stream);
    } while (c != '\n' && c != EOS);
}

// Map a single-character token to its type. Any other character is a
// programming error and aborts.
static TokenType typeof_single_char_token(char c)
{
    switch (c) {
    case '=':
        return TOKEN_EQUALS;
    case '\\':
        return TOKEN_BACKSLASH;
    case ':':
        return TOKEN_COLON;
    case '(':
        return TOKEN_OPEN_PAREN;
    case ')':
        return TOKEN_CLOSE_PAREN;
    default:
        abort();
    }
}

// Fill in every field of *out that the tokenizer sets.
static inline void set_token(Token *out, const SrcPos *pos, TokenType type,
                             size_t length)
{
    out->pos = *pos;
    out->type = type;
    out->length = length;
}

// Format a character (or EOS) for use in an error message.
// Returns whatever snprintf() returns.
static int pprint_character(int c, char *buf, size_t buf_size)
{
    if (c == EOS) {
        return snprintf(buf, buf_size, "EOF");
    }

    // <ctype.h> functions require a value representable as unsigned char.
    unsigned char byte = (unsigned char) c;
    if (isprint(byte)) {
        return snprintf(buf, buf_size, "'%c'", (char) byte);
    }
    return snprintf(buf, buf_size, "'0x%02X'", (unsigned) byte);
}

// Record a formatted error message and its position. Does nothing when the
// caller passed no error object.
ATTR_FORMAT(3, 4)
static void sprintf_error(ParseError *error, const SrcPos *pos,
                          const char *restrict fmt, ...)
{
    if (!error) {
        return;
    }

    error->set = true;
    error->pos = *pos;

    va_list args;
    va_start(args, fmt);
    vsnprintf(error->message, sizeof(error->message), fmt, args);
    va_end(args);
}

// and now for the actual parser

// Shorthands used by the functions below; they rely on local variables named
// `stream` and `out` being in scope.
#define POS (&stream->pos)
#define SET_OUT(p, t, l) set_token(out, (p), (t), (l))
#define SET_EOF() SET_OUT(POS, TOKEN_EOF, 0)
#define DISCARD_CHAR() popc(stream)
// Declares a small buffer `var` holding the printable form of `c`.
// The longest output is "'0xFF'" (6 characters plus the terminator).
#define DEF_PPRINT(var, c) \
    char var[8];           \
    pprint_character((c), var, sizeof(var))
#define BYTES_LEFT (stream->src_len - stream->pos.offset)

static bool read_next_ident(TokenStream *restrict stream, Token *restrict out);

// consume = before using
static bool read_next_reduce(TokenStream *restrict stream, Token *restrict out,
                             ParseError *restrict error);

/**
 * Read the next token from the stream into *out.
 *
 * Leading whitespace and "--" comments (which run to the end of the line) are
 * skipped. At end of input a zero-length TOKEN_EOF is produced.
 *
 * Returns true on success. On failure returns false and, if error is not
 * NULL, stores the message and source position in *error. When error is not
 * NULL, error->set is cleared on entry.
 */
bool token_stream_next(TokenStream *restrict stream, Token *restrict out,
                       ParseError *restrict error)
{
    if (error) {
        error->set = false;
    }

restart:
    skip_whitespace(stream);

    int c = peekc(stream);
    switch (c) {
    case EOS:
        SET_EOF();
        return true;

    case '\\':
    case ':':
    case '(':
    case ')':
        SET_OUT(POS, typeof_single_char_token(c), 1);
        DISCARD_CHAR();
        return true;

    case '=': {
        SrcPos start = *POS;
        DISCARD_CHAR();

        // A lone '=' (followed by whitespace or end of input) is EQUALS;
        // anything else must be a reduce token of the form "=...>".
        int next = peekc(stream);
        if (next == EOS || is_whitespace(next)) {
            SET_OUT(&start, typeof_single_char_token(c), 1);
            return true;
        }

        // note the '=' was already consumed
        return read_next_reduce(stream, out, error);
    }

    case '-': {
        SrcPos start = *POS;
        popc(stream); // first '-'

        int next = popc(stream);
        if (next == EOS) {
            sprintf_error(error, POS, "unexpected EOF");
            return false;
        }
        if (next == '-') {
            // "--" starts a comment: drop the rest of the line and retry.
            discard_until_newline(stream);
            goto restart;
        }

        // NOTE(review): any second character other than '-' is accepted here
        // as the end of the two-character DEFINE token; confirm whether a
        // specific character was intended.
        SET_OUT(&start, TOKEN_DEFINE, 2);
        return true;
    }

    default:
        if (!is_identifier(c)) {
            DEF_PPRINT(pp, c);
            sprintf_error(error, POS, "expected identifier, got %s", pp);
            return false;
        }
        return read_next_ident(stream, out);
    }
}

// Read a run of identifier characters and classify it as a keyword
// (let / conf / eval) or a plain identifier. Always succeeds.
static bool read_next_ident(TokenStream *restrict stream, Token *restrict out)
{
    static const struct {
        const char *text;
        size_t length;
        TokenType type;
    } KEYWORDS[] = {
        {"let", 3, TOKEN_LET},
        {"conf", 4, TOKEN_CONF},
        {"eval", 4, TOKEN_EVAL},
    };

    SrcPos start = *POS;
    size_t len = 0;
    while (is_identifier(peekc(stream))) {
        popc(stream);
        ++len;
    }

    // The len bytes starting at start.offset were just scanned, so they are
    // in bounds. How many bytes follow the word must not influence whether
    // it is a keyword.
    TokenType type = TOKEN_IDENT;
    for (size_t i = 0; i < sizeof(KEYWORDS) / sizeof(KEYWORDS[0]); ++i) {
        if (len == KEYWORDS[i].length
            && memcmp(&stream->src[start.offset], KEYWORDS[i].text, len) == 0) {
            type = KEYWORDS[i].type;
            break;
        }
    }

    SET_OUT(&start, type, len);
    return true;
}

// Read the remainder of a reduce token after its leading '=' has been
// consumed. The token's position and length cover only the characters
// between '=' and the closing '>', which is consumed as well.
static bool read_next_reduce(TokenStream *restrict stream, Token *restrict out,
                             ParseError *restrict error)
{
    SrcPos start = *POS;
    size_t len = 0;

    int c;
    while (is_reduce(c = peekc(stream))) {
        popc(stream);
        ++len;
    }

    if (c != '>') {
        DEF_PPRINT(pp, c);
        sprintf_error(error, POS, "expected '>', got %s", pp);
        return false;
    }

    SET_OUT(&start, TOKEN_REDUCE, len);
    DISCARD_CHAR();
    return true;
}