Files
lambda/token.c
2026-04-03 13:21:17 -07:00

235 lines
6.2 KiB
C

#include "token.h"
#include "util.h"
#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <string.h>
/*
 * Maps a token type to its upper-case display name.
 *
 * The name table is indexed directly by the enum value. A value outside the
 * table trips the assertion (when assertions are enabled).
 */
const char *token_type_to_string(TokenType type) {
    static const char *const NAMES[] = {
        // keywords
        [TOKEN_LET] = "LET",
        [TOKEN_EVAL] = "EVAL",
        [TOKEN_CONF] = "CONF",
        // punctuation and operators
        [TOKEN_EQUALS] = "EQUALS",
        [TOKEN_BACKSLASH] = "BACKSLASH",
        [TOKEN_COLON] = "COLON",
        [TOKEN_OPEN_PAREN] = "OPEN_PAREN",
        [TOKEN_CLOSE_PAREN] = "CLOSE_PAREN",
        [TOKEN_DEFINE] = "DEFINE",
        [TOKEN_REDUCE] = "REDUCE",
        // everything else
        [TOKEN_IDENT] = "IDENT",
        [TOKEN_EOF] = "EOF",
    };

    assert(type >= 0 && type < (sizeof(NAMES) / sizeof(*NAMES)));
    return NAMES[type];
}
// End-of-source sentinel returned by peekc()/popc() once the read offset has
// reached the end of the input. Parenthesized so the negative literal stays a
// single operand wherever the macro is expanded.
#define EOS (-1)
// True for the characters the tokenizer skips between tokens: space, tab and
// newline. No other character counts as whitespace.
static bool is_whitespace(char c) {
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
        return true;
    default:
        return false;
    }
}
// True when `c` may appear in an identifier: a lowercase letter 'a'..'z' or
// an underscore.
static bool is_identifier(char c) {
    if (c == '_') {
        return true;
    }
    return 'a' <= c && c <= 'z';
}
// True when `c` may appear in the body of a reduce token: a lowercase letter
// 'a'..'z', or one of '*', '~', ':'.
static bool is_reduce(char c) {
    switch (c) {
    case '*':
    case '~':
    case ':':
        return true;
    default:
        return 'a' <= c && c <= 'z';
    }
}
/*
 * Returns the byte at the stream's current offset without consuming it, or
 * EOS when the offset has reached src_len.
 *
 * The byte is converted through unsigned char so a real byte is never
 * negative. If the source holds plain (possibly signed) char, a 0xFF byte
 * would otherwise read as -1 and be indistinguishable from EOS.
 */
static int peekc(TokenStream *stream) {
    if (stream->pos.offset >= stream->src_len) {
        return EOS;
    }
    return (unsigned char) stream->src[stream->pos.offset];
}
/*
 * Consumes and returns the byte at the stream's current offset, or returns
 * EOS (consuming nothing) when the offset has reached src_len.
 *
 * Position bookkeeping: the offset always advances by one. After a newline,
 * line_offset is set to the offset of the first byte of the next line, the
 * line counter is incremented and the column is reset to 0. After any other
 * byte the column is incremented.
 *
 * The byte is converted through unsigned char so a real byte is never
 * negative. If the source holds plain (possibly signed) char, a 0xFF byte
 * would otherwise read as -1 and be indistinguishable from EOS.
 */
static int popc(TokenStream *stream) {
    if (stream->pos.offset >= stream->src_len) {
        return EOS;
    }
    int c = (unsigned char) stream->src[stream->pos.offset++];
    if (c == '\n') {
        stream->pos.line_offset = stream->pos.offset;
        ++stream->pos.line;
        stream->pos.column = 0;
    } else {
        ++stream->pos.column;
    }
    return c;
}
// Consumes characters until the next one is not whitespace (as defined by
// is_whitespace) or the end of the source is reached.
static void skip_whitespace(TokenStream *stream) {
    for (;;) {
        if (!is_whitespace(peekc(stream))) {
            return;
        }
        popc(stream);
    }
}
// Consumes characters up to and including the next newline, or until the end
// of the source, whichever comes first.
static void discard_until_newline(TokenStream *stream) {
    for (;;) {
        const int consumed = popc(stream);
        if (consumed == '\n' || consumed == EOS) {
            return;
        }
    }
}
/*
 * Returns the token type for one of the single-character tokens
 * '=', '\', ':', '(' or ')'.
 *
 * Any other character aborts the program, so callers must only pass one of
 * the five characters above.
 */
static TokenType typeof_single_char_token(char c) {
    if (c == '=') {
        return TOKEN_EQUALS;
    }
    if (c == '\\') {
        return TOKEN_BACKSLASH;
    }
    if (c == ':') {
        return TOKEN_COLON;
    }
    if (c == '(') {
        return TOKEN_OPEN_PAREN;
    }
    if (c == ')') {
        return TOKEN_CLOSE_PAREN;
    }
    abort();
}
// Fills in the position, type and length of `out`. The position is copied
// from `pos`, so `pos` need not outlive the call.
static inline void set_token(Token *out, const SrcPos *pos, TokenType type,
                             size_t length) {
    out->type = type;
    out->length = length;
    out->pos = *pos;
}
/*
 * Formats `c` for use in an error message and writes it to `buf` (at most
 * `buf_size` bytes including the terminator):
 *   - EOS is written as EOF
 *   - a printable character is written in single quotes, e.g. 'x'
 *   - anything else is written as a quoted two-digit hex byte, e.g. '0x0A'
 *
 * Returns the value returned by snprintf.
 */
static int pprint_character(int c, char *buf, size_t buf_size) {
    if (c == EOS) {
        return snprintf(buf, buf_size, "EOF");
    }
    // <ctype.h> classification is only defined for values representable as
    // unsigned char (or EOF), so reduce to a byte before calling isprint.
    const unsigned char byte = (unsigned char) c;
    if (isprint(byte)) {
        return snprintf(buf, buf_size, "'%c'", (char) byte);
    }
    // %X takes an unsigned int; the byte value keeps the output at two digits
    return snprintf(buf, buf_size, "'0x%02X'", (unsigned) byte);
}
/*
 * Records a printf-style error message and its source position in `error`.
 *
 * Does nothing when `error` is NULL. Otherwise marks the error as set, copies
 * `*pos` into it and formats the message into error->message, bounded by
 * sizeof(error->message).
 */
ATTR_FORMAT(3, 4)
static void sprintf_error(ParseError *error, const SrcPos *pos,
                          const char *restrict fmt, ...) {
    if (!error) {
        return;
    }

    error->pos = *pos;
    error->set = true;

    va_list ap;
    va_start(ap, fmt);
    vsnprintf(error->message, sizeof(error->message), fmt, ap);
    va_end(ap);
}
// and now for the actual parser
//
// Shorthand macros for the functions below. POS, SET_EOF, DISCARD_CHAR and
// BYTES_LEFT expand to code that refers to a variable named `stream`;
// SET_OUT and SET_EOF also refer to a variable named `out`.

// Pointer to the stream's current source position.
#define POS (&stream->pos)
// Writes a token with position *p, type t and length l into `out`.
#define SET_OUT(p, t, l) set_token(out, (p), (t), (l))
// Writes a zero-length EOF token at the current position into `out`.
#define SET_EOF() SET_OUT(POS, TOKEN_EOF, 0)
// Consumes one character from the stream.
#define DISCARD_CHAR() popc(stream)
// Declares an 8-byte buffer named `var` and fills it with the printable form
// of `c`. Expands to a declaration plus a call, so it must be used where a
// declaration is allowed (inside a braced block).
#define DEF_PPRINT(var, c) \
char var[8]; \
pprint_character((c), var, sizeof(var))
// Number of bytes between the current offset and the end of the source.
#define BYTES_LEFT (stream->src_len - stream->pos.offset)
// Reads an identifier or keyword starting at the current position.
static bool read_next_ident(TokenStream *restrict stream, Token *restrict out);
// Reads the body of a reduce token up to its closing '>'. The caller must
// consume the leading '=' before calling this.
static bool read_next_reduce(TokenStream *restrict stream, Token *restrict out,
ParseError *restrict error);
/*
 * Reads the next token from `stream` into `out`.
 *
 * Leading whitespace and "--" comments (which run to the end of the line) are
 * skipped. The token is then chosen from the first remaining character:
 *   - end of source        -> zero-length TOKEN_EOF
 *   - '\', ':', '(', ')'   -> the matching single-character token
 *   - '='                  -> TOKEN_EQUALS when followed by whitespace or the
 *                             end of the source, otherwise a reduce token
 *   - '-'                  -> "--" starts a comment; otherwise a two-character
 *                             TOKEN_DEFINE
 *   - identifier character -> identifier or keyword
 *
 * Returns true on success. Returns false on a lexical error; in that case the
 * details are stored in `error` if it is non-NULL. When `error` is non-NULL
 * its `set` flag is cleared on entry.
 *
 * NOTE(review): in the '-' case any second character other than '-' is
 * consumed and accepted as the end of TOKEN_DEFINE - confirm whether a
 * specific character was intended.
 */
bool token_stream_next(TokenStream *restrict stream, Token *restrict out,
                       ParseError *restrict error) {
    if (error) {
        error->set = false;
    }

    // Each pass either produces a token / error and returns, or skips one
    // comment and tries again.
    for (;;) {
        skip_whitespace(stream);
        const int head = peekc(stream);

        if (head == EOS) {
            SET_EOF();
            return true;
        }

        if (head == '\\' || head == ':' || head == '(' || head == ')') {
            SET_OUT(POS, typeof_single_char_token(head), 1);
            DISCARD_CHAR();
            return true;
        }

        if (head == '=') {
            const SrcPos equals_pos = *POS;
            DISCARD_CHAR();
            const int following = peekc(stream);
            if (following != EOS && !is_whitespace(following)) {
                // the '=' is already consumed, as read_next_reduce expects
                return read_next_reduce(stream, out, error);
            }
            SET_OUT(&equals_pos, typeof_single_char_token(head), 1);
            return true;
        }

        if (head == '-') {
            const SrcPos dash_pos = *POS;
            popc(stream); // the leading '-'
            const int second = popc(stream);
            if (second == EOS) {
                sprintf_error(error, POS, "unexpected EOF");
                return false;
            }
            if (second == '-') {
                discard_until_newline(stream);
                continue;
            }
            SET_OUT(&dash_pos, TOKEN_DEFINE, 2);
            return true;
        }

        if (!is_identifier(head)) {
            DEF_PPRINT(pp, head);
            sprintf_error(error, POS, "expected identifier, got %s", pp);
            return false;
        }
        return read_next_ident(stream, out);
    }
}
/*
 * Reads a run of identifier characters starting at the current position and
 * stores it in `out` as TOKEN_LET, TOKEN_CONF or TOKEN_EVAL when the text is
 * exactly "let", "conf" or "eval", and as TOKEN_IDENT otherwise.
 *
 * The token's position is where the run starts and its length is the number
 * of characters consumed. Always returns true.
 */
static bool read_next_ident(TokenStream *restrict stream, Token *restrict out) {
    static const struct {
        const char *text;
        size_t length;
        TokenType type;
    } KEYWORDS[] = {
        {"let", 3, TOKEN_LET},
        {"conf", 4, TOKEN_CONF},
        {"eval", 4, TOKEN_EVAL},
    };

    const SrcPos start = *POS;
    size_t len = 0;
    while (is_identifier(peekc(stream))) {
        popc(stream);
        ++len;
    }

    // The `len` bytes from start.offset were just consumed, so they are in
    // bounds. How many bytes remain after the identifier is irrelevant here:
    // requiring some would misclassify a keyword near the end of the input.
    TokenType type = TOKEN_IDENT;
    for (size_t i = 0; i < sizeof(KEYWORDS) / sizeof(KEYWORDS[0]); ++i) {
        if (len == KEYWORDS[i].length
            && memcmp(&stream->src[start.offset], KEYWORDS[i].text, len) == 0) {
            type = KEYWORDS[i].type;
            break;
        }
    }

    SET_OUT(&start, type, len);
    return true;
}
/*
 * Reads the body of a reduce token. The caller has already consumed the
 * leading '='.
 *
 * Consumes a (possibly empty) run of reduce characters, which must be
 * followed by '>'. On success stores a TOKEN_REDUCE in `out` whose position
 * and length cover only the run between '=' and '>', consumes the '>' and
 * returns true. Otherwise reports the unexpected character through `error`
 * and returns false.
 */
static bool read_next_reduce(TokenStream *restrict stream, Token *restrict out,
                             ParseError *restrict error) {
    const SrcPos body_start = *POS;
    size_t body_len = 0;

    int stop = peekc(stream);
    while (is_reduce(stop)) {
        popc(stream);
        ++body_len;
        stop = peekc(stream);
    }

    if (stop != '>') {
        DEF_PPRINT(pp, stop);
        sprintf_error(error, POS, "expected '>', got %s", pp);
        return false;
    }

    SET_OUT(&body_start, TOKEN_REDUCE, body_len);
    DISCARD_CHAR();
    return true;
}