diff --git a/.clangd b/.clangd index bf16b8a..8441862 100644 --- a/.clangd +++ b/.clangd @@ -6,7 +6,8 @@ CompileFlags: -Wpedantic, -xc, -D_POSIX_C_SOURCE=199309L, - "-fsanitize=address,undefined", + "-Isrc", + "-I../", ] Compiler: gcc --- @@ -14,8 +15,7 @@ If: PathMatch: .*\.h CompileFlags: Remove: -xc - Add: [-std=c11, -Wall, -Wpedantic, -xc-header] - Compiler: gcc + Add: -xc-header --- If: PathMatch: bin/.*\.c diff --git a/Makefile b/Makefile index c9e2e73..e096bcb 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ CFLAGS=$(DEBUG_CFLAGS) $(LLVM_SAN_FLAGS) -std=c11 -Wall -Wpedantic $\ LD=gcc LDFLAGS=$(LLVM_SAN_FLAGS) -SRCS:=$(wildcard src/*.c) +SRCS:=$(wildcard src/*.c) $(wildcard src/byterun/*.c) OBJS:=$(SRCS:src/%.c=bin/%.o) DEPS:=$(SRCS:src/%.c=bin/deps/%.d) @@ -34,12 +34,11 @@ glisp: $(OBJS) $(LD) $(LDFLAGS) -o $@ $^ bin/init_globals.c: $(filter-out bin/init_globals.c,$(SRCS_WITH_HEADERS)) src/gen-init-globals.awk - @mkdir -p bin/ awk -f src/gen-init-globals.awk $(filter-out src/gen-init-globals.awk,$^) >$@ bin/%.o: src/%.c - @mkdir -p bin/deps - $(CC) $(CFLAGS) -c -MMD -MF $(<:src/%.c=bin/deps/%.d) -o $@ $< + @mkdir -p $(dir $(<:src/%.c=bin/deps/%.d) $(<:src/%.c=bin/%)) + $(CC) $(CFLAGS) -c -MMD -MF $(<:src/%.c=bin/deps/%.d) -I src/ -o $@ $< bin/init_globals.o: bin/init_globals.c @mkdir -p bin/deps diff --git a/src/base.h b/src/base.h index 9e699a8..3d83446 100644 --- a/src/base.h +++ b/src/base.h @@ -76,15 +76,16 @@ static ALWAYS_INLINE LispVal *MAKE_LISP_FLOAT(lisp_float_t flt) { // ############### // # Other types # // ############### +// Make sure this is kept up to date with byterun.h typedef enum { - TYPE_FIXNUM, - TYPE_FLOAT, - TYPE_CONS, - TYPE_STRING, - TYPE_SYMBOL, - TYPE_VECTOR, - TYPE_HASH_TABLE, - TYPE_FUNCTION, + TYPE_FIXNUM = 0, + TYPE_FLOAT = 1, + TYPE_CONS = 2, + TYPE_STRING = 3, + TYPE_SYMBOL = 4, + TYPE_VECTOR = 5, + TYPE_HASH_TABLE = 6, + TYPE_FUNCTION = 7, N_LISP_TYPES, } LispValType; extern const char *LISP_TYPE_NAMES[N_LISP_TYPES]; 
diff --git a/src/byterun/BYTECODE.md b/src/byterun/BYTECODE.md
new file mode 100644
index 0000000..44c7664
--- /dev/null
+++ b/src/byterun/BYTECODE.md
@@ -0,0 +1,59 @@
+The bytecode interpreter runs using a stack. Most instructions operate on the
+stack. Each instruction starts with a 1 byte opcode. If it has arguments, they
+come next (and are detailed below). All functions push their return value onto
+the stack (though some produce no return value).
+
+The leftmost (most significant) bit of the opcode is the "save flag". If it is
+set, the instruction does not remove its arguments from the stack.
+
+Each block of bytecode (function, etc.) starts with some constants. They have
+the following format:
+
+WORDSIZE bytes - Number of constants
+NOTE: below, MINSIZE is the minimum number of bytes needed to reach any
+object, signed.
+The constants start with 1 byte for type (as in base.h). Their formats follow:
+FIXNUM: WORDSIZE bytes (for the value with the tag already set).
+FLOAT: WORDSIZE bytes (for the value with the tag already set).
+CONS: two MINSIZE-byte fields for the car and cdr.
+STRING: WORDSIZE bytes for the length, then the bytes themselves.
+SYMBOL: WORDSIZE bytes for the length, then the bytes themselves. These are for
+the name. The VM interns the symbol when it loads the bytecode.
+VECTOR: WORDSIZE bytes for the length. Then MINSIZE bytes for each element.
+HASH_TABLE: Not a valid constant
+FUNCTION: TODO - Not implemented yet (this will include lambdas).
+LIST (Type code is FUNCTION + 1): Ditto vector, but a list is created
+instead. This is just a convenience.
+
+Instructions (Name - Opcode):
+
+NIL - 0x0: Push nil onto the stack.
+
+CONS - 0x1: Return a cons of the top two elements of the stack. The top becomes
+the cdr and the next element the car.
+
+CAR - 0x2: Return the car of the cons on top of the stack.
+
+CDR - 0x3: Return the cdr of the cons on top of the stack.
+
+NULL - 0x4: Return t if the top of the stack is nil, otherwise return nil.
+
+POP - 0x5: Throw away the top of the stack (produces no value). Doing this with
+the save flag set is a no-op.
+
+COPY - 0x6: Takes one argument which is a zero-indexed offset from the top of
+the stack (as in 0 is the top). Pushes a copy of the element onto the top of the
+stack.
+
+LOAD - 0x7: Load the numbered constant onto the top of the stack. This takes no
+stack arguments. It takes one constant argument (which is the smallest width
+necessary to load all constants). This argument is simply a number.
+
+EQ - 0x8: If the top two elements of the stack are the same object, return t,
+otherwise return nil.
+
+FUNCALL - 0x9: Call the function on top of the stack. The second argument is the
+number of arguments. That many elements are then used as the arguments. The
+first argument is the one directly following the count.
+
+PRINT - 0x7f: DEBUG ONLY - print the top of the stack
diff --git a/src/byterun/assemble.c b/src/byterun/assemble.c
new file mode 100644
index 0000000..eda2882
--- /dev/null
+++ b/src/byterun/assemble.c
@@ -0,0 +1,250 @@
+#include "assemble.h"
+
+#include "base.h"
+#include "memory.h"
+
+#include
+#include
+#include
+#include
+
+// Look up an opcode by its mnemonic. NAME is LENGTH bytes long and does not
+// need to be NUL-terminated; the comparison ignores case. Return BO_INVALID if
+// NAME is empty or does not name an opcode.
+BytecodeOpcode bytecode_opcode_by_name(const char *name, size_t length) {
+    struct Bucket {
+        size_t name_len;
+        const char *name;
+        BytecodeOpcode opcode;
+        struct Bucket *next;
+    };
+    // Mnemonics are bucketed by their (lowercased) first letter.
+    static struct Bucket *TABLE[26] = {NULL};
+    static bool did_init = false;
+    if (!did_init) {
+#define PUT(mname) \
+    { \
+        struct Bucket *n = lisp_malloc(sizeof(struct Bucket)); \
+        n->name_len = sizeof(#mname) - 1; \
+        n->name = #mname; \
+        n->opcode = BO_##mname; \
+        size_t hash = tolower(#mname[0]) - 'a'; \
+        n->next = TABLE[hash]; \
+        TABLE[hash] = n; \
+    }
+        PUT(NIL);
+        PUT(CONS);
+        PUT(CAR);
+        PUT(CDR);
+        PUT(NULL);
+        PUT(POP);
+        PUT(COPY);
+        PUT(LOAD);
+        PUT(EQ);
+        PUT(FUNCALL);
+        PUT(PRINT);
+#undef PUT
+        did_init = true;
+    }
+    if (!length) {
+        return BO_INVALID;
+    }
+    // A name that does not start with a letter cannot be a mnemonic; bail out
+    // rather than index TABLE out of bounds.
+    int first = tolower((unsigned char) name[0]);
+    if (first < 'a' || first > 'z') {
+        return BO_INVALID;
+    }
+    for (struct Bucket *cur = TABLE[first - 'a']; cur; cur = cur->next) {
+        // NAME is not NUL-terminated, so bound the comparison by LENGTH.
+        if (cur->name_len == length &&
+            strncasecmp(cur->name, name, length) == 0) {
+            return cur->opcode;
+        }
+    }
+    return BO_INVALID;
+}
+
+// Return the mnemonic for OPCODE, or NULL for BO_INVALID. If LENGTH is not
+// NULL and a mnemonic is returned, store its length there. Aborts on any
+// other unknown opcode.
+const char *bytecode_opcode_name(BytecodeOpcode opcode, size_t *length) {
+#define DEF(name) \
+    case BO_##name: \
+        if (length) { \
+            *length = sizeof(#name) - 1; \
+        } \
+        return #name
+    switch (opcode) {
+        DEF(NIL);
+        DEF(CONS);
+        DEF(CAR);
+        DEF(CDR);
+        DEF(NULL);
+        DEF(POP);
+        DEF(COPY);
+        DEF(LOAD);
+        DEF(EQ);
+        DEF(FUNCALL);
+        DEF(PRINT);
+    case BO_INVALID:
+        return NULL;
+    default:
+        abort();
+    }
+#undef DEF
+}
+
+// Look up a constant type by name. NAME is LENGTH bytes long and does not need
+// to be NUL-terminated; the comparison ignores case. Return
+// BYTECODE_CONST_INVALID if NAME does not name a constant type.
+BytecodeConstType bytecode_const_type_by_name(const char *name, size_t length) {
+// strncasecmp returns 0 on a match, and the length check keeps a mere prefix
+// (e.g. "CON") from matching.
+#define CHECK(type) \
+    if (length == sizeof(#type) - 1 && \
+        strncasecmp(name, #type, length) == 0) { \
+        return BYTECODE_CONST_##type; \
+    }
+    CHECK(FIXNUM);
+    CHECK(FLOAT);
+    CHECK(CONS);
+    CHECK(STRING);
+    CHECK(SYMBOL);
+    CHECK(VECTOR);
+    CHECK(FUNCTION);
+    CHECK(LIST);
+    return BYTECODE_CONST_INVALID;
+#undef CHECK
+}
+
+// Return the name of constant type TYPE, or NULL for BYTECODE_CONST_INVALID.
+// If LENGTH is not NULL and a name is returned, store the name's length there.
+// Aborts on any other unknown type.
+const char *bytecode_const_name(BytecodeConstType type, size_t *length) {
+#define DEF(name) \
+    case BYTECODE_CONST_##name: \
+        if (length) { \
+            *length = sizeof(#name) - 1; \
+        } \
+        return #name
+    switch (type) {
+        DEF(FIXNUM);
+        DEF(FLOAT);
+        DEF(CONS);
+        DEF(STRING);
+        DEF(SYMBOL);
+        DEF(VECTOR);
+        DEF(FUNCTION);
+        DEF(LIST);
+    case BYTECODE_CONST_INVALID:
+        return NULL;
+    default:
+        abort();
+    }
+#undef DEF
+}
+
+enum AssembleMode { MODE_NONE, MODE_CONSTANTS, MODE_TEXT };
+
+typedef struct {
+    BytecodeConstType type;
+    union {
+        fixnum_t fixnum;
+        lisp_float_t lisp_float;
+        struct {
+            intptr_t car;
+            intptr_t cdr;
+        } cons;
+        struct {
+            size_t length;
+            const char *bytes;
+        } string;
+        struct {
+            size_t length;
+            const char *bytes;
+        } symbol;
+        struct {
+            size_t length;
+            intptr_t elements;
+        } vector;
+        struct {
+            // TODO implement
+            int dummy;
+        } function;
+        struct {
+            size_t length;
+            intptr_t elements;
+        } list;
+    };
+} AssembleConstant;
+
+struct AssembleState {
+    enum AssembleMode mode;
+    size_t line_no;
+
+    size_t num_constants;
+    AssembleConstant *constants;
+    size_t num_instructions;
+};
+
+static bool is_blank_string(const char *data, size_t len) {
+    for (size_t i = 0; i < len; ++i) {
+        if (data[i] != ' ' && data[i] != '\t') {
+            return false;
+        }
+    }
+    return true;
+}
+
+// TODO: not implemented yet; every line is currently accepted.
+static BytecodeAssembleStatus
+process_line_none_mode(struct AssembleState *restrict state,
+                       const size_t line_len, const char line[line_len],
+                       BytecodeAssembleError *restrict error) {
+    (void) state;
+    (void) line;
+    (void) error;
+    // Always return a status: the caller inspects it.
+    return BYTECODE_ASSEMBLE_OK;
+}
+
+// Assemble the SRC_LENGTH bytes of text in SRC. On success, store the output
+// buffer in *OUT and its length in *OUT_LENGTH and return
+// BYTECODE_ASSEMBLE_OK. If a line is rejected, return that line's status and
+// leave *OUT and *OUT_LENGTH untouched.
+BytecodeAssembleStatus
+bytecode_assemble(const size_t src_length, const char src[src_length],
+                  size_t *restrict out_length, char **restrict out,
+                  BytecodeAssembleError *restrict error) {
+    StringStream stream;
+    string_stream_init(&stream);
+    struct AssembleState state = {
+        .mode = MODE_NONE,
+        .line_no = 1,
+        .num_constants = 0,
+        .constants = NULL,
+        .num_instructions = 0,
+    };
+    const char *line = NULL;
+    size_t line_len;
+    while (strgetline(src, src_length, &line, &line_len)) {
+        BytecodeAssembleStatus status = BYTECODE_ASSEMBLE_OK;
+        switch (state.mode) {
+        case MODE_NONE:
+            status = process_line_none_mode(&state, line_len, line, error);
+            break;
+        case MODE_CONSTANTS:
+        case MODE_TEXT:
+            break;
+        }
+        if (status != BYTECODE_ASSEMBLE_OK) {
+            // Do not leak the partially built output.
+            string_stream_free(&stream);
+            return status;
+        }
+        ++state.line_no;
+    }
+    string_stream_steal(&stream, out, out_length);
+    return BYTECODE_ASSEMBLE_OK;
+}
diff --git a/src/byterun/assemble.h b/src/byterun/assemble.h
new file mode 100644
index 0000000..d6d8a48
--- /dev/null
+++ b/src/byterun/assemble.h
@@ -0,0 +1,32 @@
+#ifndef INCLUDED_ASSEMBLE_H
+#define INCLUDED_ASSEMBLE_H
+
+#include "byterun.h"
+
+#include
+
+BytecodeOpcode bytecode_opcode_by_name(const char *name, size_t length);
+const char *bytecode_opcode_name(BytecodeOpcode opcode, size_t *length);
+
+BytecodeConstType bytecode_const_type_by_name(const char *name, size_t length);
+const char *bytecode_const_name(BytecodeConstType type, size_t *length);
+
+typedef enum {
+    BYTECODE_ASSEMBLE_OK,
+    BYTECODE_ASSEMBLE_SYNTAX,
+} BytecodeAssembleStatus;
+
+typedef struct {
+    size_t line;
+    size_t col;
+    size_t length;
+    const char *detail;
+} BytecodeAssembleError;
+
+BytecodeAssembleStatus bytecode_assemble(const size_t src_length,
+                                         const char src[src_length],
+                                         size_t *restrict out_length,
+                                         char **restrict out,
+                                         BytecodeAssembleError *restrict error);
+
+#endif
diff --git a/src/byterun/byterun.c b/src/byterun/byterun.c
new file mode 100644
index 0000000..267d70e
--- /dev/null
+++ b/src/byterun/byterun.c
@@ -0,0 +1 @@
+#include "byterun.h"
diff --git a/src/byterun/byterun.h b/src/byterun/byterun.h
new file mode 100644
index 0000000..59c70d5
--- /dev/null
+++ b/src/byterun/byterun.h
@@ -0,0 +1,33 @@
+#ifndef INCLUDED_BYTERUN_H
+#define INCLUDED_BYTERUN_H
+
+typedef enum {
+    BO_NIL = 0x00,
+    BO_CONS = 0x01,
+    BO_CAR = 0x02,
+    BO_CDR = 0x03,
+    BO_NULL = 0x04,
+    BO_POP = 0x05,
+    BO_COPY = 0x06,
+    BO_LOAD = 0x07,
+    BO_EQ = 0x08,
+    BO_FUNCALL = 0x09,
+    BO_PRINT = 0x7f,
+    BO_INVALID = 0xff,
+} BytecodeOpcode;
+
+// make sure this is kept up to date with base.h
+typedef enum {
+    BYTECODE_CONST_FIXNUM = 0,
+    BYTECODE_CONST_FLOAT = 1,
+    BYTECODE_CONST_CONS = 2,
+    BYTECODE_CONST_STRING = 3,
+    BYTECODE_CONST_SYMBOL = 4,
+    BYTECODE_CONST_VECTOR = 5,
+    // No such constant BYTECODE_CONST_HASH_TABLE = 6,
+    BYTECODE_CONST_FUNCTION = 7,
+    BYTECODE_CONST_LIST = 8,
+    BYTECODE_CONST_INVALID = 0xff,
+} BytecodeConstType;
+
+#endif
diff --git a/src/gc.h b/src/gc.h
index 3136090..2be5c16 100644
--- a/src/gc.h
+++ b/src/gc.h
@@ -67,7 +67,7 @@ void lisp_gc_register_static_object(void *val);
 void gc_move_to_set(void *val, ObjectGCSet new_set);
 
 // note that the argument is restrict!
-void lisp_gc_now(LispGCStats *restrict status);
+void lisp_gc_now(LispGCStats *restrict stats);
 
 // Debug
 void debug_print_gc_stats(FILE *stream, const LispGCStats *stats);
diff --git a/src/main.c b/src/main.c
index ef4eadf..8ece9d7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,30 +1,36 @@
+#include "byterun/assemble.h"
 #include "lisp.h"
 #include "read.h"
 
 #include
 
-DEFUN(cool_func, "cool-func", (LispVal * a, LispVal *b), "(a &optional b)",
-      "") {
-    printf("A: ");
-    debug_obj_info(stdout, a);
-    printf("B: ");
-    debug_obj_info(stdout, b);
-    return Qnil;
-}
+/* DEFUN(cool_func, "cool-func", (LispVal * a, LispVal *b), "(a &optional b)",
+ */
+/*       "") { */
+/*     printf("A: "); */
+/*     debug_obj_info(stdout, a); */
+/*     printf("B: "); */
+/*     debug_obj_info(stdout, b); */
+/*     return Qnil; */
+/* } */
 
-int main(int argc, const char **argv) {
-    LispGCStats gc_stats;
-    lisp_init();
-    REGISTER_GLOBAL_FUNCTION(cool_func);
-    push_stack_frame(Qnil, Qnil, Qnil);
-    ReadStream s;
-    const char BUF[] = "(cool-func 1 (cons 1 2))";
-    read_stream_init(&s, BUF, sizeof(BUF) - 1);
-    LispVal *l = read(&s);
-    Feval(l);
-    lisp_gc_now(&gc_stats);
-    debug_print_gc_stats(stdout, &gc_stats);
-    pop_stack_frame();
-    lisp_shutdown();
+/* int main(int argc, const cha-r **argv) { */
+/*     LispGCStats gc_stats; */
+/*     lisp_init(); */
+/*     REGISTER_GLOBAL_FUNCTION(cool_func); */
+/*     push_stack_frame(Qnil, Qnil, Qnil); */
+/*     ReadStream s; */
+/*     const char BUF[] = "(cool-func 1 (cons 1 2))"; */
+/*     read_stream_init(&s, BUF, sizeof(BUF) - 1); */
+/*     LispVal *l = read(&s); */
+/*     Feval(l); */
+/*     lisp_gc_now(&gc_stats); */
+/*     debug_print_gc_stats(stdout, &gc_stats); */
+/*     pop_stack_frame(); */
+/*     lisp_shutdown(); */
+/*     return 0; */
+/* } */
+
+int main(int argc, const char **argv) {
     return 0;
 }
diff --git a/src/memory.c b/src/memory.c
index 8698bb3..34ecf88 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -36,3 +36,77 @@ void *lisp_aligned_alloc(size_t alignment, size_t size) {
     }
     return ptr;
 }
+
+#define STRING_STREAM_BLOCK_SIZE 32
+// Make sure STREAM can hold SPACE more characters (plus the terminating null
+// byte), growing the buffer in STRING_STREAM_BLOCK_SIZE steps if necessary.
+static void ensure_string_stream_space(StringStream *restrict stream,
+                                       size_t space) {
+    size_t min_size = stream->nchars + space;
+    size_t new_size = stream->size;
+    while (new_size < min_size) {
+        new_size += STRING_STREAM_BLOCK_SIZE;
+    }
+    if (new_size != stream->size) {
+        stream->buffer = lisp_realloc(stream->buffer, new_size + 1);
+        // Record the new capacity; without this the write in
+        // string_stream_vprintf would still be limited to the old size.
+        stream->size = new_size;
+    }
+}
+
+// printf-style append to STREAM. Return the number of characters appended.
+int string_stream_printf(StringStream *restrict stream,
+                         const char *restrict format, ...) {
+    va_list args;
+    va_start(args, format);
+    int rval = string_stream_vprintf(stream, format, args);
+    va_end(args);
+    return rval;
+}
+
+// Like string_stream_printf, but takes a va_list. Aborts if formatting fails.
+int string_stream_vprintf(StringStream *restrict stream,
+                          const char *restrict format, va_list args) {
+    va_list args_copy;
+    va_copy(args_copy, args);
+    // First pass: measure how much room the formatted text needs.
+    int space = vsnprintf(NULL, 0, format, args_copy);
+    if (space < 0) {
+        abort();
+    }
+    va_end(args_copy);
+    ensure_string_stream_space(stream, space);
+    int rval = vsnprintf(stream->buffer + stream->nchars,
+                         stream->size + 1 - stream->nchars, format, args);
+    if (rval < 0) {
+        abort();
+    }
+    stream->nchars += rval;
+    return rval;
+}
+
+bool strgetline(const char *restrict buf, size_t buf_length,
+                const char **restrict start, size_t *restrict length) {
+    if (!*start) {
+        *start = buf;
+        if (!buf_length) {
+            *length = 0;
+            return true;
+        }
+    } else if (!buf_length) {
+        return false;
+    } else if (*start + *length >= buf + buf_length - 1) {
+        return false;
+    } else /* if (*start) */ {
+        *start += *length + 1;
+    }
+    size_t left = buf_length - (*start - buf);
+    const char *found;
+    if ((found = memchr(*start, '\n', left))) {
+        *length = found - *start;
+    } else {
+        *length = left;
+    }
+    return true;
+}
diff --git a/src/memory.h b/src/memory.h
index 43f504c..fb1e020 100644
--- a/src/memory.h
+++ b/src/memory.h
@@ -2,6 +2,7 @@
 #define INCLUDED_MEMORY_H
 
 #include
+#include
 #include
 #include
 #include
@@ -18,6 +19,12 @@
 # define ALWAYS_INLINE inline
 #endif
 
+#if __has_attribute(format)
+# define FORMAT(n, m) __attribute__((format(printf, n, m)))
+#else
+# define FORMAT(n, m)
+#endif
+
 // Byte order stuff
 typedef enum {
     ENDIAN_LITTLE,
@@ -123,4 +130,40 @@ static ALWAYS_INLINE void add_timespecs(const struct timespec *t1,
     out->tv_nsec = nsec;
 }
 
+typedef struct {
+    // this is actually size + 1 bytes for the null byte
+    char *buffer;
+    size_t size;
+    size_t nchars;
+} StringStream;
+
+static inline void string_stream_init(StringStream *restrict stream) {
+    stream->buffer = lisp_malloc(1);
+    stream->size = 0;
+    stream->buffer[stream->size] = '\0';
+    stream->nchars = 0;
+}
+
+static inline void string_stream_free(StringStream *restrict stream) {
+    lisp_free(stream->buffer);
+}
+
+static inline void string_stream_steal(StringStream *restrict stream,
+                                       char **restrict out,
+                                       size_t *restrict out_length) {
+    *out = stream->buffer;
+    *out_length = stream->nchars;
+}
+
+int string_stream_printf(StringStream *restrict stream,
+                         const char *restrict format, ...) FORMAT(2, 3);
+int string_stream_vprintf(StringStream *restrict stream,
+                          const char *restrict format, va_list args);
+
+// Get the next line in BUF starting at *START (or &buf[0] if *START is
+// NULL). Store the length of the line in LENGTH. BUF is BUF_LENGTH bytes
+// long. Return true if we found another line and false otherwise.
+bool strgetline(const char *restrict buf, size_t buf_length,
+                const char **restrict start, size_t *restrict length);
+
 #endif