Some work

This commit is contained in:
2026-01-22 21:08:02 -08:00
parent eca8ae3d3e
commit f67ed56d52
12 changed files with 488 additions and 38 deletions

View File

@ -6,7 +6,8 @@ CompileFlags:
-Wpedantic,
-xc,
-D_POSIX_C_SOURCE=199309L,
"-fsanitize=address,undefined",
"-Isrc",
"-I../",
]
Compiler: gcc
---
@ -14,8 +15,7 @@ If:
PathMatch: .*\.h
CompileFlags:
Remove: -xc
Add: [-std=c11, -Wall, -Wpedantic, -xc-header]
Compiler: gcc
Add: -xc-header
---
If:
PathMatch: bin/.*\.c

View File

@ -19,7 +19,7 @@ CFLAGS=$(DEBUG_CFLAGS) $(LLVM_SAN_FLAGS) -std=c11 -Wall -Wpedantic $\
LD=gcc
LDFLAGS=$(LLVM_SAN_FLAGS)
SRCS:=$(wildcard src/*.c)
SRCS:=$(wildcard src/*.c) $(wildcard src/byterun/*.c)
OBJS:=$(SRCS:src/%.c=bin/%.o)
DEPS:=$(SRCS:src/%.c=bin/deps/%.d)
@ -34,12 +34,11 @@ glisp: $(OBJS)
$(LD) $(LDFLAGS) -o $@ $^
bin/init_globals.c: $(filter-out bin/init_globals.c,$(SRCS_WITH_HEADERS)) src/gen-init-globals.awk
@mkdir -p bin/
awk -f src/gen-init-globals.awk $(filter-out src/gen-init-globals.awk,$^) >$@
bin/%.o: src/%.c
@mkdir -p bin/deps
$(CC) $(CFLAGS) -c -MMD -MF $(<:src/%.c=bin/deps/%.d) -o $@ $<
@mkdir -p $(dir $(<:src/%.c=bin/deps/%.d) $(<:src/%.c=bin/%))
$(CC) $(CFLAGS) -c -MMD -MF $(<:src/%.c=bin/deps/%.d) -I src/ -o $@ $<
bin/init_globals.o: bin/init_globals.c
@mkdir -p bin/deps

View File

@ -76,15 +76,16 @@ static ALWAYS_INLINE LispVal *MAKE_LISP_FLOAT(lisp_float_t flt) {
// ###############
// # Other types #
// ###############
// Make sure this is kept up to date with byterun.h
typedef enum {
TYPE_FIXNUM,
TYPE_FLOAT,
TYPE_CONS,
TYPE_STRING,
TYPE_SYMBOL,
TYPE_VECTOR,
TYPE_HASH_TABLE,
TYPE_FUNCTION,
TYPE_FIXNUM = 0,
TYPE_FLOAT = 1,
TYPE_CONS = 2,
TYPE_STRING = 3,
TYPE_SYMBOL = 4,
TYPE_VECTOR = 5,
TYPE_HASH_TABLE = 6,
TYPE_FUNCTION = 7,
N_LISP_TYPES,
} LispValType;
extern const char *LISP_TYPE_NAMES[N_LISP_TYPES];

59
src/byterun/BYTECODE.md Normal file
View File

@ -0,0 +1,59 @@
The bytecode interpreter runs using a stack. Most instructions operate on the
stack. Each instruction starts with a 1 byte opcode. If it has arguments, they
come next (and are detailed below). All instructions push their return value
onto the stack (though some produce no return value).
The leftmost (most significant) bit of the opcode is the "save flag". If it is
set, the instruction does not remove its arguments from the stack.
Each block of bytecode (function, etc.) starts with some constants. They have
the following format
WORDSIZE bytes - Number of constants
NOTE: in the below MINSIZE is the minimum number of bytes needed to reach any
object, signed.
The constants start with 1 byte for type (as in base.h). Their formats follow:
FIXNUM: WORDSIZE bytes (for the value with the tag already set).
FLOAT: WORDSIZE bytes (for the value with the tag already set).
CONS: two MINSIZE bytes fields for the car and cdr.
STRING: WORDSIZE bytes for the length, then the bytes themselves.
SYMBOL: WORDSIZE bytes for the length, then the bytes themselves. These are for
the name. The VM interns the symbol when it loads the bytecode.
VECTOR: WORDSIZE bytes for the length. Then MINSIZE bytes for each element.
HASH_TABLE: Not a valid constant
FUNCTION: TODO - Not implemented yet (this will include lambdas).
LIST (Type code is FUNCTION + 1): Ditto vector, but a list is created
instead. This is just a convenience.
Instructions (Name - Opcode):
NIL - 0x0: Push nil onto the stack.
CONS - 0x1: Return a cons of the top of the stack. The top becomes the cdr and
the next element the car.
CAR - 0x2: Return the car of the cons on top of the stack.
CDR - 0x3: Return the cdr of the cons on top of the stack.
NULL - 0x4: Return t if the top of the stack is nil, otherwise return nil.
POP - 0x5: Throw away the top of the stack (produces no value). Doing this with
the save flag set is a noop.
COPY - 0x6: Takes one argument which is a zero-indexed offset from the top of
the stack (as in 0 is the top). Pushes a copy of the element onto the top of the
stack.
LOAD - 0x7: Load the constant with the given number onto the top of the stack.
This takes no stack arguments. It takes one constant argument (encoded in the
smallest width necessary to number all constants). This argument is simply the
number of the constant to load.
EQ - 0x8: If the top two elements of the stack are the same object, return t,
otherwise return nil.
FUNCALL - 0x9: Call the function on top of the stack. The second argument is the
number of arguments. That many elements are then used as the arguments. The
first argument is the one directly following the count.
PRINT - 0x7f: DEBUG ONLY - print the top of the stack

210
src/byterun/assemble.c Normal file
View File

@ -0,0 +1,210 @@
#include "assemble.h"
#include "base.h"
#include "memory.h"
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <strings.h>
// Look up a bytecode opcode by its mnemonic.
//
// NAME points to LENGTH bytes holding the mnemonic; it does not have to be
// NUL-terminated, and only the first LENGTH bytes are ever read. The
// comparison is case-insensitive.
//
// Returns the matching opcode, or BO_INVALID if LENGTH is zero or no opcode
// has that mnemonic (including names that do not start with a letter).
BytecodeOpcode bytecode_opcode_by_name(const char *name, size_t length) {
  struct Entry {
    size_t name_len;
    const char *name;
    BytecodeOpcode opcode;
  };
// MNAME is only used with # and ##, so mnemonics that are also macros (NULL)
// are not expanded.
#define ENTRY(mname) {sizeof(#mname) - 1, #mname, BO_##mname}
  static const struct Entry TABLE[] = {
      ENTRY(NIL),  ENTRY(CONS), ENTRY(CAR), ENTRY(CDR),     ENTRY(NULL),
      ENTRY(POP),  ENTRY(COPY), ENTRY(LOAD), ENTRY(EQ),     ENTRY(FUNCALL),
      ENTRY(PRINT),
  };
#undef ENTRY
  if (!length) {
    return BO_INVALID;
  }
  for (size_t i = 0; i < sizeof(TABLE) / sizeof(TABLE[0]); ++i) {
    const struct Entry *entry = &TABLE[i];
    // Check the length first so strncasecmp never has to decide whether NAME
    // is merely a prefix of the mnemonic.
    if (entry->name_len == length &&
        strncasecmp(entry->name, name, length) == 0) {
      return entry->opcode;
    }
  }
  return BO_INVALID;
}
// Return the mnemonic of OPCODE as a static string. If LENGTH is non-NULL the
// length of the mnemonic (without the terminating NUL) is stored in *LENGTH.
//
// Returns NULL (leaving *LENGTH untouched) for BO_INVALID and aborts for any
// value that is not a member of BytecodeOpcode.
const char *bytecode_opcode_name(BytecodeOpcode opcode, size_t *length) {
  const char *mnemonic = NULL;
  size_t mnemonic_len = 0;
#define OPCODE_CASE(name)                                                      \
  case BO_##name:                                                              \
    mnemonic = #name;                                                          \
    mnemonic_len = sizeof(#name) - 1;                                          \
    break
  switch (opcode) {
    OPCODE_CASE(NIL);
    OPCODE_CASE(CONS);
    OPCODE_CASE(CAR);
    OPCODE_CASE(CDR);
    OPCODE_CASE(NULL);
    OPCODE_CASE(POP);
    OPCODE_CASE(COPY);
    OPCODE_CASE(LOAD);
    OPCODE_CASE(EQ);
    OPCODE_CASE(FUNCALL);
    OPCODE_CASE(PRINT);
  case BO_INVALID:
    return NULL;
  default:
    abort();
  }
#undef OPCODE_CASE
  if (length) {
    *length = mnemonic_len;
  }
  return mnemonic;
}
// Look up a bytecode constant type by its name.
//
// NAME points to LENGTH bytes holding the type name; it does not have to be
// NUL-terminated. The comparison is case-insensitive.
//
// Returns the matching constant type, or BYTECODE_CONST_INVALID if no type
// has that name.
BytecodeConstType bytecode_const_type_by_name(const char *name, size_t length) {
// strncasecmp returns 0 on a match; the length check makes sure NAME is not
// just a prefix of the type name.
#define CHECK(type)                                                            \
  if (length == sizeof(#type) - 1 &&                                           \
      strncasecmp(name, #type, length) == 0) {                                 \
    return BYTECODE_CONST_##type;                                              \
  }
  CHECK(FIXNUM)
  CHECK(FLOAT)
  CHECK(CONS)
  CHECK(STRING)
  CHECK(SYMBOL)
  CHECK(VECTOR)
  CHECK(FUNCTION)
  CHECK(LIST)
#undef CHECK
  return BYTECODE_CONST_INVALID;
}
// Return the name of the constant type TYPE as a static string. If LENGTH is
// non-NULL the length of the name (without the terminating NUL) is stored in
// *LENGTH.
//
// Returns NULL (leaving *LENGTH untouched) for BYTECODE_CONST_INVALID and
// aborts for any value that is not a member of BytecodeConstType.
const char *bytecode_const_name(BytecodeConstType type, size_t *length) {
#define DEF(name)                                                              \
  case BYTECODE_CONST_##name:                                                  \
    if (length) {                                                              \
      *length = sizeof(#name) - 1;                                             \
    }                                                                          \
    return #name
  switch (type) {
    DEF(FIXNUM);
    DEF(FLOAT);
    DEF(CONS);
    DEF(STRING);
    DEF(SYMBOL);
    DEF(VECTOR);
    DEF(FUNCTION);
    DEF(LIST);
  // Use the sentinel of this enum, not the opcode enum's BO_INVALID.
  case BYTECODE_CONST_INVALID:
    return NULL;
  default:
    abort();
  }
#undef DEF
}
// Which part of the assembly source is currently being processed.
// bytecode_assemble starts in MODE_NONE and dispatches each line on the mode.
// NOTE(review): MODE_CONSTANTS and MODE_TEXT presumably correspond to the
// constants block and the instruction stream - confirm once implemented.
enum AssembleMode { MODE_NONE, MODE_CONSTANTS, MODE_TEXT };
// A single constant collected by the assembler: a type tag plus one payload
// per constant type, stored in an anonymous union.
// NOTE(review): presumably TYPE selects which union member is valid - confirm
// once the constant parser exists.
typedef struct {
BytecodeConstType type;
union {
fixnum_t fixnum;
lisp_float_t lisp_float;
struct {
// NOTE(review): presumably references to other constants (BYTECODE.md
// describes car and cdr as MINSIZE-byte fields) - confirm.
intptr_t car;
intptr_t cdr;
} cons;
struct {
size_t length;
const char *bytes;
} string;
struct {
// Same layout as string; BYTECODE.md says these bytes are the symbol's name.
size_t length;
const char *bytes;
} symbol;
struct {
size_t length;
// NOTE(review): a single intptr_t, not an array - how the LENGTH elements
// are reached from it is not visible here; confirm.
intptr_t elements;
} vector;
struct {
// TODO implement
int dummy;
} function;
struct {
// Same layout as vector.
size_t length;
intptr_t elements;
} list;
};
} AssembleConstant;
// Mutable state threaded through the per-line handlers of bytecode_assemble.
struct AssembleState {
// Selects which per-line handler bytecode_assemble dispatches to.
enum AssembleMode mode;
// Line counter; bytecode_assemble starts it at 1 and increments it after
// every line.
size_t line_no;
// Number of entries in CONSTANTS (both start out as 0 / NULL).
size_t num_constants;
AssembleConstant *constants;
// Starts at 0. NOTE(review): presumably counts emitted instructions - nothing
// updates it yet.
size_t num_instructions;
};
// Return true if the LEN bytes at DATA consist solely of spaces and tabs.
// An empty range (LEN == 0) counts as blank.
static bool is_blank_string(const char *data, size_t len) {
  const char *cursor = data;
  size_t remaining = len;
  while (remaining-- > 0) {
    const char c = *cursor++;
    if (!(c == ' ' || c == '\t')) {
      return false;
    }
  }
  return true;
}
// Handle one source line while the assembler is in MODE_NONE.
//
// TODO: not implemented yet - every line is accepted and ignored. The function
// still returns a definite status so that callers may inspect it (falling off
// the end of a non-void function and then using the value is undefined).
static BytecodeAssembleStatus
process_line_none_mode(struct AssembleState *restrict state,
                       const size_t line_len, const char line[line_len],
                       BytecodeAssembleError *restrict error) {
  (void)state;
  (void)line;
  (void)error;
  return BYTECODE_ASSEMBLE_OK;
}

// Assemble the SRC_LENGTH bytes of assembly text at SRC.
//
// On success, returns BYTECODE_ASSEMBLE_OK and stores the output buffer in
// *OUT and its length in *OUT_LENGTH; the buffer is taken from an internal
// StringStream, so the caller becomes responsible for it.
//
// If a line handler reports a failure, that status is returned, the partial
// output is freed, and *OUT / *OUT_LENGTH are set to NULL / 0. ERROR is passed
// on to the line handlers.
BytecodeAssembleStatus
bytecode_assemble(const size_t src_length, const char src[src_length],
                  size_t *restrict out_length, char **restrict out,
                  BytecodeAssembleError *restrict error) {
  StringStream stream;
  string_stream_init(&stream);
  struct AssembleState state = {
      .mode = MODE_NONE,
      .line_no = 1,
      .num_constants = 0,
      .constants = NULL,
      .num_instructions = 0,
  };
  const char *line = NULL;
  size_t line_len = 0;
  while (strgetline(src, src_length, &line, &line_len)) {
    BytecodeAssembleStatus status = BYTECODE_ASSEMBLE_OK;
    switch (state.mode) {
    case MODE_NONE:
      status = process_line_none_mode(&state, line_len, line, error);
      break;
    case MODE_CONSTANTS:
    case MODE_TEXT:
      // TODO: not implemented yet
      break;
    }
    if (status != BYTECODE_ASSEMBLE_OK) {
      // Do not leak the partially built output on failure.
      string_stream_free(&stream);
      *out = NULL;
      *out_length = 0;
      return status;
    }
    ++state.line_no;
  }
  string_stream_steal(&stream, out, out_length);
  return BYTECODE_ASSEMBLE_OK;
}

32
src/byterun/assemble.h Normal file
View File

@ -0,0 +1,32 @@
#ifndef INCLUDED_ASSEMBLE_H
#define INCLUDED_ASSEMBLE_H
#include "byterun.h"
#include <stddef.h>
// Look up an opcode by its mnemonic (compared case-insensitively). LENGTH is
// the length of NAME. Returns BO_INVALID if LENGTH is zero or no opcode has
// that mnemonic.
BytecodeOpcode bytecode_opcode_by_name(const char *name, size_t length);
// Return the mnemonic of OPCODE as a static string, storing its length in
// *LENGTH when LENGTH is non-NULL. Returns NULL for BO_INVALID and aborts for
// any other value that is not a known opcode.
const char *bytecode_opcode_name(BytecodeOpcode opcode, size_t *length);
// Meant to map a constant type name (compared case-insensitively) to its
// BytecodeConstType, with BYTECODE_CONST_INVALID as the "no such type" result.
BytecodeConstType bytecode_const_type_by_name(const char *name, size_t length);
// Return the name of TYPE as a static string, storing its length in *LENGTH
// when LENGTH is non-NULL. Returns NULL for BYTECODE_CONST_INVALID and aborts
// for any other value that is not a known constant type.
const char *bytecode_const_name(BytecodeConstType type, size_t *length);
// Result of bytecode_assemble.
typedef enum {
// Assembly succeeded.
BYTECODE_ASSEMBLE_OK,
// NOTE(review): presumably reported for malformed source - nothing returns
// it yet; confirm once the line handlers are implemented.
BYTECODE_ASSEMBLE_SYNTAX,
} BytecodeAssembleStatus;
// Error details handed to bytecode_assemble by the caller.
// NOTE(review): nothing fills this in yet; the field meanings below are
// assumptions based on the names - confirm once errors are reported.
typedef struct {
// presumably the source line of the error
size_t line;
// presumably the column where the offending text starts
size_t col;
// presumably the length of the offending text
size_t length;
// presumably a human-readable description
const char *detail;
} BytecodeAssembleError;
// Assemble the SRC_LENGTH bytes of assembly text at SRC. The source is
// processed line by line. On return the output buffer is stored in *OUT and
// its length in *OUT_LENGTH. ERROR is passed on to the per-line handlers.
BytecodeAssembleStatus bytecode_assemble(const size_t src_length,
const char src[src_length],
size_t *restrict out_length,
char **restrict out,
BytecodeAssembleError *restrict error);
#endif

1
src/byterun/byterun.c Normal file
View File

@ -0,0 +1 @@
#include "byterun.h"

33
src/byterun/byterun.h Normal file
View File

@ -0,0 +1,33 @@
#ifndef INCLUDED_BYTERUN_H
#define INCLUDED_BYTERUN_H
// Opcodes of the bytecode interpreter. The numeric values are the opcode
// bytes listed in BYTECODE.md, so they must not be renumbered.
typedef enum {
BO_NIL = 0x00,
BO_CONS = 0x01,
BO_CAR = 0x02,
BO_CDR = 0x03,
BO_NULL = 0x04,
BO_POP = 0x05,
BO_COPY = 0x06,
BO_LOAD = 0x07,
BO_EQ = 0x08,
BO_FUNCALL = 0x09,
// Debug only: print the top of the stack.
BO_PRINT = 0x7f,
// Not a real instruction: returned by bytecode_opcode_by_name when the
// lookup fails.
BO_INVALID = 0xff,
} BytecodeOpcode;
// Type codes of the constants stored at the start of a block of bytecode.
// Make sure this is kept up to date with LispValType in base.h: the shared
// values must stay identical.
typedef enum {
BYTECODE_CONST_FIXNUM = 0,
BYTECODE_CONST_FLOAT = 1,
BYTECODE_CONST_CONS = 2,
BYTECODE_CONST_STRING = 3,
BYTECODE_CONST_SYMBOL = 4,
BYTECODE_CONST_VECTOR = 5,
// No such constant: BYTECODE_CONST_HASH_TABLE = 6 (hash tables are not valid
// constants, but the value stays reserved to match base.h).
BYTECODE_CONST_FUNCTION = 7,
// Not a LispValType: encoded like a vector, but a list is created instead.
BYTECODE_CONST_LIST = 8,
// Not a real constant type: the "no such type" sentinel.
BYTECODE_CONST_INVALID = 0xff,
} BytecodeConstType;
#endif

View File

@ -67,7 +67,7 @@ void lisp_gc_register_static_object(void *val);
void gc_move_to_set(void *val, ObjectGCSet new_set);
// note that the argument is restrict!
void lisp_gc_now(LispGCStats *restrict status);
void lisp_gc_now(LispGCStats *restrict stats);
// Debug
void debug_print_gc_stats(FILE *stream, const LispGCStats *stats);

View File

@ -1,30 +1,36 @@
#include "byterun/assemble.h"
#include "lisp.h"
#include "read.h"
#include <stdio.h>
DEFUN(cool_func, "cool-func", (LispVal * a, LispVal *b), "(a &optional b)",
"") {
printf("A: ");
debug_obj_info(stdout, a);
printf("B: ");
debug_obj_info(stdout, b);
return Qnil;
}
/* DEFUN(cool_func, "cool-func", (LispVal * a, LispVal *b), "(a &optional b)",
*/
/* "") { */
/* printf("A: "); */
/* debug_obj_info(stdout, a); */
/* printf("B: "); */
/* debug_obj_info(stdout, b); */
/* return Qnil; */
/* } */
int main(int argc, const char **argv) {
LispGCStats gc_stats;
lisp_init();
REGISTER_GLOBAL_FUNCTION(cool_func);
push_stack_frame(Qnil, Qnil, Qnil);
ReadStream s;
const char BUF[] = "(cool-func 1 (cons 1 2))";
read_stream_init(&s, BUF, sizeof(BUF) - 1);
LispVal *l = read(&s);
Feval(l);
lisp_gc_now(&gc_stats);
debug_print_gc_stats(stdout, &gc_stats);
pop_stack_frame();
lisp_shutdown();
/* int main(int argc, const cha-r **argv) { */
/* LispGCStats gc_stats; */
/* lisp_init(); */
/* REGISTER_GLOBAL_FUNCTION(cool_func); */
/* push_stack_frame(Qnil, Qnil, Qnil); */
/* ReadStream s; */
/* const char BUF[] = "(cool-func 1 (cons 1 2))"; */
/* read_stream_init(&s, BUF, sizeof(BUF) - 1); */
/* LispVal *l = read(&s); */
/* Feval(l); */
/* lisp_gc_now(&gc_stats); */
/* debug_print_gc_stats(stdout, &gc_stats); */
/* pop_stack_frame(); */
/* lisp_shutdown(); */
/* return 0; */
/* } */
int main(int argc, const char **argvc) {
return 0;
}

View File

@ -36,3 +36,69 @@ void *lisp_aligned_alloc(size_t alignment, size_t size) {
}
return ptr;
}
#define STRING_STREAM_BLOCK_SIZE 32
// Make sure STREAM can hold SPACE more characters (plus the terminating NUL)
// after its current contents, growing the buffer in multiples of
// STRING_STREAM_BLOCK_SIZE if necessary.
static void ensure_string_stream_space(StringStream *restrict stream,
                                       size_t space) {
  const size_t min_size = stream->nchars + space;
  if (min_size <= stream->size) {
    return;
  }
  // Round the shortfall up to a whole number of blocks.
  const size_t shortfall = min_size - stream->size;
  const size_t blocks =
      (shortfall + STRING_STREAM_BLOCK_SIZE - 1) / STRING_STREAM_BLOCK_SIZE;
  const size_t new_size = stream->size + blocks * STRING_STREAM_BLOCK_SIZE;
  // The buffer always holds size + 1 bytes so there is room for the NUL.
  stream->buffer = lisp_realloc(stream->buffer, new_size + 1);
  // Record the new capacity; without this every later size computation
  // would still see the old (too small) size.
  stream->size = new_size;
}
// printf-style formatting appended to STREAM. Thin variadic wrapper that
// forwards to string_stream_vprintf and returns its result.
int string_stream_printf(StringStream *restrict stream,
                         const char *restrict format, ...) {
  va_list ap;
  va_start(ap, format);
  const int written = string_stream_vprintf(stream, format, ap);
  va_end(ap);
  return written;
}
// Format ARGS according to FORMAT and append the result to STREAM.
// Returns the number of characters reported by vsnprintf for the formatted
// text; aborts if vsnprintf reports an error.
int string_stream_vprintf(StringStream *restrict stream,
                          const char *restrict format, va_list args) {
  // Dry run on a copy of ARGS to find out how many characters are needed;
  // ARGS itself must stay untouched for the real formatting pass below.
  va_list probe_args;
  va_copy(probe_args, args);
  const int needed = vsnprintf(NULL, 0, format, probe_args);
  if (needed < 0) {
    abort();
  }
  va_end(probe_args);

  ensure_string_stream_space(stream, needed);

  // Compute the destination only after the buffer may have been reallocated.
  char *const dest = stream->buffer + stream->nchars;
  const size_t capacity = stream->size + 1 - stream->nchars;
  const int written = vsnprintf(dest, capacity, format, args);
  if (written < 0) {
    abort();
  }
  stream->nchars += written;
  return written;
}
// Iterate over the lines of the BUF_LENGTH bytes at BUF.
//
// Pass *START == NULL for the first call; afterwards pass back the *START and
// *LENGTH produced by the previous call. On success *START points at the
// beginning of the next line and *LENGTH is its length without the '\n'.
//
// Returns false once the previous line reached the end of the buffer (a final
// '\n' does not produce an extra empty line). An empty buffer yields exactly
// one empty line.
bool strgetline(const char *restrict buf, size_t buf_length,
                const char **restrict start, size_t *restrict length) {
  if (*start == NULL) {
    // First call: begin at the head of the buffer.
    *start = buf;
    if (buf_length == 0) {
      *length = 0;
      return true;
    }
  } else {
    if (buf_length == 0) {
      return false;
    }
    const char *const line_end = *start + *length;
    const char *const last_byte = buf + buf_length - 1;
    // The previous line ended at the end of the buffer or at a '\n' that is
    // the very last byte: nothing is left.
    if (line_end >= last_byte) {
      return false;
    }
    // Skip the '\n' that terminated the previous line.
    *start = line_end + 1;
  }

  const size_t remaining = buf_length - (size_t)(*start - buf);
  const char *const newline = memchr(*start, '\n', remaining);
  if (newline != NULL) {
    *length = (size_t)(newline - *start);
  } else {
    *length = remaining;
  }
  return true;
}

View File

@ -2,6 +2,7 @@
#define INCLUDED_MEMORY_H
#include <float.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
@ -18,6 +19,12 @@
# define ALWAYS_INLINE inline
#endif
// FORMAT(n, m): mark a function as printf-like so the compiler can check its
// arguments. N is the 1-based index of the format string parameter and M the
// index of the first variadic argument. Expands to nothing when the compiler
// does not report support for the format attribute.
#if __has_attribute(format)
# define FORMAT(n, m) __attribute__((format(printf, n, m)))
#else
# define FORMAT(n, m)
#endif
// Byte order stuff
typedef enum {
ENDIAN_LITTLE,
@ -123,4 +130,40 @@ static ALWAYS_INLINE void add_timespecs(const struct timespec *t1,
out->tv_nsec = nsec;
}
// A growable, NUL-terminated character buffer that can be appended to with
// string_stream_printf / string_stream_vprintf.
typedef struct {
// this is actually size + 1 bytes for the null byte
char *buffer;
// Capacity in characters, not counting the extra byte for the NUL.
size_t size;
// Number of characters currently stored, not counting the NUL.
size_t nchars;
} StringStream;
// Initialize STREAM as an empty string: a one-byte buffer that holds only the
// terminating NUL, with zero capacity and zero characters stored.
static inline void string_stream_init(StringStream *restrict stream) {
  char *const initial = lisp_malloc(1);
  initial[0] = '\0';
  stream->buffer = initial;
  stream->size = 0;
  stream->nchars = 0;
}
// Release the buffer owned by STREAM. Afterwards the stream holds no buffer
// (all fields are cleared, so no dangling pointer is left behind) and must be
// re-initialized with string_stream_init before it is used again.
static inline void string_stream_free(StringStream *restrict stream) {
  lisp_free(stream->buffer);
  stream->buffer = NULL;
  stream->size = 0;
  stream->nchars = 0;
}
// Transfer ownership of STREAM's buffer to the caller. *OUT receives the
// buffer and *OUT_LENGTH the number of characters in it (not counting the
// terminating NUL).
//
// STREAM is cleared afterwards so it no longer refers to the buffer it gave
// away; call string_stream_init before using it again.
// NOTE(review): the caller presumably releases *OUT with lisp_free - confirm.
static inline void string_stream_steal(StringStream *restrict stream,
                                       char **restrict out,
                                       size_t *restrict out_length) {
  *out = stream->buffer;
  *out_length = stream->nchars;
  stream->buffer = NULL;
  stream->size = 0;
  stream->nchars = 0;
}
// Append printf-style formatted text to STREAM, growing its buffer as needed.
// Returns the number of characters produced by the formatting; aborts if the
// formatting itself fails.
int string_stream_printf(StringStream *restrict stream,
const char *restrict format, ...) FORMAT(2, 3);
// Same as string_stream_printf, but takes the arguments as a va_list.
int string_stream_vprintf(StringStream *restrict stream,
const char *restrict format, va_list args);
// Get the next line in BUF starting at *START (or &buf[0] if *START is
// NULL). Store the length of the line in LENGTH. BUF is BUF_LENGTH bytes
// long. Return true if we found another line and false otherwise.
//
// To iterate, set *START to NULL before the first call and pass the same
// START and LENGTH back in on every following call. The stored length does
// not include the '\n'. A '\n' that is the last byte of BUF does not start
// another (empty) line; an empty BUF yields a single empty line.
bool strgetline(const char *restrict buf, size_t buf_length,
const char **restrict start, size_t *restrict length);
#endif