From 94d5749d31b99d58af9f8291d3fc755e06e1bf7e Mon Sep 17 00:00:00 2001 From: Alexander Rosenberg Date: Fri, 16 Jan 2026 03:20:38 -0800 Subject: [PATCH] Reader --- Makefile | 4 +- src/argcountmacro.h | 19 ++ src/base.c | 74 +++++++ src/base.h | 181 +++++++++++++++++ src/function.c | 1 + src/function.h | 33 ++++ src/gc.c | 5 + src/gc.h | 23 +++ src/hashtable.c | 129 ++++++++++++ src/hashtable.h | 26 +++ src/lisp.c | 77 ++++++++ src/lisp.h | 18 ++ src/list.c | 27 +++ src/list.h | 105 ++++++++++ src/main.c | 15 +- src/memory.c | 38 ++++ src/memory.h | 99 ++++++++++ src/read.c | 464 ++++++++++++++++++++++++++++++++++++++++++++ src/read.h | 23 +++ 19 files changed, 1358 insertions(+), 3 deletions(-) create mode 100644 src/argcountmacro.h create mode 100644 src/base.c create mode 100644 src/base.h create mode 100644 src/function.c create mode 100644 src/function.h create mode 100644 src/gc.c create mode 100644 src/gc.h create mode 100644 src/hashtable.c create mode 100644 src/hashtable.h create mode 100644 src/lisp.c create mode 100644 src/lisp.h create mode 100644 src/list.c create mode 100644 src/list.h create mode 100644 src/memory.c create mode 100644 src/memory.h create mode 100644 src/read.c create mode 100644 src/read.h diff --git a/Makefile b/Makefile index 9520df3..258f802 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC=gcc -CFLAGS=-g -std=c11 +CFLAGS=-g -std=c11 -Wall -Wpedantic LD=gcc LDFLAGS=-g @@ -12,7 +12,7 @@ glisp: $(OBJS) bin/%.o: src/%.c @mkdir -p bin/deps - $(CC) $(CFLAGS) -c -MMD -MF $(^:src/%.c=bin/deps/%.d) -o $@ $^ + $(CC) $(CFLAGS) -c -MMD -MF $(<:src/%.c=bin/deps/%.d) -o $@ $< clean: rm -rf glisp bin/ diff --git a/src/argcountmacro.h b/src/argcountmacro.h new file mode 100644 index 0000000..6dd5b07 --- /dev/null +++ b/src/argcountmacro.h @@ -0,0 +1,19 @@ +#ifndef INCLUDED_ARGCOUNTMACROS_H +#define INCLUDED_ARGCOUNTMACROS_H + +#define internal_TENTH(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, ...) a10 +// return the arg count, noting that ISO C requires at least 1 argument be +// passed to __VA_ARGS__ +#define COUNT_ARGS(...) \ + internal_TENTH(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +// return the arg count - 1, allowing for a dummy value to be passed +#define COUNT_ARGS_SAFE(...) \ + internal_TENTH(__VA_ARGS__, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0) + +#define MACRO_EVAL(a) a +#define MACRO_GLUE(a, b) a##b +#define MACRO_GLUE2(a, b) MACRO_GLUE(a, b) +#define MACRO_CALLN(name, ...) \ + MACRO_GLUE2(name, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__) + +#endif diff --git a/src/base.c b/src/base.c new file mode 100644 index 0000000..1d6ac07 --- /dev/null +++ b/src/base.c @@ -0,0 +1,74 @@ +#include "base.h" + +#include + +const char *LISP_TYPE_NAMES[N_LISP_TYPES] = { + [TYPE_FIXNUM] = "fixnum", + [TYPE_FLOAT] = "float", + [TYPE_CONS] = "cons", + [TYPE_STRING] = "string", + [TYPE_SYMBOL] = "symbol", + [TYPE_VECTOR] = "vector", + [TYPE_HASH_TABLE] = "hash-table", + [TYPE_FUNCTION] = "function", +}; + +void *lisp_alloc_object(size_t size, LispValType type) { + assert(size >= sizeof(LispObject)); + LispObject *obj = lisp_aligned_alloc(LISP_OBJECT_ALIGNMENT, size); + obj->gc.immortal = false; + obj->gc.mark = false; + // TODO set the below + obj->gc.entry = NULL; + obj->type = type; + return obj; +} + +DEFINE_SYMBOL(nil, "nil"); +DEFINE_SYMBOL(t, "t"); +DEFINE_SYMBOL(unbound, "unbound"); + +// ################ +// # Constructors # +// ################ +LispVal *make_lisp_string(const char *data, size_t length, bool take, + bool copy) { + LispString *obj = lisp_alloc_object(sizeof(LispString), TYPE_STRING); + obj->owned = take; + obj->length = length; + if (copy) { + obj->data = lisp_malloc(length + 1); + memcpy(obj->data, data, length); + obj->data[length] = '\0'; + } else { + obj->data = (char *) data; + } + return obj; +} + +LispVal *make_vector(LispVal **data, size_t length, bool take) { + LispVector *obj = lisp_alloc_object(sizeof(LispVector), TYPE_VECTOR); + obj->length = length; + if (take) { + obj->data = data; + } else { + obj->data = lisp_malloc(sizeof(LispVal *) * length); + memcpy(obj->data, data, sizeof(LispVal *) * length); + } + return obj; +} + +DEFUN(make_symbol, "make-symbol", (LispVal * name), "(name)", + "Return an uninterned symbol called NAME.") { + LispSymbol *obj = lisp_alloc_object(sizeof(LispSymbol), TYPE_SYMBOL); + obj->name = name; + obj->function = Qnil; + obj->value = Qnil; + obj->plist = Qnil; + return obj; +} + +DEFUN(intern, "intern", (LispVal * name), "(name)", "") { + // TODO implement + return Fmake_symbol(name); +} diff --git a/src/base.h b/src/base.h new file mode 100644 index 0000000..9688824 --- /dev/null +++ b/src/base.h @@ -0,0 +1,181 @@ +#ifndef INCLUDED_TYPES_H +#define INCLUDED_TYPES_H + +#include "gc.h" +#include "memory.h" + +#include + +// ################### +// # Base value type # +// ################### +typedef void LispVal; + +// ########################## +// # Fixnum and float stuff # +// ########################## +typedef intptr_t fixnum_t; +#if LISP_WORD_BITS == 32 +# define LISP_FLOAT_SCANF "f" +typedef lisp_float32_t lisp_float_t; +#else +# define LISP_FLOAT_SCANF "lf" +typedef lisp_float64_t lisp_float_t; +#endif + +#define MOST_POSITIVE_FIXNUM ((intptr_t) ((INTPTR_MAX & ~(intptr_t) 3) >> 2)) +#define MOST_NEGATIVE_FIXNUM ((intptr_t) ((INTPTR_MIN & ~(intptr_t) 3) >> 2)) + +static ALWAYS_INLINE uintptr_t EXTRACT_TAG(LispVal *val) { + uintptr_t iv = (uintptr_t) val; + return iv & (uintptr_t) 3; +} + +#define LISP_TAG_BITS 2 +#define LISP_OBJECT_TAG ((uintptr_t) 0) +// 0b01 +#define FIXNUM_TAG ((uintptr_t) 1) +// 0b11 +#define LISP_FLOAT_TAG ((uintptr_t) 3) + +static ALWAYS_INLINE bool LISP_OBJECT_P(LispVal *val) { + return EXTRACT_TAG(val) == LISP_OBJECT_TAG; +} + +static ALWAYS_INLINE bool FIXNUMP(LispVal *val) { + return EXTRACT_TAG(val) == FIXNUM_TAG; +} + +static ALWAYS_INLINE fixnum_t XFIXNUM(LispVal *val) { + assert(FIXNUMP(val)); + return ((fixnum_t) val) >> 2; +} + +static ALWAYS_INLINE LispVal *MAKE_FIXNUM(fixnum_t fn) { + return (LispVal *) ((fn << 2) | FIXNUM_TAG); +} + +static ALWAYS_INLINE bool LISP_FLOAT_P(LispVal *val) { + return EXTRACT_TAG(val) == LISP_FLOAT_TAG; +} + +static ALWAYS_INLINE lisp_float_t XLISP_FLOAT(LispVal *val) { + assert(LISP_FLOAT_P(val)); + uintptr_t iv = (uintptr_t) val; + iv &= ~(uintptr_t) 3; + return INT_TO_FLOAT_BITS(iv); +} + +static ALWAYS_INLINE LispVal *MAKE_LISP_FLOAT(lisp_float_t flt) { + uintptr_t bits = FLOAT_TO_INT_BITS(flt); + return (LispVal *) ((bits & ~(uintptr_t) 3) | LISP_FLOAT_TAG); +} + +// ############### +// # Other types # +// ############### +typedef enum { + TYPE_FIXNUM, + TYPE_FLOAT, + TYPE_CONS, + TYPE_STRING, + TYPE_SYMBOL, + TYPE_VECTOR, + TYPE_HASH_TABLE, + TYPE_FUNCTION, + N_LISP_TYPES, +} LispValType; +extern const char *LISP_TYPE_NAMES[N_LISP_TYPES]; + +#define LISP_OBJECT_ALIGNMENT (1 << LISP_TAG_BITS) +void *lisp_alloc_object(size_t size, LispValType type); + +typedef struct { + LispValType type; + ObjectGCInfo gc; +} LispObject; + +static ALWAYS_INLINE LispValType TYPE_OF(LispVal *val) { + if (FIXNUMP(val)) { + return TYPE_FIXNUM; + } else if (LISP_FLOAT_P(val)) { + return TYPE_FLOAT; + } else { + return ((LispObject *) val)->type; + } +} + +static ALWAYS_INLINE bool LISP_TYPEP(LispVal *val, LispValType type) { + if (FIXNUMP(val)) { + return type == TYPE_FIXNUM; + } else if (LISP_FLOAT_P(val)) { + return type == TYPE_FLOAT; + } else { + return ((LispObject *) val)->type == type; + } +} + +#define DEFOBJTYPE(Name, NAME, NAME_P, body) \ + typedef struct { \ + LispObject header; \ + struct body; \ + } Lisp##Name; \ + static ALWAYS_INLINE bool NAME_P(LispVal *val) { \ + return LISP_TYPEP(val, TYPE_##NAME); \ + } \ + struct __ignored + +DEFOBJTYPE(String, STRING, STRINGP, { + size_t length; + char *data; + bool owned; +}); + +DEFOBJTYPE(Symbol, SYMBOL, SYMBOLP, { + LispVal *name; // string + LispVal *function; + LispVal *value; + LispVal *plist; +}); + +DEFOBJTYPE(Vector, VECTOR, VECTORP, { + size_t length; + LispVal **data; +}); + +#define DECLARE_SYMBOL(cname) \ + extern const char *Q##cname##_name; \ + extern LispVal *Q##cname + +#define DECLARE_FUNCTION(cname, cargs) \ + DECLARE_SYMBOL(cname); \ + extern const char F##cname_name; \ + LispVal *F##cname cargs + +#define DEFINE_SYMBOL(cname, lisp_name) \ + const char *Q##cname##_name = lisp_name; \ + LispVal *Q##cname + +#define DEFUN(cname, lisp_name, cargs, lisp_args, doc) \ + DEFINE_SYMBOL(cname, lisp_name); \ + LispVal *Q##cname; \ + LispVal *F##cname cargs + +DECLARE_SYMBOL(nil); +DECLARE_SYMBOL(t); +DECLARE_SYMBOL(unbound); + +static ALWAYS_INLINE bool NILP(LispVal *val) { + return val == Qnil; +} + +// TODO probably move these to another file +LispVal *make_lisp_string(const char *data, size_t length, bool take, + bool copy); +LispVal *make_vector(LispVal **data, size_t length, bool take); +#define LISP_LITSTR(litstr) \ + (make_lisp_string(litstr, sizeof(litstr) - 1, false, false)) +DECLARE_FUNCTION(make_symbol, (LispVal * name)); +DECLARE_FUNCTION(intern, (LispVal * name)); + +#endif diff --git a/src/function.c b/src/function.c new file mode 100644 index 0000000..d4b693e --- /dev/null +++ b/src/function.c @@ -0,0 +1 @@ +#include "function.h" diff --git a/src/function.h b/src/function.h new file mode 100644 index 0000000..6d80c3e --- /dev/null +++ b/src/function.h @@ -0,0 +1,33 @@ +#ifndef INCLUDED_FUNCTION_H +#define INCLUDED_FUNCTION_H + +#include "base.h" + +struct LambdaList { + size_t n_req; + size_t n_opt; + size_t n_kw; + LispVal *req; // list of symbols + LispVal *opt; // list of lists of (name default has-p-name) + LispVal *kw; // ditto opt + LispVal *rest; // symbom (non-nil if we have a rest arg) +}; + +union native_function { + LispVal *(*zero)(void); + LispVal *(*one)(LispVal *); + LispVal *(*two)(LispVal *, LispVal *); + LispVal *(*three)(LispVal *, LispVal *, LispVal *); + LispVal *(*four)(LispVal *, LispVal *, LispVal *, LispVal *); + LispVal *(*five)(LispVal *, LispVal *, LispVal *, LispVal *, LispVal *); +}; + +DEFOBJTYPE(Function, FUNCTION, FUNCTIONP, { + bool is_native; + struct LambdaList args; + union { + union native_function native; + } impl; +}); + +#endif diff --git a/src/gc.c b/src/gc.c new file mode 100644 index 0000000..c9396a6 --- /dev/null +++ b/src/gc.c @@ -0,0 +1,5 @@ +#include "gc.h" + +#include "lisp.h" + +void lisp_gc_register_object(void *obj) {} diff --git a/src/gc.h b/src/gc.h new file mode 100644 index 0000000..043b493 --- /dev/null +++ b/src/gc.h @@ -0,0 +1,23 @@ +#ifndef INCLUDED_GC_H +#define INCLUDED_GC_H + +#include + +typedef struct GCEntry { + void *obj; + struct GCEntry *prev; + struct GCEntry *next; +} GCEntry; + +typedef struct { + unsigned int immortal : 1; + unsigned int mark : 1; + GCEntry *entry; +} ObjectGCInfo; + +// the argument is a LispVal * +void lisp_gc_register_object(void *obj); + +size_t lisp_gc_now(void); + +#endif diff --git a/src/hashtable.c b/src/hashtable.c new file mode 100644 index 0000000..8d6462f --- /dev/null +++ b/src/hashtable.c @@ -0,0 +1,129 @@ +#include "hashtable.h" + +#define INITIAL_SIZE 32 +#define GROWTH_THRESHOLD 0.5 +#define GROWTH_FACTOR 2 + +static ALWAYS_INLINE bool BUCKET_EMPTY_P(struct HashTableBucket *b) { + return !b->key; +} + +static ALWAYS_INLINE float TABLE_LOAD(LispHashTable *ht) { + return ((float) ht->count) / ht->size; +} + +DEFUN(make_hash_table, "make-hash-table", (LispVal * hash_fn, LispVal *eq_fn), + "(hash-fn eq-fn)", "") { + LispHashTable *obj = + lisp_alloc_object(sizeof(LispHashTable), TYPE_HASH_TABLE); + obj->eq_fn = eq_fn; + obj->hash_fn = hash_fn; + obj->count = 0; + obj->size = INITIAL_SIZE; + obj->data = lisp_malloc0(sizeof(struct HashTableBucket) * obj->size); + return obj; +} + +static uintptr_t hash_key_for_table(LispHashTable *ht, LispVal *key) { + if (NILP(ht->hash_fn)) { + return (uintptr_t) key; + } + // TODO change + abort(); +} + +static bool compare_keys(LispHashTable *ht, LispVal *key1, LispVal *key2) { + if (NILP(ht->eq_fn)) { + return key1 == key2; + } + // TODO change + abort(); +} + +static struct HashTableBucket * +find_bucket_for_key(LispHashTable *ht, LispVal *key, uintptr_t hash) { + assert(TABLE_LOAD(ht) < 0.95f); + for (uintptr_t i = hash % ht->size; true; i = (i + 1) % ht->size) { + struct HashTableBucket *cb = &ht->data[i]; + if (BUCKET_EMPTY_P(cb) + || (cb->hash == hash && compare_keys(ht, key, cb->key))) { + return cb; + } + } +} + +static void rehash(LispHashTable *ht, size_t new_size) { + struct HashTableBucket *old_data = ht->data; + size_t old_size = ht->size; + ht->size = new_size; + ht->data = lisp_malloc0(sizeof(struct HashTableBucket) * new_size); + for (size_t i = 0; i < old_size; ++i) { + struct HashTableBucket *cob = &old_data[i]; + if (!BUCKET_EMPTY_P(cob)) { + struct HashTableBucket *nb = + find_bucket_for_key(ht, cob->key, cob->hash); + nb->hash = cob->hash; + nb->key = cob->key; + nb->value = cob->value; + } + } +} + +static void maybe_rehash(LispHashTable *ht) { + if (TABLE_LOAD(ht) > GROWTH_THRESHOLD) { + rehash(ht, ht->size * GROWTH_FACTOR); + } +} + +// TODO type checking +DEFUN(gethash, "gethash", (LispVal * ht, LispVal *key, LispVal *def), + "(ht key &optional def)", "") { + uintptr_t hash = hash_key_for_table(ht, key); + struct HashTableBucket *b = find_bucket_for_key(ht, key, hash); + return BUCKET_EMPTY_P(b) ? def : b->value; +} + +DEFUN(puthash, "puthash", (LispVal * ht, LispVal *key, LispVal *val), + "(ht key val)", "") { + maybe_rehash(ht); + uintptr_t hash = hash_key_for_table(ht, key); + struct HashTableBucket *b = find_bucket_for_key(ht, key, hash); + if (BUCKET_EMPTY_P(b)) { + b->hash = hash; + b->key = key; + } + b->value = val; + ++((LispHashTable *) ht)->count; + return Qnil; +} + +DEFUN(remhash, "remhash", (LispVal * ht, LispVal *key), "(ht key)", "") { + uintptr_t hash = hash_key_for_table(ht, key); + struct HashTableBucket *b = find_bucket_for_key(ht, key, hash); + if (BUCKET_EMPTY_P(b)) { + return Qnil; + } + b->key = NULL; + b->value = NULL; // just because + LispHashTable *tobj = ht; + --tobj->count; + size_t k = hash % tobj->size; + for (size_t i = (k + 1) % tobj->size; !BUCKET_EMPTY_P(&tobj->data[i]); + i = (i + 1) % tobj->size) { + size_t target = tobj->data[i].hash % tobj->size; + if ((i > k && target >= k && target < i) + || (i < k && (target >= k || target < i))) { + tobj->data[k].hash = tobj->data[i].hash; + tobj->data[k].key = tobj->data[i].key; + tobj->data[k].value = tobj->data[i].value; + tobj->data[i].key = NULL; + tobj->data[i].value = NULL; + k = i; + } + } + return Qt; +} + +DEFUN(hash_table_count, "hash-table-count", (LispVal * ht), "(ht)", "") { + return MAKE_FIXNUM(((LispHashTable *) ht)->count); +} diff --git a/src/hashtable.h b/src/hashtable.h new file mode 100644 index 0000000..55e1ad1 --- /dev/null +++ b/src/hashtable.h @@ -0,0 +1,26 @@ +#ifndef INCLUDED_HASHTABLE_H +#define INCLUDED_HASHTABLE_H + +#include "base.h" + +struct HashTableBucket { + uintptr_t hash; + LispVal *key; + LispVal *value; +}; + +DEFOBJTYPE(HashTable, HASH_TABLE, HASH_TABLE_P, { + LispVal *hash_fn; + LispVal *eq_fn; + struct HashTableBucket *data; + size_t size; + size_t count; +}); + +DECLARE_FUNCTION(make_hash_table, (LispVal * hash_fn, LispVal *eq_fn)); +DECLARE_FUNCTION(gethash, (LispVal * ht, LispVal *key, LispVal *def)); +DECLARE_FUNCTION(puthash, (LispVal * ht, LispVal *key, LispVal *val)); +DECLARE_FUNCTION(remhash, (LispVal * ht, LispVal *key)); +DECLARE_FUNCTION(hash_table_count, (LispVal * ht)); + +#endif diff --git a/src/lisp.c b/src/lisp.c new file mode 100644 index 0000000..cf77557 --- /dev/null +++ b/src/lisp.c @@ -0,0 +1,77 @@ +#include "lisp.h" + +void lisp_init() { + Qnil = Fmake_symbol(LISP_LITSTR("nil")); + Qt = Fmake_symbol(LISP_LITSTR("t")); + Qunbound = Fmake_symbol(LISP_LITSTR("unbound")); +} + +void lisp_shutdown() {} + +void debug_print(FILE *file, LispVal *obj) { + switch (TYPE_OF(obj)) { + case TYPE_FIXNUM: + fprintf(file, "%jd", (intmax_t) XFIXNUM(obj)); + break; + case TYPE_FLOAT: + fprintf(file, "%f", (double) XLISP_FLOAT(obj)); + break; + case TYPE_STRING: { + LispString *s = obj; + fputc('"', file); + fwrite(s->data, 1, s->length, file); + fputc('"', file); + break; + } + case TYPE_SYMBOL: { + LispString *name = ((LispSymbol *) obj)->name; + fwrite(name->data, 1, name->length, file); + break; + } + case TYPE_HASH_TABLE: { + fprintf(file, "", + ((LispHashTable *) obj)->count, (uintmax_t) obj); + break; + } + case TYPE_FUNCTION: { + fprintf(file, "", (uintmax_t) obj); + break; + } + case TYPE_CONS: { + fputc('(', file); + FOREACH_TAIL(obj, tail) { + if (CONSP(tail)) { + debug_print(file, XCAR(tail)); + if (!NILP(XCDR(tail))) { + fputc(' ', file); + } + } else { + fwrite(". ", 1, 2, file); + debug_print(file, tail); + } + } + fputc(')', file); + break; + } + case TYPE_VECTOR: { + LispVector *v = obj; + fputc('[', file); + for (size_t i = 0; i < v->length; ++i) { + debug_print(file, v->data[i]); + if (i < v->length - 1) { + fputc(' ', file); + } + } + fputc(']', file); + break; + } + default: + abort(); + } +} + +void debug_obj_info(FILE *file, LispVal *obj) { + fprintf(file, "%s -> ", LISP_TYPE_NAMES[TYPE_OF(obj)]); + debug_print(file, obj); + fputc('\n', file); +} diff --git a/src/lisp.h b/src/lisp.h new file mode 100644 index 0000000..521073e --- /dev/null +++ b/src/lisp.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_LISP_H +#define INCLUDED_LISP_H + +#include "base.h" +#include "function.h" +#include "hashtable.h" +#include "list.h" + +#include + +void lisp_init(void); + +void lisp_shutdown(void); + +void debug_print(FILE *file, LispVal *obj); +void debug_obj_info(FILE *file, LispVal *obj); + +#endif diff --git a/src/list.c b/src/list.c new file mode 100644 index 0000000..93d0949 --- /dev/null +++ b/src/list.c @@ -0,0 +1,27 @@ +#include "list.h" + +intptr_t list_length(LispVal *list) { + assert(LISTP(list)); + LispVal *tortise = list; + LispVal *hare = list; + intptr_t length = 0; + while (CONSP(tortise)) { + tortise = XCDR_SAFE(tortise); + hare = XCDR_SAFE(XCDR_SAFE(hare)); + if (!NILP(hare) && tortise == hare) { + return -1; + } + ++length; + } + return length; +} + +DEFUN(cons, "cons", (LispVal * car, LispVal *cdr), "(car cdr)", + "Construct a new cons object from CAR and CDR.") { + return CONS(car, cdr); +} + +DEFUN(length, "length", (LispVal * list), "(list)", "") { + // TODO type check + return MAKE_FIXNUM(list_length(list)); +} diff --git a/src/list.h b/src/list.h new file mode 100644 index 0000000..823be13 --- /dev/null +++ b/src/list.h @@ -0,0 +1,105 @@ +#ifndef INCLUDED_LIST_H +#define INCLUDED_LIST_H + +#include "argcountmacro.h" +#include "base.h" + +#include + +DEFOBJTYPE(Cons, CONS, CONSP, { + LispVal *car; + LispVal *cdr; +}); + +static ALWAYS_INLINE bool LISTP(LispVal *obj) { + return NILP(obj) || CONSP(obj); +} + +static ALWAYS_INLINE LispVal *CONS(LispVal *car, LispVal *cdr) { + LispCons *obj = (LispCons *) lisp_alloc_object(sizeof(LispCons), TYPE_CONS); + obj->car = car; + obj->cdr = cdr; + return obj; +} + +static ALWAYS_INLINE LispVal *XCAR(LispVal *cons) { + assert(CONSP(cons) || NILP(cons)); + return NILP(cons) ? Qnil : ((LispCons *) cons)->car; +} + +static ALWAYS_INLINE LispVal *XCDR(LispVal *cons) { + assert(CONSP(cons) || NILP(cons)); + return NILP(cons) ? Qnil : ((LispCons *) cons)->cdr; +} + +static ALWAYS_INLINE LispVal *XCAR_SAFE(LispVal *cons) { + return CONSP(cons) ? XCAR(cons) : Qnil; +} + +static ALWAYS_INLINE LispVal *XCDR_SAFE(LispVal *cons) { + return CONSP(cons) ? XCDR(cons) : Qnil; +} + +static ALWAYS_INLINE void RPLACA(LispVal *cons, LispVal *newcar) { + assert(CONSP(cons)); + ((LispCons *) cons)->car = newcar; +} + +static ALWAYS_INLINE void RPLACD(LispVal *cons, LispVal *newcdr) { + assert(CONSP(cons)); + ((LispCons *) cons)->cdr = newcdr; +} + +static ALWAYS_INLINE LispVal *LIST1(LispVal *v1) { + return CONS(v1, Qnil); +} +static ALWAYS_INLINE LispVal *LIST2(LispVal *v1, LispVal *v2) { + return CONS(v1, LIST1(v2)); +} +static ALWAYS_INLINE LispVal *LIST3(LispVal *v1, LispVal *v2, LispVal *v3) { + return CONS(v1, LIST2(v2, v3)); +} +static ALWAYS_INLINE LispVal *LIST4(LispVal *v1, LispVal *v2, LispVal *v3, + LispVal *v4) { + return CONS(v1, LIST3(v2, v3, v4)); +} +static ALWAYS_INLINE LispVal *LIST5(LispVal *v1, LispVal *v2, LispVal *v3, + LispVal *v4, LispVal *v5) { + return CONS(v1, LIST4(v2, v3, v4, v5)); +} +static ALWAYS_INLINE LispVal *LIST6(LispVal *v1, LispVal *v2, LispVal *v3, + LispVal *v4, LispVal *v5, LispVal *v6) { + return CONS(v1, LIST5(v2, v3, v4, v5, v6)); +} +static ALWAYS_INLINE LispVal *LIST_N(int count, ...) { + va_list list; + va_start(list, count); + LispVal *acc = Qnil; + LispVal *end; + while (count--) { + LispVal *next = CONS(va_arg(list, LispVal *), Qnil); + if (NILP(acc)) { + acc = next; + end = acc; + } else { + RPLACD(end, next); + end = next; + } + } + va_end(list); + return acc; +} +#define LIST(...) MACRO_CALLN(LIST, __VA_ARGS__) + +#define FOREACH(l, v) \ + for (LispVal *_tail = (l), *v = XCAR(_tail); !NILP(_tail); \ + _tail = XCDR(_tail), v = XCAR(_tail)) + +#define FOREACH_TAIL(l, v) for (LispVal *v = (l); !NILP(v); v = XCDR_SAFE(v)) + +intptr_t list_length(LispVal *list); + +DECLARE_FUNCTION(cons, (LispVal * car, LispVal *cdr)); +DECLARE_FUNCTION(length, (LispVal * list)); + +#endif diff --git a/src/main.c b/src/main.c index a0bab7c..929c2ce 100644 --- a/src/main.c +++ b/src/main.c @@ -1,6 +1,19 @@ +#include "lisp.h" +#include "read.h" + #include int main(int argc, const char **argv) { - printf("Hello World\n"); + lisp_init(); + ReadStream s; + const char BUF[] = "`(1 . ,2)"; + read_stream_init(&s, BUF, sizeof(BUF) - 1); + LispVal *l = read(&s); + if (!l) { + printf("EOF\n"); + } else { + debug_obj_info(stdout, l); + } + lisp_shutdown(); return 0; } diff --git a/src/memory.c b/src/memory.c new file mode 100644 index 0000000..8698bb3 --- /dev/null +++ b/src/memory.c @@ -0,0 +1,38 @@ +#include "memory.h" + +#include +#include +#include + +void *lisp_realloc(void *oldptr, size_t size) { + if (!size) { + assert(oldptr != NULL); + return NULL; + } else { + void *newptr = realloc(oldptr, size); + if (!newptr) { + fputs("Allocation failed!", stderr); + abort(); + } + return newptr; + } +} + +void *lisp_malloc(size_t size) { + return lisp_realloc(NULL, size); +} + +void *lisp_malloc0(size_t size) { + void *ptr = lisp_malloc(size); + memset(ptr, 0, size); + return ptr; +} + +void *lisp_aligned_alloc(size_t alignment, size_t size) { + void *ptr = aligned_alloc(alignment, size); + if (!ptr) { + fputs("Allocation failed!", stderr); + abort(); + } + return ptr; +} diff --git a/src/memory.h b/src/memory.h new file mode 100644 index 0000000..4eb1938 --- /dev/null +++ b/src/memory.h @@ -0,0 +1,99 @@ +#ifndef INCLUDED_MEMORY_H +#define INCLUDED_MEMORY_H + +#include +#include +#include +#include + +// Geneal macros +#ifndef __has_attribute +# define __has_attribute(attr) 0 +#endif + +#if __has_attribute(always_inline) +# define ALWAYS_INLINE inline __attribute__((always_inline)) +#else +# define ALWAYS_INLINE inline +#endif + +// Byte order stuff +typedef enum { + ENDIAN_LITTLE, + ENDIAN_BIG, +} Endianness; + +static ALWAYS_INLINE Endianness ENDIANNESS(void) { + uint16_t x = 1; + return *(char *) &x == 1 ? ENDIAN_LITTLE : ENDIAN_BIG; +} + +static ALWAYS_INLINE bool LITTLE_ENDIAN_P(void) { + return ENDIANNESS() == ENDIAN_LITTLE; +} + +// General float stuff +#if SIZE_MAX == 0xffffffff +# define LISP_WORD_BITS 32 +#elif SIZE_MAX == 0xffffffffffffffff +# define LISP_WORD_BITS 64 +#else +# error "Unablem to determine system word size!" +#endif +// Check if we support this system's floating point implementation +#if FLT_RADIX != 2 || FLT_MANT_DIG != 24 || DBL_MANT_DIG != 53 \ + || FLT_MAX_EXP != 128 || DBL_MAX_EXP != 1024 +# error "Floating point implementation not supported." +#endif +typedef float lisp_float32_t; +typedef double lisp_float64_t; + +static ALWAYS_INLINE uint32_t FLOAT32_TO_INT_BITS(lisp_float32_t flt) { + union { + uint32_t int_val; + lisp_float32_t flt_val; + } conv = {.flt_val = flt}; + return conv.int_val; +} + +static ALWAYS_INLINE uint64_t FLOAT64_TO_INT_BITS(lisp_float64_t flt) { + union { + uint64_t int_val; + lisp_float64_t flt_val; + } conv = {.flt_val = flt}; + return conv.int_val; +} + +#define FLOAT_TO_INT_BITS(flt) \ + _Generic((flt), \ + lisp_float32_t: FLOAT32_TO_INT_BITS(flt), \ + lisp_float64_t: FLOAT64_TO_INT_BITS(flt)) + +static ALWAYS_INLINE lisp_float32_t INT_TO_FLOAT32_BITS(uint32_t i) { + union { + uint32_t int_val; + lisp_float32_t flt_val; + } conv = {.int_val = i}; + return conv.flt_val; +} + +static ALWAYS_INLINE lisp_float64_t INT_TO_FLOAT64_BITS(uint64_t i) { + union { + uint64_t int_val; + lisp_float64_t flt_val; + } conv = {.int_val = i}; + return conv.flt_val; +} + +#define INT_TO_FLOAT_BITS(i) \ + _Generic((i), \ + uint32_t: INT_TO_FLOAT32_BITS(i), \ + uint64_t: INT_TO_FLOAT64_BITS(i)) + +// Allocator +void *lisp_realloc(void *oldptr, size_t size); +void *lisp_malloc(size_t size); +void *lisp_malloc0(size_t size); +void *lisp_aligned_alloc(size_t alignment, size_t size); + +#endif diff --git a/src/read.c b/src/read.c new file mode 100644 index 0000000..9688668 --- /dev/null +++ b/src/read.c @@ -0,0 +1,464 @@ +#include "read.h" + +#include "list.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +void read_stream_init(ReadStream *stream, const char *buffer, size_t length) { + stream->buffer = buffer; + stream->len = length; + stream->off = 0; + stream->line = 1; + stream->col = 0; + stream->backquote_level = 0; +} + +#define READ_EOS -1 + +static ALWAYS_INLINE bool EOSP(const ReadStream *stream) { + return stream->off == stream->len; +} + +static int pop_char(ReadStream *stream) { + if (EOSP(stream)) { + return READ_EOS; + } + int c = stream->buffer[stream->off++]; + if (c == '\n') { + ++stream->line; + stream->col = 0; + } else { + ++stream->col; + } + return c; +} + +static int peek_nth_char(const ReadStream *stream, int n) { + if (stream->len - stream->off <= n) { + return READ_EOS; + } + return stream->buffer[stream->off + n]; +} + +static int peek_char(const ReadStream *stream) { + return peek_nth_char(stream, 0); +} + +static ALWAYS_INLINE bool WHITESPACEP(int c) { + return c == ' ' || c == '\t' || c == '\n'; +} + +static void skip_whitespace(ReadStream *stream) { + bool in_comment = false; + int c; + while ((c = peek_char(stream)) != READ_EOS + && (WHITESPACEP(c) || c == ';' || in_comment)) { + pop_char(stream); + if (c == ';') { + in_comment = true; + } else if (c == '\n') { + in_comment = false; + } + } +} + +noreturn void read_error(ReadStream *stream, size_t length, const char *msg, + ...) { + fprintf(stderr, "read-error at %zu:%zu: ", stream->line, stream->col); + va_list args; + va_start(args, msg); + vfprintf(stderr, msg, args); + va_end(args); + fprintf(stderr, ":\n context => "); + fwrite(stream->buffer + stream->off, 1, length, stderr); + fputc('\n', stderr); + exit(1); +} + +static ALWAYS_INLINE bool DOT_SYMBOL_P(LispVal *val) { + if (!SYMBOLP(val)) { + return false; + } + LispString *name = ((LispSymbol *) val)->name; + return name->length == 1 && name->data[0] == '.'; +} + +LispVal *next_list(ReadStream *stream) { + pop_char(stream); // the ( + skip_whitespace(stream); + LispVal *start = Qnil; + LispVal *end; + bool dotted = false; + while (peek_char(stream) != ')') { + if (dotted) { + read_error(stream, 0, + "invalid syntax encountered while reading list"); + } + skip_whitespace(stream); + LispVal *new_val = read(stream); + if (new_val && DOT_SYMBOL_P(new_val)) { + if (NILP(start)) { + read_error(stream, 1, "list has no car"); + } + dotted = true; + new_val = read(stream); + } + if (!new_val) { + read_error(stream, 0, "got EOF while reading list"); + } + if (dotted) { + RPLACD(end, new_val); + } else if (NILP(start)) { + start = CONS(new_val, Qnil); + end = start; + } else { + RPLACD(end, CONS(new_val, Qnil)); + end = XCDR(end); + } + skip_whitespace(stream); + } + if (pop_char(stream) == READ_EOS) { // the ) + read_error(stream, 0, "got EOF while reading list"); + } + return start; +} + +LispVal *next_vector(ReadStream *stream) { + pop_char(stream); // the [ + skip_whitespace(stream); + LispVal **data = NULL; + size_t length = 0; + while (peek_char(stream) != ']') { + LispVal *new_val = read(stream); + if (!new_val) { + free(data); + read_error(stream, 0, "got EOF while reading vector"); + } + data = lisp_realloc(data, sizeof(LispVal *) * ++length); + data[length - 1] = new_val; + skip_whitespace(stream); + } + if (pop_char(stream) == READ_EOS) { // the ] + free(data); + read_error(stream, 0, "got EOF while reading vector"); + } + return make_vector(data, length, true); +} + +LispVal *next_string(ReadStream *stream) { + pop_char(stream); // the " + bool backslash = false; + char *buffer = lisp_malloc(1); + buffer[0] = '\0'; + size_t len = 0; + while (backslash || peek_char(stream) != '\"') { + int c = pop_char(stream); + if (c == READ_EOS) { + free(buffer); + read_error(stream, 0, "got EOF while reading string"); + } else if (c == '\\' && !backslash) { + backslash = true; + } else { + if (backslash) { + switch (c) { + case '"': + case '\\': + // do nothing + break; + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; + case '0': + c = '\0'; + break; + default: + free(buffer); + read_error(stream, 0, "unknown escape sequence"); + } + } + buffer = lisp_realloc(buffer, ++len + 1); + buffer[len - 1] = c; + buffer[len] = '\0'; + backslash = false; + } + } + if (pop_char(stream) == READ_EOS) { // the " + free(buffer); + read_error(stream, 0, "got EOF while reading string"); + } + return make_lisp_string(buffer, len, true, false); +} + +LispVal *next_char_literal(ReadStream *stream) { + pop_char(stream); // the ? + int c = pop_char(stream); + if (c == READ_EOS) { + read_error(stream, 0, "unterminated character literal"); + } else if (c == '\\') { + ReadStream save = *stream; + switch (pop_char(stream)) { + case 'n': + return MAKE_FIXNUM('\n'); + case 't': + return MAKE_FIXNUM('\t'); + case '0': + return MAKE_FIXNUM('\0'); + case 's': + return MAKE_FIXNUM(' '); + case READ_EOS: + read_error(stream, 0, "unterminated character escape sequence"); + default: + read_error(&save, 0, "unknown escape sequence"); + } + } else { + return MAKE_FIXNUM(c); + } +} + +static ALWAYS_INLINE bool SYMBOL_END_P(int c) { + return WHITESPACEP(c) || c == READ_EOS || c == '(' || c == ')' || c == '[' + || c == ']' || c == '\'' || c == '\"' || c == ',' || c == '@' + || c == '`' || c == ';'; +} + +LispVal *next_symbol(ReadStream *stream) { + bool backslash = false; + char *name = lisp_malloc(1); + name[0] = '\0'; + size_t size = 0; + while (backslash || !SYMBOL_END_P(peek_char(stream))) { + int c = pop_char(stream); + if (c == '\\' && !backslash) { + backslash = true; + } else { + if (backslash) { + switch (c) { + case READ_EOS: + free(name); + read_error(stream, 0, "backslash not escaping anything"); + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; + case '0': + c = '\0'; + break; + default: + if (!SYMBOL_END_P(c)) { + free(name); + read_error(stream, 0, + "invalid escape sequence in symbol name"); + } + } + } + name = lisp_realloc(name, ++size + 1); + name[size - 1] = c; + name[size] = '\0'; + backslash = false; + } + } + return Fintern(make_lisp_string(name, size, true, false)); +} + +#define ANY_BASE -2 +#define INVALID_BASE -1 + +static bool is_base_char(int base, int c) { + c = tolower(c); + if (base == ANY_BASE) { + base = 10; + } + switch (base) { + case 16: + if (c >= 'a' && c <= 'f') { + return true; + } + case 10: + case 8: + case 2: + return c >= '0' && c <= '0' + base - 1; + default: + return false; + } +} + +static int parse_base(const char *c, size_t left) { + if (left >= 2 && *c == '2' && *(c + 1) == '#') { + return 2; + } else if (left >= 2 && *c == '8' && *(c + 1) == '#') { + return 8; + } else if (left >= 3 && *c == '1' && *(c + 1) == '0' && *(c + 2) == '#') { + return 10; + } else if (left >= 3 && *c == '1' && *(c + 1) == '6' && *(c + 2) == '#') { + return 16; + } else { + return INVALID_BASE; + } +} + +LispVal *next_number_or_symbol(ReadStream *stream, int base) { + ReadStream save = *stream; + bool has_decimal = false; + size_t number_start = stream->off; + size_t exp_start = 0; + bool had_number = false; + int c; + while (!SYMBOL_END_P(peek_char(stream))) { + c = pop_char(stream); + if (c == '#') { + if (base != ANY_BASE) { + goto change_to_symbol; + } + base = parse_base(&stream->buffer[number_start], + stream->len - number_start); + if (base == INVALID_BASE) { + goto change_to_symbol; + } + had_number = false; + number_start = stream->off; + } else if (c == '.') { + if (base != ANY_BASE || has_decimal || exp_start) { + goto change_to_symbol; + } + has_decimal = true; + } else if (c == '-' || c == '+') { + if (stream->off - 1 != number_start + && stream->off - 1 != exp_start) { + goto change_to_symbol; + } + // fallthrough + } else if (!is_base_char(base, c)) { + if ((c == 'e' || c == 'E') && !exp_start && base == ANY_BASE + && had_number) { + exp_start = stream->off; + } else { + goto change_to_symbol; + } + } else { + had_number = true; + } + } + if (!had_number) { + goto change_to_symbol; + } + size_t len = stream->off - number_start; + if (has_decimal || exp_start) { + // float + // ceil(# bytes in size_t / 3) + // This works because log10(2^n) is O(n) for k=3 + size_t size_t_len = ((sizeof(size_t) * 8) / 3) + 1; + char fmt_buf[3 + size_t_len + 1]; + if (exp_start == stream->off) { + goto change_to_symbol; + } + snprintf(fmt_buf, sizeof(fmt_buf), "%%%zu" LISP_FLOAT_SCANF, len); + lisp_float_t value; + sscanf(&stream->buffer[number_start], fmt_buf, &value); + return MAKE_LISP_FLOAT(value); + } else { + // integer + // TODO handle large numbers + size_t fixnum_len = ((sizeof(fixnum_t) * 8) / 3) + 1; + char read_buf[fixnum_len + 1]; + if (len > fixnum_len) { + read_error(stream, 0, "numeric literal too large"); + } + memcpy(read_buf, &stream->buffer[number_start], len); + read_buf[len] = '\0'; + intmax_t value = + strtoimax(read_buf, NULL, base == ANY_BASE ? 10 : base); + if (value < MOST_NEGATIVE_FIXNUM || value > MOST_POSITIVE_FIXNUM) { + read_error(stream, 0, "numeric literal too large"); + } + return MAKE_FIXNUM(value); + } + abort(); +change_to_symbol: + *stream = save; + return next_symbol(stream); +} + +LispVal *read(ReadStream *stream) { + skip_whitespace(stream); + if (EOSP(stream)) { + return NULL; + } + int next = peek_char(stream); + switch (next) { + case ')': + case ']': + read_error(stream, 1, "extra closing character"); + case '\'': { + pop_char(stream); + skip_whitespace(stream); + if (EOSP(stream)) { + read_error(stream, 0, "quote not quoting anything"); + } + return LIST(Fintern(LISP_LITSTR("quote")), read(stream)); + } + case '`': { + ++stream->backquote_level; + pop_char(stream); + skip_whitespace(stream); + if (EOSP(stream)) { + read_error(stream, 0, "backquote not quoting anything"); + } + LispVal *to_return = LIST(Fintern(LISP_LITSTR("`")), read(stream)); + --stream->backquote_level; + return to_return; + } + case ',': { + if (!stream->backquote_level) { + read_error(stream, 0, "comma outside of a backquote"); + } + pop_char(stream); + LispVal *car; + if (peek_char(stream) == '@') { + car = Fintern(LISP_LITSTR(",")); + } else { + car = Fintern(LISP_LITSTR(",@")); + } + skip_whitespace(stream); + if (EOSP(stream)) { + read_error(stream, 0, "comma not splicing anything"); + } + return LIST(car, read(stream)); + } break; + case '(': + return next_list(stream); + case '[': + return next_vector(stream); + case '"': + return next_string(stream); + case '?': + return next_char_literal(stream); + case '.': + case '+': + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return next_number_or_symbol(stream, ANY_BASE); + default: + return next_symbol(stream); + } +} diff --git a/src/read.h b/src/read.h new file mode 100644 index 0000000..0f47858 --- /dev/null +++ b/src/read.h @@ -0,0 +1,23 @@ +#ifndef INCLUDED_READ_H +#define INCLUDED_READ_H + +#include "base.h" + +#include + +typedef struct { + const char *buffer; + size_t len; + size_t off; + + size_t line; + size_t col; + size_t backquote_level; +} ReadStream; + +void read_stream_init(ReadStream *stream, const char *buffer, size_t length); + +// NULL on eof +LispVal *read(ReadStream *stream); + +#endif