From 1a0906206a4459da8bf40308aff6e7274e50cc7f Mon Sep 17 00:00:00 2001 From: Alexander Rosenberg Date: Thu, 22 Jan 2026 01:31:20 -0800 Subject: [PATCH] Initial tricolor gc --- src/base.c | 1 - src/base.h | 32 +++-- src/gc.c | 336 +++++++++++++++++++++++++----------------------- src/gc.h | 57 ++++---- src/hashtable.c | 7 + src/lisp.c | 1 - src/list.h | 2 + 7 files changed, 239 insertions(+), 197 deletions(-) diff --git a/src/base.c b/src/base.c index b1c5a7e..e9ba282 100644 --- a/src/base.c +++ b/src/base.c @@ -24,7 +24,6 @@ void *lisp_alloc_object_no_gc(size_t size, LispValType type) { LispObject *obj = lisp_aligned_alloc(LISP_OBJECT_ALIGNMENT, size); memset(obj, 0, size); obj->type = type; - obj->gc.mark = false; tss_create(&obj->gc.has_local_ref, NULL); return obj; } diff --git a/src/base.h b/src/base.h index 054ce30..67cb956 100644 --- a/src/base.h +++ b/src/base.h @@ -103,16 +103,6 @@ static ALWAYS_INLINE bool OBJECTP(LispVal *val) { return EXTRACT_TAG(val) == LISP_OBJECT_TAG; } -static ALWAYS_INLINE void SET_OBJECT_MARKED(LispVal *val, bool marked) { - assert(OBJECTP(val)); - ((LispObject *) val)->gc.mark = marked; -} - -static ALWAYS_INLINE bool OBJECT_MARKED_P(LispVal *val) { - assert(OBJECTP(val)); - return ((LispObject *) val)->gc.mark; -} - // ONLY APPLIES TO THE CALLING THREAD static ALWAYS_INLINE bool OBJECT_HAS_LOCAL_REFERENCE_P(LispVal *val) { assert(OBJECTP(val)); @@ -128,6 +118,28 @@ static ALWAYS_INLINE void SET_OBJECT_HAS_LOCAL_REFERENCE(LispVal *val, (void *) (uintptr_t) (has_local_ref ? 1 : 0)); } +static ALWAYS_INLINE ObjectGCSet OBJECT_GET_GC_SET(LispVal *val) { + assert(OBJECTP(val)); + return ((LispObject *) val)->gc.set; +} + +static ALWAYS_INLINE bool OBJECT_GC_SET_P(LispVal *val, ObjectGCSet set) { + return OBJECT_GET_GC_SET(val) == set; +} + +static ALWAYS_INLINE bool OBJECT_STATIC_P(LispVal *val) { + assert(OBJECTP(val)); + return ((LispObject *) val)->gc.is_static; +} + +static inline void MARK_OBJECT_ADDED(LispVal *val, LispVal *into) { + ObjectGCSet val_set = OBJECT_GET_GC_SET(val); + ObjectGCSet into_set = OBJECT_GET_GC_SET(into); + if (into_set == GC_BLACK && val_set == GC_WHITE) { + gc_move_to_set(val, GC_GREY); + } +} + static ALWAYS_INLINE LispValType TYPE_OF(LispVal *val) { if (FIXNUMP(val)) { return TYPE_FIXNUM; diff --git a/src/gc.c b/src/gc.c index b07fc1d..ffb1126 100644 --- a/src/gc.c +++ b/src/gc.c @@ -8,62 +8,6 @@ #include -bool lisp_doing_gc; -struct timespec total_gc_time; -size_t total_gc_count; - -#define GC_OBJECTS_BLOCK_SIZE 128 -struct GCObjectsBlock { - LispVal *objs[GC_OBJECTS_BLOCK_SIZE]; -}; -struct GCObjects { - size_t num_blocks; - size_t num_objs; - struct GCObjectsBlock **blocks; -}; - -static struct GCObjects gc_objects; -static struct GCObjects static_objects; - -static void init_gc_objects(struct GCObjects *restrict objects) { - objects->num_blocks = 0; - objects->num_objs = 0; - objects->blocks = NULL; -} - -static void gc_objects_register(struct GCObjects *restrict objects, - LispVal *obj) { - assert(OBJECTP(obj)); - if (objects->num_blocks * GC_OBJECTS_BLOCK_SIZE == objects->num_objs) { - objects->blocks = - lisp_realloc(objects->blocks, sizeof(struct GCObjectsBlock *) - * ++objects->num_blocks); - objects->blocks[objects->num_blocks - 1] = - lisp_malloc(sizeof(struct GCObjectsBlock)); - objects->blocks[objects->num_blocks - 1]->objs[0] = obj; - } else { - size_t block_idx = objects->num_objs / GC_OBJECTS_BLOCK_SIZE; - size_t small_idx = objects->num_objs % GC_OBJECTS_BLOCK_SIZE; - objects->blocks[block_idx]->objs[small_idx] = obj; - } - ++objects->num_objs; -} - -void lisp_init_gc(void) { - init_gc_objects(&gc_objects); - init_gc_objects(&static_objects); -} - -void lisp_gc_register_object(void *obj) { - gc_objects_register(&gc_objects, obj); -} - -// we don't have to put static objects into all_objects because they will never -// be GCed -void lisp_gc_register_static_object(void *obj) { - gc_objects_register(&static_objects, obj); -} - void free_object_process_stack(ObjectProcessStack *restrict stack) { for (size_t i = 0; i < stack->num_blocks; ++i) { lisp_free(stack->blocks[i]); @@ -177,69 +121,112 @@ void *object_process_stack_pop(ObjectProcessStack *restrict stack) { return stack->blocks[block_idx]->objs[small_idx]; } -void gc_recursively_mark_object(void *obj) { - if (!OBJECTP(obj) || OBJECT_MARKED_P(obj)) { +bool lisp_doing_gc; +struct timespec total_gc_time; +size_t total_gc_count; + +struct GCObjectList { + LispVal *obj; + struct GCObjectList *prev; + struct GCObjectList *next; +}; + +#define FREE_OBJECTS_LIST_LIMIT 1024 +static size_t free_objects_list_count; +static struct GCObjectList *free_objects_list; + +static struct GCObjectList *black_objects; +static struct GCObjectList *grey_objects; +static struct GCObjectList *white_objects; +static struct GCObjectList *static_objects; + +ObjectGCSet GC_BLACK = 0; +ObjectGCSet GC_GREY = 1; +ObjectGCSet GC_WHITE = 2; + +static ALWAYS_INLINE struct GCObjectList **HEAD_FOR_SET(ObjectGCSet set) { + if (set == GC_BLACK) { + return &black_objects; + } else if (set == GC_GREY) { + return &grey_objects; + } else { + return &white_objects; + } +} + +static struct GCObjectList *alloc_gc_objects_list_node(void) { + if (free_objects_list) { + struct GCObjectList *to_return = free_objects_list; + free_objects_list = free_objects_list->next; + --free_objects_list_count; + return to_return; + } else { + return lisp_malloc(sizeof(struct GCObjectList)); + } +} + +static void unuse_gc_objects_list_node(struct GCObjectList *node) { + node->next = free_objects_list; + free_objects_list = node; + ++free_objects_list_count; +} + +void lisp_gc_register_object(void *val) { + if (!OBJECTP(val)) { return; } - SET_OBJECT_MARKED(obj, true); - ObjectProcessStack stack; - init_object_process_stack(&stack); - object_process_stack_push_held_objects(&stack, obj); - while (!OBJECT_PROCESS_STACK_EMPTY_P(&stack)) { - LispVal *top = object_process_stack_pop(&stack); - if (!OBJECT_MARKED_P(top)) { - SET_OBJECT_MARKED(top, true); - object_process_stack_push_held_objects(&stack, top); - } - } - free_object_process_stack(&stack); + LispObject *obj = val; + obj->gc.is_static = false; + obj->gc.set = GC_WHITE; + struct GCObjectList *node = alloc_gc_objects_list_node(); + obj->gc.gc_node = node; + node->prev = NULL; + node->next = black_objects; + node->obj = val; } -// as we never delete static objects, all but the last block of the static -// objects array is filled -static void gc_mark_static_roots(void) { - for (size_t i = 0; i < static_objects.num_blocks - 1; ++i) { - for (size_t j = 0; j < GC_OBJECTS_BLOCK_SIZE; ++j) { - gc_recursively_mark_object(static_objects.blocks[i]->objs[j]); - } +void lisp_gc_register_static_object(void *val) { + if (!OBJECTP(val)) { + return; } - for (size_t i = 0; i < static_objects.num_objs % GC_OBJECTS_BLOCK_SIZE; - ++i) { - gc_recursively_mark_object( - static_objects.blocks[static_objects.num_blocks - 1]->objs[i]); + lisp_gc_register_object(val); + LispObject *obj = val; + obj->gc.is_static = true; + struct GCObjectList *node = alloc_gc_objects_list_node(); + node->prev = NULL; + node->next = static_objects; + node->obj = obj; +} + +static void unregister_object_node(LispObject *obj) { + struct GCObjectList *node = obj->gc.gc_node; + if (!node->prev) { + *HEAD_FOR_SET(obj->gc.set) = node->next; + } else { + node->prev->next = node->next; + } + if (node->next) { + node->next->prev = node->prev; } } -static void gc_mark_stack_local_refs(struct LocalReferences *restrict refs) { - size_t full_blocks = refs->num_refs / LOCAL_REFERENCES_BLOCK_LENGTH; - size_t last_block_len = refs->num_refs % LOCAL_REFERENCES_BLOCK_LENGTH; - for (size_t i = 0; i < full_blocks; ++i) { - for (size_t j = 0; j < LOCAL_REFERENCES_BLOCK_LENGTH; ++j) { - gc_recursively_mark_object(refs->blocks[i]->refs[j]); - } +void gc_move_to_set(void *val, ObjectGCSet new_set) { + if (!OBJECTP(val)) { + return; } - for (size_t i = 0; i < last_block_len; ++i) { - gc_recursively_mark_object(refs->blocks[full_blocks]->refs[i]); + LispObject *obj = val; + if (OBJECT_STATIC_P(obj) && new_set == GC_WHITE) { + // static objects are always reachable. do this to optimize the macros + // in base.h + new_set = GC_GREY; } -} - -static void gc_mark_stack_frame(struct StackFrame *restrict frame) { - gc_recursively_mark_object(frame->name); - gc_recursively_mark_object(frame->args); - gc_recursively_mark_object(frame->lexenv); - gc_recursively_mark_object(frame->fobj); - gc_mark_stack_local_refs(&frame->local_refs); -} - -static void gc_mark_and_compact_the_stack(void) { - size_t i; - for (i = 0; i < the_stack.depth; ++i) { - gc_mark_stack_frame(&the_stack.frames[i]); + if (obj->gc.set != new_set) { + struct GCObjectList *node = obj->gc.gc_node; + unregister_object_node(obj); + node->prev = NULL; + node->next = *HEAD_FOR_SET(new_set); + *HEAD_FOR_SET(new_set) = node; } - for (; i < the_stack.first_clear_local_refs; ++i) { - compact_stack_frame(&the_stack.frames[i]); - } - the_stack.first_clear_local_refs = the_stack.depth; } static void free_object(LispVal *val) { @@ -271,68 +258,95 @@ static void free_object(LispVal *val) { default: abort(); } + unregister_object_node(val); + unuse_gc_objects_list_node(((LispObject *) val)->gc.gc_node); lisp_release_object(val); } -static ALWAYS_INLINE void do_sweep_object(LispGCStats *restrict stats, - LispVal *cur_obj, - size_t *restrict free_block, - size_t *restrict free_idx) { -#ifndef _NDEBUG +static void mark_object_recurse(LispGCStats *restrict stats, LispVal *val) { + if (!OBJECTP(val) || OBJECT_GC_SET_P(val, GC_BLACK)) { + return; + } + ObjectProcessStack stack; + init_object_process_stack(&stack); + gc_move_to_set(val, GC_BLACK); + object_process_stack_push_held_objects(&stack, val); ++stats->total_objects_searched; -#endif - if (!OBJECT_MARKED_P(cur_obj)) { - ++stats->total_objects_cleaned; - free_object(cur_obj); - --gc_objects.num_objs; - } else { - SET_OBJECT_MARKED(cur_obj, false); - gc_objects.blocks[*free_block]->objs[*free_idx] = cur_obj; - if (++*free_idx == GC_OBJECTS_BLOCK_SIZE) { - ++*free_block; - free_idx = 0; + while (!OBJECT_PROCESS_STACK_EMPTY_P(&stack)) { + LispVal *cur = object_process_stack_pop(&stack); + if (!OBJECT_GC_SET_P(cur, GC_BLACK)) { + ++stats->total_objects_searched; + gc_move_to_set(cur, GC_BLACK); + object_process_stack_push_held_objects(&stack, cur); } } + free_object_process_stack(&stack); +} + +static void mark_statics(LispGCStats *restrict stats) { + for (struct GCObjectList *node = static_objects; node; node = node->next) { + mark_object_recurse(stats, node->obj); + } } -static void gc_sweep_objects(LispGCStats *restrict stats) { -#ifdef _NDEBUG - stats->total_objects_searched = gc_objects.num_objs; -#else - stats->total_objects_searched = 0; -#endif - size_t free_block = 0; - size_t free_idx = 0; - size_t n_full_blocks = gc_objects.num_blocks - 1; - size_t small_idx = gc_objects.num_objs % GC_OBJECTS_BLOCK_SIZE; - if (small_idx == 0) { - ++n_full_blocks; - } - for (size_t search_block = 0; search_block < n_full_blocks; - ++search_block) { - for (size_t search_idx = 0; search_idx < GC_OBJECTS_BLOCK_SIZE; - ++search_idx) { - LispVal *cur_obj = - gc_objects.blocks[search_block]->objs[search_idx]; - do_sweep_object(stats, cur_obj, &free_block, &free_idx); +static void mark_stack_local_refs(LispGCStats *restrict stats, + struct LocalReferences *restrict refs) { + size_t full_blocks = refs->num_refs / LOCAL_REFERENCES_BLOCK_LENGTH; + size_t last_block_len = refs->num_refs % LOCAL_REFERENCES_BLOCK_LENGTH; + for (size_t i = 0; i < full_blocks; ++i) { + for (size_t j = 0; j < LOCAL_REFERENCES_BLOCK_LENGTH; ++j) { + mark_object_recurse(stats, refs->blocks[i]->refs[j]); } } - for (size_t i = 0; i < small_idx; ++i) { - do_sweep_object(stats, - gc_objects.blocks[gc_objects.num_blocks - 1]->objs[i], - &free_block, &free_idx); + for (size_t i = 0; i < last_block_len; ++i) { + mark_object_recurse(stats, refs->blocks[full_blocks]->refs[i]); } - // clean up the now empty blocks - size_t non_empty_blocks = gc_objects.num_objs / GC_OBJECTS_BLOCK_SIZE; - if (gc_objects.num_objs % GC_OBJECTS_BLOCK_SIZE != 0) { - ++non_empty_blocks; +} + +static void mark_stack_frame(LispGCStats *restrict stats, + struct StackFrame *frame) { + mark_object_recurse(stats, frame->name); + mark_object_recurse(stats, frame->args); + mark_object_recurse(stats, frame->fobj); + mark_object_recurse(stats, frame->lexenv); + mark_stack_local_refs(stats, &frame->local_refs); +} + +static void mark_and_compact_the_stack(LispGCStats *restrict stats) { + mark_object_recurse(stats, the_stack.nogc_retval); + size_t i; + for (i = 0; i < the_stack.depth; ++i) { + mark_stack_frame(stats, &the_stack.frames[i]); } - for (size_t i = non_empty_blocks; i < gc_objects.num_blocks; ++i) { - lisp_free(gc_objects.blocks[i]); + for (; i < the_stack.first_clear_local_refs; ++i) { + compact_stack_frame(&the_stack.frames[i]); } - gc_objects.num_blocks = non_empty_blocks; - gc_objects.blocks = lisp_realloc( - gc_objects.blocks, sizeof(struct GCObjectsBlock *) * non_empty_blocks); + the_stack.first_clear_local_refs = the_stack.depth; +} + +static void gc_sweep_objects(LispGCStats *restrict stats) { + while (black_objects) { + ++stats->total_objects_cleaned; + free_object(black_objects->obj); + } +} + +static void maybe_free_some_object_list_nodes(void) { + while (free_objects_list_count > FREE_OBJECTS_LIST_LIMIT) { + struct GCObjectList *to_free = free_objects_list; + free_objects_list = free_objects_list->next; + lisp_free(to_free); + --free_objects_list_count; + } +} + +static void swap_white_black_sets(void) { + struct GCObjectList *tmp_node = white_objects; + white_objects = black_objects; + black_objects = tmp_node; + ObjectGCSet tmp_id = GC_WHITE; + GC_WHITE = GC_BLACK; + GC_BLACK = tmp_id; } void lisp_gc_now(LispGCStats *restrict stats) { @@ -344,10 +358,12 @@ void lisp_gc_now(LispGCStats *restrict stats) { stats->total_objects_cleaned = 0; struct timespec start_time; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time); - gc_mark_static_roots(); - gc_recursively_mark_object(obarray); - gc_mark_and_compact_the_stack(); + mark_statics(stats); + mark_object_recurse(stats, obarray); + mark_and_compact_the_stack(stats); gc_sweep_objects(stats); + maybe_free_some_object_list_nodes(); + swap_white_black_sets(); struct timespec end_time; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_time); sub_timespecs(&end_time, &start_time, &stats->ellapsed_time); diff --git a/src/gc.h b/src/gc.h index 2db39c5..c5b7b96 100644 --- a/src/gc.h +++ b/src/gc.h @@ -7,30 +7,6 @@ #include #include -extern bool lisp_doing_gc; -extern struct timespec total_gc_time; -extern size_t total_gc_count; - -typedef struct { - size_t total_objects_searched; - size_t total_objects_cleaned; - struct timespec ellapsed_time; -} LispGCStats; - -typedef struct { - unsigned int mark : 1; - tss_t has_local_ref; -} ObjectGCInfo; - -void lisp_init_gc(void); - -// the argument is a LispVal * -void lisp_gc_register_object(void *obj); -void lisp_gc_register_static_object(void *obj); - -// note that the argument is restrict! -void lisp_gc_now(LispGCStats *restrict status); - #define OBJECT_PROCESS_STACK_BLOCK_SIZE 64 struct ObjectProcessStackBlock { void *objs[OBJECT_PROCESS_STACK_BLOCK_SIZE]; @@ -59,7 +35,38 @@ void object_process_stack_push_held_objects(ObjectProcessStack *restrict stack, void *obj); void *object_process_stack_pop(ObjectProcessStack *restrict stack); -void gc_recursively_mark_object(void *obj); +extern bool lisp_doing_gc; +extern struct timespec total_gc_time; +extern size_t total_gc_count; + +typedef struct { + size_t total_objects_searched; + size_t total_objects_cleaned; + struct timespec ellapsed_time; +} LispGCStats; + +typedef uint8_t ObjectGCSet; + +extern ObjectGCSet GC_BLACK; +extern ObjectGCSet GC_GREY; +extern ObjectGCSet GC_WHITE; + +struct GCObjectList; + +typedef struct { + unsigned int is_static : 1; + ObjectGCSet set : 2; + tss_t has_local_ref; + struct GCObjectList *gc_node; +} ObjectGCInfo; + +// the argument is a LispVal * +void lisp_gc_register_object(void *val); +void lisp_gc_register_static_object(void *val); +void gc_move_to_set(void *val, ObjectGCSet new_set); + +// note that the argument is restrict! +void lisp_gc_now(LispGCStats *restrict status); // Debug void debug_print_gc_stats(FILE *stream, const LispGCStats *stats); diff --git a/src/hashtable.c b/src/hashtable.c index 7f5ba6d..8f5b010 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -14,7 +14,9 @@ LispVal *make_hash_table_no_gc(LispVal *hash_fn, LispVal *eq_fn) { LispHashTable *obj = lisp_alloc_object_no_gc(sizeof(LispHashTable), TYPE_HASH_TABLE); obj->eq_fn = eq_fn; + MARK_OBJECT_ADDED(eq_fn, obj); obj->hash_fn = hash_fn; + MARK_OBJECT_ADDED(hash_fn, obj); obj->count = 0; obj->size = INITIAL_SIZE; obj->data = lisp_malloc0(sizeof(struct HashTableBucket) * obj->size); @@ -33,7 +35,9 @@ DEFUN(make_hash_table, "make-hash-table", (LispVal * hash_fn, LispVal *eq_fn), LispHashTable *obj = lisp_alloc_object(sizeof(LispHashTable), TYPE_HASH_TABLE); obj->eq_fn = eq_fn; + MARK_OBJECT_ADDED(eq_fn, obj); obj->hash_fn = hash_fn; + MARK_OBJECT_ADDED(hash_fn, obj); obj->count = 0; obj->size = INITIAL_SIZE; obj->data = lisp_malloc0(sizeof(struct HashTableBucket) * obj->size); @@ -114,6 +118,7 @@ DEFUN(puthash, "puthash", (LispVal * ht, LispVal *key, LispVal *val), LispHashTable *obj = ht; if (obj->cache_bucket && key == obj->cache_bucket->key) { obj->cache_bucket->value = val; + MARK_OBJECT_ADDED(val, ht); return Qnil; } maybe_rehash(ht); @@ -122,8 +127,10 @@ DEFUN(puthash, "puthash", (LispVal * ht, LispVal *key, LispVal *val), if (HT_BUCKET_EMPTY_P(b)) { b->hash = hash; b->key = key; + MARK_OBJECT_ADDED(key, ht); } b->value = val; + MARK_OBJECT_ADDED(val, ht); ++((LispHashTable *) ht)->count; obj->cache_bucket = b; return Qnil; diff --git a/src/lisp.c b/src/lisp.c index f3c8e9f..d1d1424 100644 --- a/src/lisp.c +++ b/src/lisp.c @@ -39,7 +39,6 @@ static void register_manual_symbols(void) { } void lisp_init(void) { - lisp_init_gc(); construct_manual_symbols(); obarray = Fmake_hash_table(Qhash_string, Qstrings_equal); // these call Fintern, so they need to have obarray constructed diff --git a/src/list.h b/src/list.h index cf3af12..eb80f29 100644 --- a/src/list.h +++ b/src/list.h @@ -43,11 +43,13 @@ static ALWAYS_INLINE LispVal *XCDR_SAFE(LispVal *cons) { static ALWAYS_INLINE void RPLACA(LispVal *cons, LispVal *newcar) { assert(CONSP(cons)); ((LispCons *) cons)->car = newcar; + MARK_OBJECT_ADDED(newcar, cons); } static ALWAYS_INLINE void RPLACD(LispVal *cons, LispVal *newcdr) { assert(CONSP(cons)); ((LispCons *) cons)->cdr = newcdr; + MARK_OBJECT_ADDED(newcdr, cons); } static ALWAYS_INLINE LispVal *LIST1(LispVal *v1) {