From 7079d8925bd285f7d79befb59974e9094957fb6f Mon Sep 17 00:00:00 2001 From: Alexander Rosenberg Date: Fri, 27 Sep 2024 04:50:47 -0700 Subject: [PATCH] A lot more work --- BYTECODE.md | 244 ++++++----- CMakeLists.txt | 2 + bootstrap/ast.c | 2 +- bootstrap/compile.c | 910 ++++++++++++++++++++++++++++++++---------- bootstrap/compile.h | 4 +- bootstrap/constants.c | 41 +- bootstrap/constants.h | 130 +++--- bootstrap/main.c | 18 + bootstrap/test.sl | 17 +- 9 files changed, 984 insertions(+), 384 deletions(-) diff --git a/BYTECODE.md b/BYTECODE.md index ba5a2ff..dadaece 100644 --- a/BYTECODE.md +++ b/BYTECODE.md @@ -24,6 +24,7 @@ The argument types are as follows: | double | Equivilant to the C type "double" | | reg | register (format: type:u8,which:u32) | | str | string (format: length:u64,data:[i8]) | +| bool | boolean (format: u8) | ### Registers Most instructions take register numbers instead of direct arguments. Registers @@ -62,12 +63,18 @@ lexenv register or "arg0" is the first argument register. - VECTOR dest:reg, count:u64 Create a vector from the first COUNT "arg" registers and store it into DEST. - + +- LENGTH dest:reg, thing:reg + Place the length of the THING, a string, vector, or list, into DEST. + - INTERN\_LIT reg:reg, name:str INTERN\_DYN reg:reg, name:reg These instructions convert the string literal or register containing a string NAME into a symbol and store the symbol into REG. +- TYPE_OF dest:reg, thing:reg + Place a symbol representing the type of THING into DEST. + - SYMBOL\_NAME dest:reg, sym:reg Store the name of SYM into DEST. @@ -89,17 +96,16 @@ lexenv register or "arg0" is the first argument register. - GET\_RETVAL\_COUNT dest:reg Place the count last set with RETVAL\_COUNT into DEST. -- ENTER\_LEXENV - ENTER\_INHERITED\_LEXENV - LEAVE\_LEXENV - Push or restore the current lexical environment. An inherited lexenv does not - save or restore the "saved" registers. +- ENTER\_LEXENV count:u64 + Enter a new lexical environment. COUNT is the number of instructions that this + environment lasts. -- ENTER\_BLOCK sym:reg, count:u64 - LEAVE\_BLOCK sym:reg - Enter a new named block which is identified by the symbol in SYM. The block is - COUNT instructions long. LEAVE\_BLOCK leaved the block identified by - SYM. +- ENTER\_BLOCK name:str, count:u64 + LEAVE\_BLOCK name:str + Enter a new named block which is identified by the symbol named NAME. The + block is COUNT instructions long. LEAVE\_BLOCK leaves the block identified by + NAME. LEAVE\_BLOCK will jump to the end of the block, and is not necessary + unless you want to exit the block early. - SET\_VALUE sym:reg, value:reg Set the value as a variable of SYM to VALUE. @@ -114,12 +120,29 @@ lexenv register or "arg0" is the first argument register. - GET\_FUNCTION dest:reg, sym:reg Store the value as a function of the symbol SYM into DEST. -- NEWFUNCTION\_LIT dest:reg, count:u64 - NEWFUNCTION\_DYN dest:reg, src:reg +- BOUNDP dest:reg, sym:reg + If SYM has a value as a variable, place t in DEST, otherwise, place nil in DEST. + +- FUNCTIONP dest:reg, sym:reg + If SYM has a value as a function, place t in DEST, otherwise, place nil in DEST. + +- NEWFUNCTION\_LIT dest:reg, nreq:u32, nopt:u32, nkey:u32, + aok:bool, rest:bool, count:u64 + NEWFUNCTION\_DYN dest:reg, nreq:u32, nopt:u32, nkey:u32, + aok:bool, rest:bool, src:reg Create a new function object and store it into DEST. If the first case the next COUNT instructions are considered to be the function and are skipped. In the second case SRC should be a list or vector containing the bytecode for the - function. + function. NREQ, NOPT, and NKEY are the number of required, optional, and key + arguments required for this function. The names of the keyword arguments are + passed in the "arg" registers. The runtime will handle normalizing these for + you. AOK is allow other keys, REST is wether to allow more positional + arguments. Arguments are passed by the runtime in "arg" registers. The first + required argument is passed in the "arg0" register. After required are + optional arguments. These are passed in two registers. The first is either nil + or t weather or not he argument was actually passed. The second is its value, + or nil if it was not passed. The same is true for key arguments. Finally if + rest is passed, any extra arguments are passed as a list as the final element. - PUT sym:reg, key:reg, value:reg Associate KEY with VALUE in the plist of SYM. @@ -127,21 +150,32 @@ lexenv register or "arg0" is the first argument register. - GET dest:reg, sym:reg, key:reg Store the value associated with KEY in the plist of SYM into DEST. -- AND dest:reg, count:u64, values:[reg] - OR dest:reg, count:u64, values:[reg] - XOR dest:reg, count:u64, values:[reg] - NOT dest:reg, value:reg - Perform a logical operation on each of VALUES. For example, the XOR - instruction will exclusively or each of VALUES with the next value, and store - the overall result into DEST. NOT is special as it can only take one value, - of which it will take the logical negation and store it into DEST. +- AND2 dest:reg, val1:reg, val2:reg + ANDN dest:reg, count:u64 + Logical and VAL1 and VAL2, or the first COUNT "arg" registers, and place the + result in DEST. + +- OR2 dest:reg, val1:reg, val2:reg + ORN dest:reg, count:u64 + Logical or VAL1 and VAL2, or the first COUNT "arg" registers, and place the + result in DEST. + +- XOR2 dest:reg, val1:reg, val2:reg + XORN dest:reg, count:u64 + Logical xor VAL1 and VAL2, or the first COUNT "arg" registers, and place the + result in DEST. + +- NOT dest:reg, value:reg + Take the logical negation of VALUE and place it in DEST. -- CJMP cond:reg, offset:i64 +- JMP offset:i64 + CJMP cond:reg, offset:i64 If the value in COND is truthy (not nil), skip the next OFFSET instructions. If OFFSET is negative, instead go back abs(OFFSET) instructions. CJMP is NOT counted as an instruction for the purposes of counting offsets. Therefore an OFFSET of -2 means restart execute at the - instruction above the instruction above this CJMP. + instruction above the instruction above this CJMP. JMP is like CJMP, but takes + no condition and always jumps. - CAR dest:reg, cons:reg CDR dest:reg, cons:reg @@ -151,21 +185,17 @@ lexenv register or "arg0" is the first argument register. SETCDR cons:reg, value:reg Store VALUE into the car or cdr of CONS. - - GETELT\_LIT dest:reg, seq:reg, index:u64 GETELT\_DYN dest:reg, seq:reg, index:reg - Store the value at INDEX in SEQ (a list or vector) into DEST. + Store the value at INDEX in SEQ (a list, string, or vector) into DEST. - SETELT\_LIT seq:reg, index:u64, value:reg SETELT\_DYN seq:reg, index:reg, value:reg - Store VALUE into the index numbered INDEX of SEQ (a list or vector). + Store VALUE into the index numbered INDEX of SEQ (a list, string, or vector). -- EQ\_TWO dest:reg, val1:reg, val2:reg - EQ\_N dest:reg, count:u64 +- EQ dest:reg, val1:reg, val2:reg Compare VAL1 and VAL2, if they are the same object (or symbols with the same - name) store T into DEST, otherwise, store NIL. In the case of EQ\_N, if the - first COUNT "arg" registers are the same object (or symbols with the same - name), store T into DEST, otherwise, store NIL. + name) store T into DEST, otherwise, store NIL. - NUM\_GT dest:reg, val1:reg, val2:reg NUM\_GE dest:reg, val1:reg, val2:reg @@ -175,99 +205,63 @@ lexenv register or "arg0" is the first argument register. Compare VAL1 and VAL2, which must be numbers, and compare their values. If they pass, store T into DEST, otherwise store NIL. -## Mnemonic Conversion Table +- ADD dest:reg, count:u64 + Add the first COUNT "arg" registers and place the result into DEST. + +- SUB dest:reg, val1:reg, val2:reg + Subtract the sum of the first COUNT - 1 "arg" registers starting from "arg1", + that is "arg1", "arg2", "arg3", etc., from "arg0". Store the result in DEST. + +- MUL dest:reg, count:u64 + Multiply the first COUNT "arg" registers and place the result in DEST. -| Mnemonic | Number | -|:-----------------|:------:| -| STRING\_LIT | 0 | -| STRING\_DYN | 1 | -| INT | 2 | -| FLOAT | 3 | -| CONS | 4 | -| LSIT\_LIT | 5 | -| LSIT\_DYN | 6 | -| VECTOR\_LIT | 7 | -| VECTOR\_DYN | 8 | -| INTERN\_LIT | 9 | -| INTERN\_DYN | 10 | -| SYMBOL\_NAME | 11 | -| MOV | 12 | -| FUNCALL | 13 | -| RETVAL\_COUNT | 14 | -| ENTER\_LEXENV | 15 | -| LEAVE\_ELEXENV | 16 | -| ENTER\_BLOCK | 17 | -| LEAVE\_BLOCK | 18 | -| SET\_VALUE | 19 | -| SET\_FUNCTION | 20 | -| GET\_VALUE | 21 | -| GET\_FUNCTION | 22 | -| NEWFUNCTION\_LIT | 23 | -| NEWFUNCTION\_DYN | 24 | -| PUT | 25 | -| GET | 26 | -| AND | 27 | -| OR | 28 | -| XOR | 29 | -| NOT | 30 | -| CJMP | 31 | -| CAR | 32 | -| CDR | 33 | -| SETCAR | 34 | -| SETCDR | 35 | -| GETELT\_LIT | 36 | -| GETELT\_DYN | 37 | -| SETELT\_LIT | 38 | -| SETELT\_DYN | 39 | -| EQ\_TWO | 40 | -| EQ\_N | 41 | -| NUM\_GT | 42 | -| NUM\_GE | 43 | -| NUM\_EQ | 44 | -| NUM\_LE | 45 | -| NUM\_LT | 46 | +- DIV dest:reg, count:u64 + Divide "arg0" by the product of the COUNT - 1 "arg" registers starting from "arg1" -## Examples -This: -```lisp -(format t "Hello World~%") -``` -Compiles into: -```text -INTERN_LIT val0, "Hello World~%" -``` +- INT_DIV dest:reg, val1:reg, val2:reg + Divide VAL1 by VAL2 and place the integer part of the result in DEST. + +- RECIP dest:reg, val:reg + Divide 1 by VAL and place the result in DEST. -This: -```lisp -(defun foo (bar &key baz &rest qux) - "FOO each of BAR and BAZ as well as each of QUX." - (let (some-val (genval bar)) - (foo-internal some-val :baz baz qux))) - -(foo 10 :baz 20) -``` -Compiles into: -```text -NEWFUNCTION val0, 13 ;; not counting this instruction -ENTER_LEXENV -INTERN_LIT saved0, "foo" -ENTER_BLOCK saved0, 9 ;; not counting this instruction -INTERN_LIT val0, "genval" -;; NOTE bar already in arg0 -FUNCALL val0 ;; NOTE val0 clobbered here -INTERN_LIT val0, "foo-internal" -MOV arg0, ret0 -MOV arg3, arg2 -MOV arg2, arg1 -INTERN_LIT arg1, ":baz" -FUNCALL val0 -LEAVE_BLOCK saved0 -LEAVE_LEXENV - -INTERN_LIT val1, "foo" -SET_FUNCTION val1, val0 -INT arg0, 10 -INTERN_LIT arg1, ":baz" -INT arg2, 20 -FUNCALL val1 -``` +- MOD dest:reg, val1:reg, val2:reg + Divide VAL1 by VAL2 and place the remainder in DEST. + +- SQRT dest:reg, val:reg + Take the square root of VAL and place it in DEST. + +- POW dest:reg, base:reg, exp:reg + Raise BASE to the EXP power and place the result in DEST. + +- LN dest:reg, val:reg + Take the natural log of VAL and place the result in DEST. + +- EXP dest:reg, val:reg + Take the exponential function at VAL and place the result in DEST. + +- SIN dest:reg, val:reg + COS dest:reg, val:reg + TAN dest:reg, val:reg + Take the sine, cosine, or tangent of VAL (in radians) and place it into DEST. + +- ASIN dest:reg, val:reg + ACOS dest:reg, val:reg + ATAN dest:reg, val:reg + Take the inverse sine, cosine, or tangent of VAL and place the result (in + radians) into DEST. + +- BITAND dest:reg, val1:reg, val2:reg + BITOR dest:reg, val1:reg, val2:reg + BITXOR dest:reg, val1:reg, val2:reg + BITNOR dest:reg, val1:reg, val2:reg + Perform the given bit-wise operation on VAL1 and VAL2 and place the result + into DEST. + +- BITNEG dest:reg, val:reg + Negate each bit in VAL. That is, flip each bit in VAL. + +- LSH dest:reg, val:reg, by:reg + ASH dest:reg, val:reg, by:reg + Either logically or arithmetically shift VAL by BY bits to the left (or right + is BY is negative). Put the result in DEST. + diff --git a/CMakeLists.txt b/CMakeLists.txt index e598b8b..374211e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,3 +14,5 @@ endforeach() add_executable(bootstrap-slc ${REAL_BOOTSTRAP_FILES}) target_link_libraries(bootstrap-slc m) +# target_link_libraries(bootstrap-slc m "-fsanitize=undefined" "-fsanitize=address") +# target_compile_options(bootstrap-slc PUBLIC "-fsanitize=undefined" "-fsanitize=address") diff --git a/bootstrap/ast.c b/bootstrap/ast.c index 67eb60e..921aa45 100644 --- a/bootstrap/ast.c +++ b/bootstrap/ast.c @@ -262,7 +262,6 @@ static AstIntNode *process_char_token(Token *token, AstErrorList **err) { if (!convert_named_char_escape(sym + 1, &c) && !convert_c_style_char_escape(sym + 1, &c) && !convert_numeric_char_escape(sym + 1, &c, false)) { - free(token->text); free(node); push_build_error(err, token, 0, "invalid escape sequence in character literal"); @@ -676,6 +675,7 @@ static AstNode *process_token(Token *token, TokenStream *stream, abort(); break; } + free(token->text); return retval; } diff --git a/bootstrap/compile.c b/bootstrap/compile.c index 3cf8dee..68e9b6f 100644 --- a/bootstrap/compile.c +++ b/bootstrap/compile.c @@ -1,5 +1,6 @@ #include "compile.h" +#include #include #include #include @@ -70,11 +71,11 @@ typedef enum { ARG_STR, ARG_BYTES, ARG_DOUBLE, + ARG_BOOL, } InstArgType; static bool emit_next_arg_c(CompileError **err, FILE *stream, - va_list args) { - InstArgType type = va_arg(args, InstArgType); + InstArgType type, va_list args) { if (type == ARG_STR || type == ARG_BYTES) { size_t len; if (type == ARG_BYTES) { @@ -105,6 +106,14 @@ static bool emit_next_arg_c(CompileError **err, FILE *stream, double value = va_arg(args, double); fprintf(stream, "%f", value); return true; + } else if (type == ARG_BOOL) { + uint32_t value = va_arg(args, uint32_t); + if (value) { + fprintf(stream, "true"); + } else { + fprintf(stream, "false"); + } + return true; } uint64_t val; const char *format; @@ -139,8 +148,7 @@ static bool emit_next_arg_c(CompileError **err, FILE *stream, } static bool emit_next_arg_bin(CompileError **err, FILE *stream, - va_list args) { - InstArgType type = va_arg(args, InstArgType); + InstArgType type, va_list args) { if (type == ARG_STR || type == ARG_BYTES) { size_t len; if (type == ARG_BYTES) { @@ -173,6 +181,7 @@ static bool emit_next_arg_bin(CompileError **err, FILE *stream, uint64_t val; switch (type) { case ARG_U8: + case ARG_BOOL: case ARG_I8: size = 8; val = va_arg(args, uint32_t); @@ -222,15 +231,16 @@ static bool emit_instruction(CompileError **err, FILE *stream, va_list args; va_start(args, nargs); for (size_t i = 0; i < nargs; ++i) { + InstArgType type = va_arg(args, InstArgType); switch (COMPILE_FORMAT) { case COMPILE_FORMAT_ASM: - success = emit_next_arg_c(err, stream, args); + success = emit_next_arg_c(err, stream, type, args); if (i < nargs - 1) { fprintf(stream, ", "); } break; case COMPILE_FORMAT_BIN: - success = emit_next_arg_bin(err, stream, args); + success = emit_next_arg_bin(err, stream, type, args); break; } if (!success) { @@ -410,8 +420,9 @@ static bool function_key_ok(AstSymbolNode *key, FunctionEntry *func) { if (func->allow_other_keys) { return true; } + const char *name = key->is_property ? key->name + 1 : key->name; for (size_t i = 0; i < func->nkeys; ++i) { - if (strcmp(key->name, func->keys[i]) == 0) { + if (strcmp(name, func->keys[i]) == 0) { return true; } } @@ -423,20 +434,27 @@ static bool function_arguments_ok(AstListNode *form, FunctionEntry *func, CompileError **err) { bool was_ok = true; size_t npositional = form->nchildren - 1; - for (size_t i = 1; i < form->nchildren; ++i) { - if (is_property_symbol(form->children[i])) { - if (!function_key_ok((AstSymbolNode *) form->children[i], func)) { - push_error_at_ast(err, form->children[i], COMPILE_WARNING, - "unknown key for function %s", func->name); - } - if (i == form->nchildren - 1) { - push_error_at_ast(err, form->children[i], COMPILE_ERROR, - "no value provided to key"); + bool had_key = false; + if (func->nkeys) { + for (size_t i = 1; i < form->nchildren; ++i) { + if (is_property_symbol(form->children[i])) { + if (!function_key_ok((AstSymbolNode *) form->children[i], func)) { + push_error_at_ast(err, form->children[i], COMPILE_WARNING, + "unknown key for function %s", func->name); + } + if (i == form->nchildren - 1) { + push_error_at_ast(err, form->children[i], COMPILE_ERROR, + "no value provided to key"); + was_ok = false; + } + had_key = true; + // skip next arg + ++i; + npositional -= 2; + } else if (had_key) { + // positional argument after key argument was_ok = false; } - // skip next arg - ++i; - npositional -= 2; } } if (npositional < func->nrequired) { @@ -553,6 +571,46 @@ static ssize_t intern_and_save(CompileEnvironment *env, FILE *stream, return 1; } +static void add_variable_to_lexenv(CompileLexenv *lexenv, const char *name, + LispReg *reg) { + for (size_t i = 0; i < lexenv->nvars; ++i) { + if (strcmp(name, lexenv->vars[i].name) == 0) { + lexenv->vars[i].reg = *reg; + return; + } + } + lexenv->vars = realloc(lexenv->vars, sizeof(*lexenv->vars) * + ++lexenv->nvars); + lexenv->vars[lexenv->nvars - 1].name = strdup(name); + lexenv->vars[lexenv->nvars - 1].reg = *reg; +} + +static ssize_t maybe_save_argument_reg(CompileEnvironment *env, + LispReg *arg_reg, FILE *stream, + CompileError **err) { + LispReg new_reg; + CompileLexenv *lexenv = env->lexenv_stack; + while (lexenv) { + for (size_t i = 0; i < lexenv->nvars; ++i) { + if (SAME_REG(&lexenv->vars[i].reg, arg_reg)) { + next_open_saved_reg(env, &new_reg); + add_variable_to_lexenv(env->lexenv_stack, lexenv->vars[i].name, + &new_reg); + return emit_instruction(err, stream, INST_MOV, 2, + ARG_REG, new_reg.type, new_reg.which, + ARG_REG, arg_reg->type, arg_reg->which) + ? 1 : -1; + } + } + if (lexenv->inherit) { + lexenv = lexenv->next; + } else { + lexenv = NULL; + } + } + return 0; +} + static FunctionEntry *get_or_make_function_entry(CompileEnvironment *env, AstSymbolNode *name, bool warn_on_redef, @@ -587,6 +645,7 @@ static bool parse_function_lambda_list(AstListNode *list, FunctionEntry *entry, entry->nrequired = 0; entry->optional = NULL; entry->noptional = 0; + entry->has_rest = false; if (list->parent.type == AST_TYPE_NULL) { return true; } @@ -674,7 +733,7 @@ static bool parse_function_lambda_list(AstListNode *list, FunctionEntry *entry, static FunctionEntry *parse_and_add_function(CompileEnvironment *env, AstListNode *form, - CompileError **err) { + CompileError **err) { if (form->nchildren < 2 || (form->children[2]->type != AST_TYPE_LIST && (form->children[2]->type != AST_TYPE_NULL))) { push_error_at_ast(err, (AstNode *) form, COMPILE_ERROR, @@ -697,14 +756,6 @@ static FunctionEntry *parse_and_add_function(CompileEnvironment *env, return entry; } -static void add_variable_to_lexenv(CompileLexenv *lexenv, const char *name, - LispReg *reg) { - lexenv->vars = realloc(lexenv->vars, sizeof(*lexenv->vars) * - ++lexenv->nvars); - lexenv->vars[lexenv->nvars - 1].name = strdup(name); - lexenv->vars[lexenv->nvars - 1].reg = *reg; -} - static ssize_t save_arg_register(CompileEnvironment *env, uint32_t which_arg, LispReg *target, FILE *stream, CompileError **err) { next_open_saved_reg(env, target); @@ -713,53 +764,194 @@ static ssize_t save_arg_register(CompileEnvironment *env, uint32_t which_arg, ARG_REG, REG_ARG, which_arg); } -static ssize_t add_lambda_list_to_lexenv(CompileEnvironment *env, - FunctionEntry *entry, - FILE *stream, - CompileError **err) { - ssize_t total_int = 0; - for (size_t i = 0; i < entry->nrequired; ++i) { - LispReg target; - ssize_t ec = save_arg_register(env, i, &target, stream, err); - if (ec < 0) { - return -1; +static ssize_t emit_generic_arg_list(CompileEnvironment *env, AstNode **args, + size_t arg_count, bool ignore_ret_vals, + FILE *stream, CompileError **err) { + ssize_t ec = 0; + uint32_t first_saved_arg; + if (ignore_ret_vals) { + first_saved_arg = env->first_available_val; + } else { + first_saved_arg = env->lexenv_stack->first_avaiable_saved; + } + size_t nused_saved = 0; + // do these first because they clobber the arg registers, so we need to save + // their output + for (size_t i = 0; i < arg_count; ++i) { + if (!args[i]) { + continue; } - total_int += ec; + LispReg reg = { + .type = REG_SAVED, + .which = first_saved_arg + nused_saved, + }; + switch (args[i]->type) { + case AST_TYPE_LIST: + case AST_TYPE_VECTOR: + case AST_TYPE_QUOTE: { + if (!ignore_ret_vals) { + env->lexenv_stack->first_avaiable_saved = reg.which + 1; + } + ++nused_saved; + ssize_t rv = byte_compile_form_internal(env, args[i], + ignore_ret_vals ? NULL : ®, + stream, err); + if (rv < 0) { + ec = -1; + goto done; + } + ec += rv; + } + break; + default: + break; + } + } + if (!ignore_ret_vals) { + nused_saved = 0; + for (size_t i = 0; i < arg_count; ++i) { + LispReg reg = { + .type = REG_ARG, + .which = i, + }; + ec += maybe_save_argument_reg(env, ®, stream, err); + if (!args[i]) { + emit_instruction(err, stream, INST_NIL, 1, + ARG_REG, reg.type, reg.which); + ++ec; + continue; + } + switch (args[i]->type) { + case AST_TYPE_LIST: + case AST_TYPE_VECTOR: + case AST_TYPE_QUOTE: + emit_instruction(err, stream, INST_MOV, 2, + ARG_REG, reg.type, reg.which, + ARG_REG, REG_SAVED, first_saved_arg + + nused_saved++); + ++ec; + break; + case AST_TYPE_SYMBOL: + case AST_TYPE_INT: + case AST_TYPE_FLOAT: + case AST_TYPE_STRING: + case AST_TYPE_NULL: { + ssize_t rv = byte_compile_form_internal(env, args[i], ®, + stream, err); + if (rv < 0) { + ec = -1; + goto done; + } + ec += rv; + } + break; + } + } + } + done: + env->lexenv_stack->first_avaiable_saved = first_saved_arg; + return ec; +} + +static void add_lambda_list_to_lexenv(CompileEnvironment *env, + FunctionEntry *entry, + FILE *stream, + CompileError **err) { + for (size_t i = 0; i < entry->nrequired; ++i) { + LispReg target = { + .type = REG_ARG, + .which = i, + }; add_variable_to_lexenv(env->lexenv_stack, entry->required[i], &target); } for (size_t i = 0; i < entry->noptional; ++i) { - LispReg target; - size_t pos = i + entry->nrequired; - ssize_t ec = save_arg_register(env, pos, &target, stream, err); - if (ec < 0) { - return -1; - } - total_int += ec; + uint32_t pos = i + entry->nrequired; + LispReg target = { + .type = REG_ARG, + .which = pos, + }; add_variable_to_lexenv(env->lexenv_stack, entry->optional[i], &target); } for (size_t i = 0; i < entry->nkeys; ++i) { - LispReg target; size_t pos = i + entry->nrequired + entry->noptional; - ssize_t ec = save_arg_register(env, pos, &target, stream, err); - if (ec < 0) { - return -1; - } - total_int += ec; + LispReg target = { + .type = REG_ARG, + .which = pos, + }; add_variable_to_lexenv(env->lexenv_stack, entry->keys[i], &target); } if (entry->rest) { - LispReg target; size_t pos = entry->nrequired + entry->noptional + entry->nkeys; - ssize_t ec = save_arg_register(env, pos, &target, stream, err); + LispReg target = { + .type = REG_ARG, + .which = pos, + }; + add_variable_to_lexenv(env->lexenv_stack, entry->rest, &target); + } +} + +static ssize_t emit_newfunction_for_entry(CompileEnvironment *env, + FunctionEntry *entry, + size_t int_forms, LispReg *target, + FILE *stream, CompileError **err) { + ssize_t total_insts = entry->nkeys + 1; + for (size_t i = 0; i < entry->nkeys; ++i) { + LispReg arg_reg = { + .type = REG_ARG, + .which = i, + }; + total_insts += maybe_save_argument_reg(env, &arg_reg, stream, err); + emit_instruction(err, stream, INST_INTERN_LIT, 2, + ARG_REG, arg_reg.type, arg_reg.which, + ARG_STR, entry->keys[i]); + } + emit_instruction(err, stream, INST_NEWFUNCTION_LIT, 7, + ARG_REG, target->type, target->which, + ARG_U32, (uint32_t) entry->nrequired, + ARG_U32, (uint32_t) entry->noptional, + ARG_U32, (uint32_t) entry->nkeys, + ARG_BOOL, (uint8_t) entry->allow_other_keys, + ARG_BOOL, (uint8_t) entry->has_rest, + ARG_U64, (uint64_t) int_forms); + return total_insts; +} + +// return the form at ret_index +static ssize_t compile_multi_form_internal(CompileEnvironment *env, + ssize_t ret_index, AstNode **forms, + size_t nforms, LispReg *target, + FILE *stream, CompileError **err) { + LispReg save_ret_reg; + ssize_t int_nforms = 0; + for (size_t i = 0; i < nforms; ++i) { + ssize_t ec; + if (i == ret_index && ret_index >= nforms - 1) { + ec = byte_compile_form_internal(env, forms[i], target, + stream, err); + } else if (i == ret_index) { + next_open_saved_reg(env, &save_ret_reg); + ec = byte_compile_form_internal(env, forms[i], + target ? &save_ret_reg : NULL, + stream, err); + } else { + ec = byte_compile_form_internal(env, forms[i], NULL, + stream, err); + } if (ec < 0) { return -1; } - total_int += ec; - add_variable_to_lexenv(env->lexenv_stack, entry->rest, &target); + int_nforms += ec; } - return 0; + if (ret_index < nforms - 1 && target) { + ++int_nforms; + emit_instruction(err, stream, INST_MOV, 2, + ARG_REG, target->type, target->which, + ARG_REG, save_ret_reg.type, save_ret_reg.which); + } + return int_nforms; } +// might want to clean this up at some point static ssize_t compile_defun_call(CompileEnvironment *env, AstListNode *form, LispReg *target, FILE *stream, CompileError **err) { @@ -776,71 +968,48 @@ static ssize_t compile_defun_call(CompileEnvironment *env, AstListNode *form, char *internal_code = NULL; FILE *int_stream = open_memstream(&internal_code, &internal_len); environment_enter_lexenv(env, false); - ssize_t int_nforms = add_lambda_list_to_lexenv(env, entry, int_stream, err); + add_lambda_list_to_lexenv(env, entry, int_stream, err); + size_t nchild_forms = form->nchildren - first_form; + LispReg ret_val_target = { + .type = REG_RET, + .which = 0, + }; + ssize_t int_nforms = + compile_multi_form_internal(env, nchild_forms - 1, form->children + first_form, + nchild_forms, &ret_val_target, int_stream, err); if (int_nforms < 0) { goto compile_error; } - for (size_t i = first_form; i < form->nchildren; ++i) { - ssize_t ec; - if (i < form->nchildren - 1) { - ec = byte_compile_form_internal(env, form->children[i], NULL, - int_stream, err); - } else { - LispReg ret_reg = { - .type = REG_RET, - .which = 0, - }; - ec = byte_compile_form_internal(env, form->children[i], &ret_reg, - int_stream, err); - } - if (ec < 0) { - goto compile_error; - } - int_nforms += ec; - } environment_leave_lexenv(env); + fclose(int_stream); LispReg backup_target = { .type = REG_VAL, - .which = env->first_available_var, + .which = env->first_available_val, }; if (!target) { target = &backup_target; } - ssize_t ec = emit_instruction(err, stream, INST_NEWFUNCTION_LIT, 2, - ARG_REG, target->type, target->which, - // account for the block and lexenv instructions - ARG_U64, (uint64_t) int_nforms + 5); - if (ec < 0) { - goto compile_error; - } - ec = emit_instruction(err, stream, INST_ENTER_LEXENV, 0); - if (ec < 0) { - goto compile_error; - } - LispReg name_reg; - ec = intern_and_save(env, stream, entry->name, entry->name_len, &name_reg, err); - if (ec < 0) { - goto compile_error; - } - ec = emit_instruction(err, stream, INST_ENTER_BLOCK, 2, - ARG_REG, name_reg.type, name_reg.which, - ARG_U64, (uint64_t) int_nforms + 1); - if (ec < 0) { - goto compile_error; - } - fclose(int_stream); - fwrite(internal_code, 1, internal_len, stream); + ssize_t surround_nforms = 0; + surround_nforms += emit_newfunction_for_entry(env, entry, + int_nforms + (int_nforms ? 2 : 0), + target, stream, err); + if (int_nforms) { + emit_instruction(err, stream, INST_ENTER_LEXENV, 1, + ARG_U64, (uint64_t) int_nforms + 1); + emit_instruction(err, stream, INST_ENTER_BLOCK, 2, + ARG_BYTES, entry->name_len, entry->name, + ARG_U64, (uint64_t) int_nforms); + fwrite(internal_code, 1, internal_len, stream); + surround_nforms += 2; + } free(internal_code); - ec = emit_instruction(err, stream, INST_LEAVE_BLOCK, 1, - ARG_REG, name_reg.type, name_reg.which); - if (ec < 0) { - return -1; - } - ec = emit_instruction(err, stream, INST_LEAVE_LEXENV, 0); - if (ec < 0) { - return -1; - } - return int_nforms + 5; + LispReg name_reg; + surround_nforms += intern_and_save(env, stream, entry->name, + entry->name_len, &name_reg, err); + emit_instruction(err, stream, INST_SET_FUNCTION, 2, + ARG_REG, target->type, target->which, + ARG_REG, name_reg.type, name_reg.which); + return int_nforms + surround_nforms + 6; compile_error: fclose(int_stream); free(internal_code); @@ -853,6 +1022,247 @@ static ssize_t compile_devar_call(CompileEnvironment *env, AstListNode *form, return 0; } +static ssize_t compile_progn_call(CompileEnvironment *env, AstListNode *form, + LispReg *target, FILE *stream, + CompileError **err) { + if (form->nchildren == 1 && target) { + return emit_instruction(err, stream, INST_NIL, 1, + ARG_REG, target->type, target->which) ? 1 : -1; + } else if (form->nchildren > 1){ + return compile_multi_form_internal(env, form->nchildren - 1, + form->children + 1, + form->nchildren - 1, target, + stream, err); + } + return 0; +} + +static ssize_t compile_if_call(CompileEnvironment *env, AstListNode *form, + LispReg *target, FILE *stream, + CompileError **err) { + if (form->nchildren == 1) { + push_error_at_ast(err, (AstNode *) form, COMPILE_ERROR, + "no condition for if form"); + return -1; + } + LispReg cond_reg = { + .type = REG_VAL, + .which = env->first_available_val, + }; + ssize_t total_ec = 0; + ssize_t ec; + ec = byte_compile_form_internal(env, form->children[1], + form->nchildren == 2 ? NULL : &cond_reg, + stream, err); + if (ec < 0) { + return -1; + } + total_ec += ec; + if (form->nchildren == 2) { + if (target) { + emit_instruction(err, stream, INST_NIL, 1, + ARG_REG, target->type, target->which); + ++total_ec; + } + return total_ec; + } + char *true_bytes; + size_t true_len; + size_t true_ninst = 0; + FILE *true_stream = open_memstream(&true_bytes, &true_len); + true_ninst = byte_compile_form_internal(env, form->children[2], target, + true_stream, err); + fclose(true_stream); + if (true_ninst < 0) { + free(true_bytes); + return -1; + } + total_ec += true_ninst; + emit_instruction(err, stream, INST_CJMP, 2, + ARG_REG, cond_reg.type, cond_reg.which, + ARG_I64, (int64_t) true_ninst + 1); + ++total_ec; + fwrite(true_bytes, 1, true_len, stream); + free(true_bytes); + char *false_bytes; + size_t false_len; + size_t false_ninst = 0; + FILE *false_stream = open_memstream(&false_bytes, &false_len); + size_t false_nforms = form->nchildren - 3; + false_ninst = compile_multi_form_internal(env, false_nforms - 1, + form->children + 3, false_nforms, + target, false_stream, err); + fclose(false_stream); + if (false_ninst < 0) { + free(false_bytes); + return -1; + } + total_ec += false_ninst; + emit_instruction(err, stream, INST_JMP, 1, + ARG_I64, (int64_t) false_ninst); + ++total_ec; + fwrite(false_bytes, 1, false_len, stream); + free(false_bytes); + return total_ec; +} + +static bool inst_func_eq(const char *func, const char *inst) { + while (*func && *inst) { + if (!(*func == '-' && *inst == '_') && + tolower(*func) != tolower(*inst)) { + return false; + } + ++func; + ++inst; + } + return !*func && !*inst; +} + +static LispInst find_instruction_by_func_name(const char *name) { + for (size_t i = 0; i < N_INSTRUCTIONS; ++i) { + if (inst_func_eq(name, INSTRUCTION_NAMES[i])) { + return i; + } + } + return N_INSTRUCTIONS; +} + +static ssize_t compile_list_like_call(CompileEnvironment *env, AstListNode *form, + LispReg *target, FILE *stream, + CompileError **err) { + AstSymbolNode *name = (AstSymbolNode *) form->children[0]; + LispInst inst = find_instruction_by_func_name(name->name); + if (inst >= N_INSTRUCTIONS) { + return -1; + } + ssize_t ec = emit_generic_arg_list(env, form->children + 1, + form->nchildren - 1, !target, stream, err); + if (target) { + ec += emit_instruction(err, stream, inst, 2, + ARG_REG, target->type, target->which, + ARG_U64, (uint64_t) form->nchildren - 1); + } + return ec; +} + +static ssize_t compile_special_binary_func_call(CompileEnvironment *env, + AstListNode *form, + LispReg *target, FILE *stream, + CompileError **err) { + AstSymbolNode *name = (AstSymbolNode *) form->children[0]; + if (form->nchildren != 3) { + push_error_at_ast(err, (AstNode *)form, COMPILE_ERROR, + "%s expects exactly 2 arguments", name->name); + } + LispInst inst = find_instruction_by_func_name(name->name); + ssize_t arg_nforms = emit_generic_arg_list(env, form->children + 1, + form->nchildren - 1, !target, + stream, err); + if (target) { + emit_instruction(err, stream, inst, 3, + ARG_REG, target->type, target->which, + ARG_REG, REG_ARG, (uint32_t) 0, + ARG_REG, REG_ARG, (uint32_t) 1); + ++arg_nforms; + } + return arg_nforms; +} + +static ssize_t compile_special_unary_func_call(CompileEnvironment *env, + AstListNode *form, + LispReg *target, FILE *stream, + CompileError **err) { + AstSymbolNode *name = (AstSymbolNode *) form->children[0]; + if (form->nchildren != 2) { + push_error_at_ast(err, (AstNode *)form, COMPILE_ERROR, + "%s expects exactly 1 argument", name->name); + } + LispInst inst = find_instruction_by_func_name(name->name); + LispReg temp_reg = { + .type = REG_VAL, + .which = env->first_available_val, + }; + ssize_t arg_nforms = byte_compile_form_internal(env, form->children[1], + !target ? NULL : &temp_reg, + stream, err); + if (target) { + emit_instruction(err, stream, inst, 2, + ARG_REG, target->type, target->which, + ARG_REG, temp_reg.type, temp_reg.which); + ++arg_nforms; + } + return arg_nforms; +} + +static ssize_t compile_math_comparison_call(CompileEnvironment *env, AstListNode *form, + LispReg *target, FILE *stream, + CompileError **err) { + AstSymbolNode *name = (AstSymbolNode *) form->children[0]; + if (form->nchildren != 3) { + push_error_at_ast(err, (AstNode *)form, COMPILE_ERROR, + "%s expects exactly 2 argument", name->name); + } + const char *real_name; + if (name->name_length == 1) { + switch (*name->name) { + case '<': + real_name = "NUM_LT"; + break; + case '=': + real_name = "NUM_EQ"; + break; case '>': + real_name = "NUM_GT"; + break; + } + } else if (*name->name == '<') { + real_name = "NUM_LE"; + } else if (*name->name == '>') { + real_name = "NUM_GE"; + } + LispInst inst = find_instruction_by_func_name(real_name); + ssize_t arg_nforms = emit_generic_arg_list(env, form->children + 1, + form->nchildren - 1, !target, + stream, err); + if (target) { + emit_instruction(err, stream, inst, 3, + ARG_REG, target->type, target->which, + ARG_REG, REG_ARG, (uint32_t) 0, + ARG_REG, REG_ARG, (uint32_t) 1); + ++arg_nforms; + } + return arg_nforms; +} + +static ssize_t compile_math_arithmatic_call(CompileEnvironment *env, AstListNode *form, + LispReg *target, FILE *stream, + CompileError **err) { + const char *real_name; + AstSymbolNode *name = (AstSymbolNode *) form->children[0]; + switch (*name->name) { + case '+': + real_name = "add"; + break; + case '-': + real_name = "sub"; + break; + case '*': + real_name = "mul"; + break; + case '/': + real_name = "div"; + break; + } + LispInst inst = find_instruction_by_func_name(real_name); + ssize_t ec = emit_generic_arg_list(env, form->children + 1, + form->nchildren - 1, !target, stream, err); + if (target) { + ec += emit_instruction(err, stream, inst, 2, + ARG_REG, target->type, target->which, + ARG_U64, (uint64_t) form->nchildren - 1); + } + return ec; +} + static const struct { const char *name; ssize_t (*handler)(CompileEnvironment *env, AstListNode *form, @@ -860,6 +1270,56 @@ static const struct { } NATIVE_FUNCTIONS[] = { {"defun", compile_defun_call}, {"devar", compile_devar_call}, + {"progn", compile_progn_call}, + {"if", compile_if_call}, + + {"list", compile_list_like_call}, + {"vector", compile_list_like_call}, + + {"+", compile_math_arithmatic_call}, + {"-", compile_math_arithmatic_call}, + {"*", compile_math_arithmatic_call}, + {"/", compile_math_arithmatic_call}, + + {"<=", compile_math_comparison_call}, + {"<", compile_math_comparison_call}, + {"=", compile_math_comparison_call}, + {">=", compile_math_comparison_call}, + {">", compile_math_comparison_call}, + + {"eq", compile_special_binary_func_call}, + {"int-div", compile_special_binary_func_call}, + {"mod", compile_special_binary_func_call}, + {"pow", compile_special_binary_func_call}, + {"bitand", compile_special_binary_func_call}, + {"bitor", compile_special_binary_func_call}, + {"bitxor", compile_special_binary_func_call}, + + {"string", compile_special_unary_func_call}, + {"int", compile_special_unary_func_call}, + {"float", compile_special_unary_func_call}, + {"intern", compile_special_unary_func_call}, + {"type-of", compile_special_unary_func_call}, + {"get-function", compile_special_unary_func_call}, + {"get-value", compile_special_unary_func_call}, + {"functionp", compile_special_unary_func_call}, + {"boundp", compile_special_unary_func_call}, + {"not", compile_special_unary_func_call}, + {"car", compile_special_unary_func_call}, + {"cdr", compile_special_unary_func_call}, + {"recip", compile_special_unary_func_call}, + {"sqrt", compile_special_unary_func_call}, + {"ln", compile_special_unary_func_call}, + {"exp", compile_special_unary_func_call}, + {"sin", compile_special_unary_func_call}, + {"cos", compile_special_unary_func_call}, + {"tan", compile_special_unary_func_call}, + {"asin", compile_special_unary_func_call}, + {"acos", compile_special_unary_func_call}, + {"atan", compile_special_unary_func_call}, + {"bigneg", compile_special_unary_func_call}, + {"lsh", compile_special_unary_func_call}, + {"ash", compile_special_unary_func_call}, }; const size_t N_NATIVE_FUNCTIONS = sizeof(NATIVE_FUNCTIONS) / sizeof(NATIVE_FUNCTIONS[0]); @@ -897,18 +1357,34 @@ static ssize_t compile_null_node(CompileEnvironment *env, AstNode *form, ARG_REG, target->type, target->which) ? 1 : -1; } +static ssize_t compile_quoted_symbol_node(CompileEnvironment *env, AstSymbolNode *form, + LispReg *target, FILE *stream, + CompileError **err) { + return emit_instruction(err, stream, INST_INTERN_LIT, 2, + ARG_REG, target->type, target->which, + ARG_BYTES, form->name_length, form->name) ? 1 : -1; +} + static ssize_t compile_symbol_node(CompileEnvironment *env, AstSymbolNode *form, LispReg *target, FILE *stream, CompileError **err) { if (strcmp(form->name, "t") == 0) { return emit_instruction(err, stream, INST_T, 1, ARG_REG, target->type, target->which) ? 1 : -1; + } else if (form->is_property) { + return compile_quoted_symbol_node(env, form, target, stream, err); } LispReg *var_reg = lookup_local_var(env, form->name); if (var_reg) { if (SAME_REG(var_reg, target)) { return 0; } + // if the target is not an arg register, we can just set the target to + // the register that the variable is already in + if (target->type != REG_ARG) { + *target = *var_reg; + return 0; + } return emit_instruction(err, stream, INST_MOV, 2, ARG_REG, target->type, target->which, ARG_REG, var_reg->type, var_reg->which) ? 1 : -1; @@ -930,14 +1406,6 @@ static ssize_t compile_symbol_node(CompileEnvironment *env, AstSymbolNode *form, } } -static ssize_t compile_quoted_symbol_node(CompileEnvironment *env, AstSymbolNode *form, - LispReg *target, FILE *stream, - CompileError **err) { - return emit_instruction(err, stream, INST_INTERN_LIT, 2, - ARG_REG, target->type, target->which, - ARG_BYTES, form->name_length, form->name) ? 1 : -1; -} - static ssize_t compile_quoted_quote_node(CompileEnvironment *env, AstQuoteNode *form, LispReg *target, FILE *stream, CompileError **err) { @@ -952,7 +1420,30 @@ static ssize_t compile_quoted_quote_node(CompileEnvironment *env, AstQuoteNode * return -1; } LispReg quote_sym_reg; - ssize_t ec2 = intern_and_save(env, stream, "quote", sizeof("quote") - 1, + const char *name; + size_t name_len; + switch (form->type) { + case AST_QUOTE_COMMA: + name = "bq-comma"; + name_len = sizeof("bq-comma") - 1; + break; + case AST_QUOTE_SPLICE: + name = "bq-splice"; + name_len = sizeof("bq-splice") - 1; + break; + case AST_QUOTE_NORM: + name = "quote"; + name_len = sizeof("quote") - 1; + break; + case AST_QUOTE_BACK: + name = "backquote"; + name_len = sizeof("backquote") - 1; + break; + case AST_QUOTE_NONE: + // shouldn't happen + return 0; + } + ssize_t ec2 = intern_and_save(env, stream, name, name_len, "e_sym_reg, err); if (ec2 < 0) { return -1; @@ -982,21 +1473,11 @@ static ssize_t compile_quote_node(CompileEnvironment *env, AstQuoteNode *form, env->quote = AST_QUOTE_NONE; return ec; } - case AST_QUOTE_COMMA: { - if (env->quote != AST_QUOTE_BACK) { - push_error_at_ast(err, (AstNode *)form, COMPILE_ERROR, - "comma not inside backquote"); - return -1; - } - env->quote = AST_QUOTE_NONE; - ssize_t ec = byte_compile_form_internal(env, form->form, target, - stream, err); - env->quote = AST_QUOTE_BACK; - return ec; - } + case AST_QUOTE_COMMA: case AST_QUOTE_SPLICE: - // TODO handle this - return 0; + push_error_at_ast(err, (AstNode *) form, COMPILE_ERROR, + "comma or splice not inside a backquote"); + return -1; case AST_QUOTE_NONE: // shouldn't happen, do nothing return 0; @@ -1027,6 +1508,22 @@ static ssize_t compile_quoted_list_node(CompileEnvironment *env, AstListNode *fo return total_emitted + 1; } +static ssize_t compile_backquote_quote_node(CompileEnvironment *env, + AstQuoteNode *form, LispReg *target, + FILE *stream, CompileError **err) { + if (form->type == AST_QUOTE_COMMA) { + env->quote = AST_QUOTE_NONE; + ssize_t ec = byte_compile_form_internal(env, form->form, target, + stream, err); + env->quote = AST_QUOTE_BACK; + return ec; + } else if (form->type == AST_QUOTE_SPLICE) { + // TODO handle this + return 0; + } + return compile_quoted_quote_node(env, form, target, stream, err); +} + static ssize_t compile_vector_node(CompileEnvironment *env, AstVectorNode *form, LispReg *target, FILE *stream, CompileError **err) { @@ -1076,6 +1573,9 @@ static ssize_t compile_atom_node(CompileEnvironment *env, AstNode *form, case AST_TYPE_QUOTE: if (env->quote == AST_QUOTE_NONE) { return compile_quote_node(env, (AstQuoteNode *) form, target, stream, err); + } else if (env->quote == AST_QUOTE_BACK) { + return compile_backquote_quote_node(env, (AstQuoteNode *) form, + target, stream,err); } else { return compile_quoted_quote_node(env, (AstQuoteNode *) form, target, stream, err); @@ -1094,8 +1594,29 @@ static ssize_t compile_atom_node(CompileEnvironment *env, AstNode *form, } } +static AstListNode *make_list_func_node(AstNode **nodes, size_t count) { + AstListNode *node = malloc(sizeof(AstListNode)); + node->parent.type = AST_TYPE_LIST; + node->parent.line = 0; + node->parent.col = 0; + node->nchildren = count + 1; + node->children = malloc(sizeof(AstNode *) * (count + 1)); + node->children[0] = malloc(sizeof(AstSymbolNode)); + node->children[0]->type = AST_TYPE_SYMBOL; + node->children[0]->line = 0; + node->children[0]->col = 0; + ((AstSymbolNode *) node->children[0])->is_property = false; + ((AstSymbolNode *) node->children[0])->name = "list"; + ((AstSymbolNode *) node->children[0])->name_length = sizeof("list") - 1; + ((AstSymbolNode *) node->children[0])->skip_free = true; + memcpy(node->children + 1, nodes, count * sizeof(AstNode *)); + return node; +} + static ssize_t compile_generic_function_call(CompileEnvironment *env, - AstListNode *form, FILE *stream, + AstListNode *form, + LispReg *target, + FILE *stream, CompileError **err) { size_t emitted_count = 0; AstSymbolNode *name = (AstSymbolNode *) form->children[0]; @@ -1107,98 +1628,78 @@ static ssize_t compile_generic_function_call(CompileEnvironment *env, // arguments invalid, give up compiling form return -1; } - // first, thigs that clobber the "arg" registers - for (size_t i = 1; i < form->nchildren; ++i) { - switch (form->children[i]->type) { - case AST_TYPE_QUOTE: - case AST_TYPE_VECTOR: - case AST_TYPE_LIST: { - bool success = true; - LispReg reg = { - .type = REG_ARG, - .which = i - 1, - }; - ssize_t ec = byte_compile_form_internal(env, form->children[i], - ®, stream, err); - if (!success) { - return -1; - } - } - break; - default: - break; - } - } - // TODO make sure that rest and key arguments go at the end - // then all the other types of arguments - for (size_t i = 1; i < form->nchildren; ++i) { - switch (form->children[i]->type) { - case AST_TYPE_SYMBOL: - case AST_TYPE_INT: - case AST_TYPE_FLOAT: - case AST_TYPE_STRING: - case AST_TYPE_NULL: { - bool success = true; - LispReg reg = { - .type = REG_ARG, - .which = i - 1, - }; - ssize_t ec = byte_compile_form_internal(env, form->children[i], - ®, stream, err); - if (!success) { - return -1; - } - } - break; - case AST_TYPE_QUOTE: - case AST_TYPE_VECTOR: - case AST_TYPE_LIST: - break; - } - if (form->children[i]->type != AST_TYPE_LIST) { - } - } + ssize_t ec = 0; + ec += emit_generic_arg_list(env, form->children + 1, form->nchildren - 1, + false, stream, err); LispReg name_reg; - ssize_t ec = intern_and_save(env, stream, name->name, name->name_length, - &name_reg, err); + ec += intern_and_save(env, stream, name->name, name->name_length, + &name_reg, err); if (ec == -1) { return -1; } - emit_instruction(err, stream, INST_FUNCALL, 2, - ARG_REG, name_reg.type, name_reg.which, - ARG_U64, form->nchildren - 1); - return 0; + ec += emit_instruction(err, stream, INST_FUNCALL, 2, + ARG_REG, name_reg.type, name_reg.which, + ARG_U64, form->nchildren - 1); + if (target && target->type != REG_VAL) { + ec += emit_instruction(err, stream, INST_MOV, 2, + ARG_REG, target->type, target->which, + ARG_REG, REG_RET, (uint32_t) 0); + } else if (target) { + target->type = REG_RET; + target->which = 0; + } + return ec; } static ssize_t byte_compile_form_internal(CompileEnvironment *env, AstNode *form, LispReg *target, FILE *stream, CompileError **err) { + ssize_t extra_ins = 0; + // if target is one of the arg registers and it contains a local variable, + // we want to save it to a saved register + if (target && target->type == REG_ARG) { + extra_ins = maybe_save_argument_reg(env, target, stream, err); + if (extra_ins < 0) { + return -1; + } + } if (form->type != AST_TYPE_LIST) { - if (!target) { + if (!target && (form->type != AST_TYPE_QUOTE || + ((AstQuoteNode *) form)->type == AST_TYPE_QUOTE)) { // no use in compiling a literal if the value will just be thrown out return 0; } - return compile_atom_node(env, form, target, stream, err); + return extra_ins + compile_atom_node(env, form, target, stream, err); } else if (env->quote != AST_QUOTE_NONE) { - return compile_quoted_list_node(env, (AstListNode *) form, target, - stream, err); + if (!target) { + return 0; + } + return extra_ins + compile_quoted_list_node(env, (AstListNode *) form, target, + stream, err); } else if (is_function_call(form)) { for (size_t i = 0; i < N_NATIVE_FUNCTIONS; ++i) { if (is_function_call_named(NATIVE_FUNCTIONS[i].name, form)) { - return NATIVE_FUNCTIONS[i].handler(env, (AstListNode *) form, - target, stream, err); + return extra_ins + NATIVE_FUNCTIONS[i].handler(env, + (AstListNode *) form, + target, stream, err); } } - return compile_generic_function_call(env, (AstListNode *) form, stream, err); + return extra_ins + compile_generic_function_call(env, + (AstListNode *) form, + target, + stream, err); } - return 0; + return extra_ins; } ssize_t byte_compile_form(CompileEnvironment *env, AstNode *form, FILE *stream, CompileError **err) { - env->first_available_var = 0; + env->first_available_val = 0; env->quote = AST_QUOTE_NONE; size_t ninst = byte_compile_form_internal(env, form, NULL, stream, err); + if (COMPILE_FORMAT == COMPILE_FORMAT_ASM) { + fputc('\n', stream); + } // reverse err if (err) { CompileError *cur_err = *err; @@ -1213,3 +1714,10 @@ ssize_t byte_compile_form(CompileEnvironment *env, AstNode *form, FILE *stream, } return ninst; } + +void load_toplevel_definition(CompileEnvironment *env, AstNode *form, + CompileError **err) { + if (is_function_call_named("defun", form)) { + parse_and_add_function(env, (AstListNode *) form, err); + } +} diff --git a/bootstrap/compile.h b/bootstrap/compile.h index d5f0408..33efbab 100644 --- a/bootstrap/compile.h +++ b/bootstrap/compile.h @@ -98,7 +98,7 @@ typedef struct { CompileLexenv *lexenv_stack; // the minimum var register that is safe to use - uint32_t first_available_var; + uint32_t first_available_val; AstQuoteType quote; } CompileEnvironment; @@ -107,5 +107,7 @@ void destroy_compile_environment(CompileEnvironment *env); ssize_t byte_compile_form(CompileEnvironment *env, AstNode *form, FILE *stream, CompileError **err); +void load_toplevel_definition(CompileEnvironment *env, AstNode *form, + CompileError **err); #endif diff --git a/bootstrap/constants.c b/bootstrap/constants.c index 7e8eb66..f12e1e9 100644 --- a/bootstrap/constants.c +++ b/bootstrap/constants.c @@ -9,29 +9,36 @@ const char *INSTRUCTION_NAMES[] = { [INST_CONS] = "CONS", [INST_LIST] = "LIST", [INST_VECTOR] = "VECTOR", + [INST_LENGTH] = "LENGTH", [INST_INTERN_LIT] = "INTERN_LIT", [INST_INTERN_DYN] = "INTERN_DYN", + [INST_TYPE_OF] = "TYPE_OF", [INST_SYMBOL_NAME] = "SYMBOL_NAME", [INST_MOV] = "MOV", [INST_FUNCALL] = "FUNCALL", [INST_RETVAL_COUNT] = "RETVAL_COUNT", + [INST_GET_RETVAL_COUNT] = "GET_RETVAL_COUNT", [INST_ENTER_LEXENV] = "ENTER_LEXENV", - [INST_ENTER_INHERITED_LEXENV] = "ENTER_INHERITED_LEXENV", - [INST_LEAVE_LEXENV] = "LEAVE_LEXENV", [INST_ENTER_BLOCK] = "ENTER_BLOCK", [INST_LEAVE_BLOCK] = "LEAVE_BLOCK", [INST_SET_VALUE] = "SET_VALUE", [INST_SET_FUNCTION] = "SET_FUNCTION", [INST_GET_VALUE] = "GET_VALUE", [INST_GET_FUNCTION] = "GET_FUNCTION", + [INST_BOUNDP] = "BOUNDP", + [INST_FUNCTIONP] = "FUNCTIONP", [INST_NEWFUNCTION_LIT] = "NEWFUNCTION_LIT", [INST_NEWFUNCTION_DYN] = "NEWFUNCTION_DYN", [INST_PUT] = "PUT", [INST_GET] = "GET", [INST_AND] = "AND", + [INST_ANDN] = "ANDN", [INST_OR] = "OR", + [INST_ORN] = "ORN", [INST_XOR] = "XOR", + [INST_XORN] = "XORN", [INST_NOT] = "NOT", + [INST_JMP] = "JMP", [INST_CJMP] = "CJMP", [INST_CAR] = "CAR", [INST_CDR] = "CDR", @@ -41,13 +48,39 @@ const char *INSTRUCTION_NAMES[] = { [INST_GETELT_DYN] = "GETELT_DYN", [INST_SETELT_LIT] = "SETELT_LIT", [INST_SETELT_DYN] = "SETELT_DYN", - [INST_EQ_TWO] = "EQ_TWO", - [INST_EQ_N] = "EQ_N", + [INST_EQ] = "EQ", [INST_NUM_GT] = "NUM_GT", [INST_NUM_GE] = "NUM_GE", [INST_NUM_EQ] = "NUM_EQ", [INST_NUM_LE] = "NUM_LE", [INST_NUM_LT] = "NUM_LT", + [INST_ADD] = "ADD", + [INST_ADDN] = "ADDN", + [INST_SUB] = "SUB", + [INST_SUBN] = "SUBN", + [INST_MUL] = "MUL", + [INST_MULN] = "MULN", + [INST_DIV] = "DIV", + [INST_INT_DIV] = "INT_DIV", + [INST_RECIP] = "RECIP", + [INST_MOD] = "MOD", + [INST_SQRT] = "SQRT", + [INST_POW] = "POW", + [INST_LN] = "LN", + [INST_EXP] = "EXP", + [INST_SIN] = "SIN", + [INST_COS] = "COS", + [INST_TAN] = "TAN", + [INST_ASIN] = "ASIN", + [INST_ACOS] = "ACOS", + [INST_ATAN] = "ATAN", + [INST_BITAND] = "BITAND", + [INST_BITOR] = "BITOR", + [INST_BITXOR] = "BITXOR", + [INST_BITNOR] = "BITNOR", + [INST_BITNEG] = "BITNEG", + [INST_LSH] = "LSH", + [INST_ASH] = "ASH", }; const char *REGISTER_NAMES[] = { diff --git a/bootstrap/constants.h b/bootstrap/constants.h index 63d2a45..64d919d 100644 --- a/bootstrap/constants.h +++ b/bootstrap/constants.h @@ -5,55 +5,87 @@ typedef uint16_t LispInst; - -#define INST_NIL ((LispInst) 0) -#define INST_T ((LispInst) 1) -#define INST_STRING ((LispInst) 2) -#define INST_INT ((LispInst) 3) -#define INST_FLOAT ((LispInst) 4) -#define INST_CONS ((LispInst) 5) -#define INST_LIST ((LispInst) 6) -#define INST_VECTOR ((LispInst) 7) -#define INST_INTERN_LIT ((LispInst) 8) -#define INST_INTERN_DYN ((LispInst) 9) -#define INST_SYMBOL_NAME ((LispInst) 10) -#define INST_MOV ((LispInst) 11) -#define INST_FUNCALL ((LispInst) 12) -#define INST_RETVAL_COUNT ((LispInst) 13) -#define INST_ENTER_LEXENV ((LispInst) 14) -#define INST_ENTER_INHERITED_LEXENV ((LispInst) 15) -#define INST_LEAVE_LEXENV ((LispInst) 16) -#define INST_ENTER_BLOCK ((LispInst) 17) -#define INST_LEAVE_BLOCK ((LispInst) 18) -#define INST_SET_VALUE ((LispInst) 19) -#define INST_SET_FUNCTION ((LispInst) 20) -#define INST_GET_VALUE ((LispInst) 21) -#define INST_GET_FUNCTION ((LispInst) 22) -#define INST_NEWFUNCTION_LIT ((LispInst) 23) -#define INST_NEWFUNCTION_DYN ((LispInst) 24) -#define INST_PUT ((LispInst) 25) -#define INST_GET ((LispInst) 26) -#define INST_AND ((LispInst) 27) -#define INST_OR ((LispInst) 28) -#define INST_XOR ((LispInst) 29) -#define INST_NOT ((LispInst) 30) -#define INST_CJMP ((LispInst) 31) -#define INST_CAR ((LispInst) 32) -#define INST_CDR ((LispInst) 33) -#define INST_SETCAR ((LispInst) 34) -#define INST_SETCDR ((LispInst) 35) -#define INST_GETELT_LIT ((LispInst) 36) -#define INST_GETELT_DYN ((LispInst) 37) -#define INST_SETELT_LIT ((LispInst) 38) -#define INST_SETELT_DYN ((LispInst) 39) -#define INST_EQ_TWO ((LispInst) 40) -#define INST_EQ_N ((LispInst) 41) -#define INST_NUM_GT ((LispInst) 42) -#define INST_NUM_GE ((LispInst) 43) -#define INST_NUM_EQ ((LispInst) 44) -#define INST_NUM_LE ((LispInst) 45) -#define INST_NUM_LT ((LispInst) 46) -#define N_INSTRUCTIONS ((LispInst) 47) +#define INST_NIL ((LispInst) 0) +#define INST_T ((LispInst) 1) +#define INST_STRING ((LispInst) 2) +#define INST_INT ((LispInst) 3) +#define INST_FLOAT ((LispInst) 4) +#define INST_CONS ((LispInst) 5) +#define INST_LIST ((LispInst) 6) +#define INST_VECTOR ((LispInst) 7) +#define INST_LENGTH ((LispInst) 8) +#define INST_INTERN_LIT ((LispInst) 9) +#define INST_INTERN_DYN ((LispInst) 10) +#define INST_TYPE_OF ((LispInst) 11) +#define INST_SYMBOL_NAME ((LispInst) 12) +#define INST_MOV ((LispInst) 13) +#define INST_FUNCALL ((LispInst) 14) +#define INST_RETVAL_COUNT ((LispInst) 15) +#define INST_GET_RETVAL_COUNT ((LispInst) 16) +#define INST_ENTER_LEXENV ((LispInst) 17) +#define INST_ENTER_BLOCK ((LispInst) 18) +#define INST_LEAVE_BLOCK ((LispInst) 19) +#define INST_SET_VALUE ((LispInst) 20) +#define INST_SET_FUNCTION ((LispInst) 21) +#define INST_GET_VALUE ((LispInst) 22) +#define INST_GET_FUNCTION ((LispInst) 23) +#define INST_BOUNDP ((LispInst) 24) +#define INST_FUNCTIONP ((LispInst) 25) +#define INST_NEWFUNCTION_LIT ((LispInst) 26) +#define INST_NEWFUNCTION_DYN ((LispInst) 27) +#define INST_PUT ((LispInst) 28) +#define INST_GET ((LispInst) 29) +#define INST_AND ((LispInst) 30) +#define INST_ANDN ((LispInst) 31) +#define INST_OR ((LispInst) 32) +#define INST_ORN ((LispInst) 33) +#define INST_XOR ((LispInst) 34) +#define INST_XORN ((LispInst) 35) +#define INST_NOT ((LispInst) 36) +#define INST_JMP ((LispInst) 37) +#define INST_CJMP ((LispInst) 38) +#define INST_CAR ((LispInst) 39) +#define INST_CDR ((LispInst) 40) +#define INST_SETCAR ((LispInst) 41) +#define INST_SETCDR ((LispInst) 42) +#define INST_GETELT_LIT ((LispInst) 43) +#define INST_GETELT_DYN ((LispInst) 44) +#define INST_SETELT_LIT ((LispInst) 45) +#define INST_SETELT_DYN ((LispInst) 46) +#define INST_EQ ((LispInst) 47) +#define INST_NUM_GT ((LispInst) 48) +#define INST_NUM_GE ((LispInst) 49) +#define INST_NUM_EQ ((LispInst) 50) +#define INST_NUM_LE ((LispInst) 51) +#define INST_NUM_LT ((LispInst) 52) +#define INST_ADD ((LispInst) 53) +#define INST_ADDN ((LispInst) 54) +#define INST_SUB ((LispInst) 55) +#define INST_SUBN ((LispInst) 56) +#define INST_MUL ((LispInst) 57) +#define INST_MULN ((LispInst) 58) +#define INST_DIV ((LispInst) 59) +#define INST_INT_DIV ((LispInst) 60) +#define INST_RECIP ((LispInst) 61) +#define INST_MOD ((LispInst) 62) +#define INST_SQRT ((LispInst) 63) +#define INST_POW ((LispInst) 64) +#define INST_LN ((LispInst) 65) +#define INST_EXP ((LispInst) 66) +#define INST_SIN ((LispInst) 67) +#define INST_COS ((LispInst) 68) +#define INST_TAN ((LispInst) 69) +#define INST_ASIN ((LispInst) 70) +#define INST_ACOS ((LispInst) 71) +#define INST_ATAN ((LispInst) 72) +#define INST_BITAND ((LispInst) 73) +#define INST_BITOR ((LispInst) 74) +#define INST_BITXOR ((LispInst) 75) +#define INST_BITNOR ((LispInst) 76) +#define INST_BITNEG ((LispInst) 77) +#define INST_LSH ((LispInst) 78) +#define INST_ASH ((LispInst) 79) +#define N_INSTRUCTIONS ((LispInst) 80) extern const char *INSTRUCTION_NAMES[]; diff --git a/bootstrap/main.c b/bootstrap/main.c index 2151e5d..60f3f35 100644 --- a/bootstrap/main.c +++ b/bootstrap/main.c @@ -4,6 +4,23 @@ #include "ast.h" #include "compile.h" +static const char NATIVE_DEFUNS[] = + "(defun native-write (bytes-or-string &key stream start end))"; + + +static void register_native_defuns(CompileEnvironment *env) { + FILE *in = fmemopen((char *) NATIVE_DEFUNS, sizeof(NATIVE_DEFUNS) - 1, "r"); + TokenStream *stream = make_token_stream(in); + while (!token_stream_is_eof(stream)) { + AstNode *node = ast_next_toplevel(stream, NULL); + if (node) { + load_toplevel_definition(env, node, NULL); + } + destroy_ast_node(node); + } + destroy_token_stream(stream); +} + int main(int argc, const char **argv) { ast_init_parser(); FILE *file = fopen("bootstrap/test.sl", "r"); @@ -13,6 +30,7 @@ int main(int argc, const char **argv) { TokenStream *stream = make_token_stream(file); COMPILE_FORMAT = COMPILE_FORMAT_ASM; CompileEnvironment *env = make_compile_environment(); + register_native_defuns(env); AstErrorList *ast_errs; CompileError *comp_errs; while (!token_stream_is_eof(stream)) { diff --git a/bootstrap/test.sl b/bootstrap/test.sl index 6c94841..3d21ff7 100644 --- a/bootstrap/test.sl +++ b/bootstrap/test.sl @@ -1,3 +1,14 @@ -(defun name_here () - "Test defun" - "a") +(defun stringp (obj) + (eq (type-of obj) 'string)) + +(defun greet-with (message &key stream) + (native-write message :stream stream)) + +(defun be-sad () + (native-write "Ok, bye then\n")) + +(if (stringp *msg*) + (progn + (greet-with *msg*)) + (be-sad)) +