Some stuff
This commit is contained in:
parent
550a6131e1
commit
3c3a9ff2d5
273
BYTECODE.md
Normal file
273
BYTECODE.md
Normal file
@ -0,0 +1,273 @@
|
||||
# Bytecode Documentation
|
||||
|
||||
## Instructions
|
||||
Each instruction consists of a mnemonic and some number of arguments. The
|
||||
numeric value for each instruction is the same length, but the arguments can be
|
||||
different lengths, and each instruction can have a different number of
|
||||
arguments.
|
||||
|
||||
In this document, the layout for data is specified by a number of fields
|
||||
separated by commas. When represented in raw bits, these fields are next to each
|
||||
other with no padding in between. For example:
|
||||
```text
|
||||
length:u64, data:[i8]
|
||||
```
|
||||
represents an unsigned 64 bit integer field called "length" and a field called
|
||||
"data" which is an array of signed 8 bit integers. The value right before an
|
||||
array is the value which determines its length.
|
||||
|
||||
The argument types are as follows:
|
||||
| Name | Description |
|
||||
|:------------------|:--------------------------------------|
|
||||
| i8, i16, i32, i64 | 8, 16, 32, or 64 bit signed integer |
|
||||
| u8, u16, u32, u64 | 8, 16, 32, or 64 bit unsigned integer |
|
||||
| double | Equivilant to the C type "double" |
|
||||
| reg | register (format: type:u8,which:u32) |
|
||||
| str | string (format: length:u64,data:[i8]) |
|
||||
|
||||
### Registers
|
||||
Most instructions take register numbers instead of direct arguments. Registers
|
||||
take a type and a number. For example "lexenv2" is the third (counting from 0)
|
||||
lexenv register or "arg0" is the first argument register.
|
||||
|
||||
| Mnemonic | ID | Description |
|
||||
|:---------|:--:|:--------------------------------------------------------------|
|
||||
| val | 0 | General value registers (clobbered by calls) |
|
||||
| saved | 1 | Callee saved ragisters (val registers are clobbered by calls) |
|
||||
| arg | 2 | Function argument registers (clobbered by calls) |
|
||||
| ret | 3 | Function return value registers (clobbered by calls) |
|
||||
| lexenv | 4 | Lexical environment registers |
|
||||
| block | 5 | Block symbol registers |
|
||||
|
||||
### Instruction List
|
||||
|
||||
- NIL dest:reg
|
||||
Load the literal nil into DEST
|
||||
|
||||
- T def:reg
|
||||
Load the literal t into DEST
|
||||
|
||||
- STRING dest:reg, value:str
|
||||
Load the string of LENGTH bytes from DATA into dest.
|
||||
|
||||
- INT dest:reg, value:i64
|
||||
Convert VALUE into an int object and store it into DEST
|
||||
|
||||
- FLOAT dest:reg, value:double
|
||||
Convert VALUE into a float object and store it into DEST
|
||||
|
||||
- CONS dest:reg, car:reg, cdr:reg
|
||||
Create a cons object with CAR and CDR and store it into REG.
|
||||
|
||||
- LIST dest:reg, count:u64
|
||||
Create a list from the first COUNT "arg" registers and store it into DEST.
|
||||
|
||||
- VECTOR dest:reg, count:u64
|
||||
Create a vector from the first COUNT "arg" registers and store it into DEST.
|
||||
|
||||
- INTERN\_LIT reg:reg, name:str
|
||||
INTERN\_DYN reg:reg, name:reg
|
||||
These instructions convert the string literal or register containing a string
|
||||
NAME into a symbol and store the symbol into REG.
|
||||
|
||||
- SYMBOL\_NAME dest:reg, sym:reg
|
||||
Store the name of SYM into DEST.
|
||||
|
||||
- MOV dest:reg, src:reg
|
||||
Copy the value in the register SRC into the register DEST.
|
||||
|
||||
- FUNCALL reg:reg, argc:u64
|
||||
Call the function in REG. This should either be a function object or a symbol
|
||||
which has a value as a function. The return values are placed into the "ret"
|
||||
registers. ARGC is the number of "arg" registers that have been set for this
|
||||
function call.
|
||||
|
||||
- RETVAL_COUNT count:u8
|
||||
Declare that first COUNT return values have been set (if they have not been
|
||||
touched during this lexenv, they will be set to nil). Without this, the
|
||||
highest "ret" register written to is the number to use. Without this, assume
|
||||
one return value.
|
||||
|
||||
- ENTER\_LEXENV
|
||||
LEAVE\_LEXENV
|
||||
Shift all the "lexenv" registers up by 1 and then create a new lexenv and
|
||||
store it into "lexenv0". LEAVE\_LEXENV does the opposite, restoring the last
|
||||
pushed levenv.
|
||||
|
||||
- ENTER\_BLOCK sym:reg, count:u64
|
||||
LEAVE\_BLOCK sym:reg
|
||||
Enter a new named block which is identified by the symbol in SYM. The block is
|
||||
COUNT instructions long. LEAVE\_BLOCK leaved the block identified by
|
||||
SYM. Adding a new block pushes SYM onto the "block" registers, much like
|
||||
PUSH\_LEXENV (which see).
|
||||
|
||||
- SET\_VALUE sym:reg, value:reg
|
||||
Set the value as a variable of SYM to VALUE.
|
||||
|
||||
- SET\_FUNCTION sym:reg, value:reg
|
||||
Set the value as a function of SYM to VALUE (value must be an actual function,
|
||||
not a symbol).
|
||||
|
||||
- GET\_VALUE dest:reg, sym:reg
|
||||
Store the value as a variable of the symbol SYM into DEST.
|
||||
|
||||
- GET\_FUNCTION dest:reg, sym:reg
|
||||
Store the value as a function of the symbol SYM into DEST.
|
||||
|
||||
- NEWFUNCTION\_LIT dest:reg, count:u64
|
||||
NEWFUNCTION\_DYN dest:reg, src:reg
|
||||
Create a new function object and store it into DEST. If the first case the
|
||||
next COUNT instructions are considered to be the function and are skipped. In
|
||||
the second case SRC should be a list or vector containing the bytecode for the
|
||||
function.
|
||||
|
||||
- PUT sym:reg, key:reg, value:reg
|
||||
Associate KEY with VALUE in the plist of SYM.
|
||||
|
||||
- GET dest:reg, sym:reg, key:reg
|
||||
Store the value associated with KEY in the plist of SYM into DEST.
|
||||
|
||||
- AND dest:reg, count:u64, values:[reg]
|
||||
OR dest:reg, count:u64, values:[reg]
|
||||
XOR dest:reg, count:u64, values:[reg]
|
||||
NOT dest:reg, value:reg
|
||||
Perform a logical operation on each of VALUES. For example, the XOR
|
||||
instruction will exclusively or each of VALUES with the next value, and store
|
||||
the overall result into DEST. NOT is special as it can only take one value,
|
||||
of which it will take the logical negation and store it into DEST.
|
||||
|
||||
- CJMP cond:reg, offset:i64
|
||||
If the value in COND is truthy (not nil), skip the next OFFSET
|
||||
instructions. If OFFSET is negative, instead go back abs(OFFSET)
|
||||
instructions. CJMP is NOT counted as an instruction for the purposes of
|
||||
counting offsets. Therefore an OFFSET of -2 means restart execute at the
|
||||
instruction above the instruction above this CJMP.
|
||||
|
||||
- CAR dest:reg, cons:reg
|
||||
CDR dest:reg, cons:reg
|
||||
Store the car or cdr of CONS into DEST.
|
||||
|
||||
- SETCAR cons:reg, value:reg
|
||||
SETCDR cons:reg, value:reg
|
||||
Store VALUE into the car or cdr of CONS.
|
||||
|
||||
|
||||
- GETELT\_LIT dest:reg, seq:reg, index:u64
|
||||
GETELT\_DYN dest:reg, seq:reg, index:reg
|
||||
Store the value at INDEX in SEQ (a list or vector) into DEST.
|
||||
|
||||
- SETELT\_LIT seq:reg, index:u64, value:reg
|
||||
SETELT\_DYN seq:reg, index:reg, value:reg
|
||||
Store VALUE into the index numbered INDEX of SEQ (a list or vector).
|
||||
|
||||
- EQ\_TWO dest:reg, val1:reg, val2:reg
|
||||
EQ\_N dest:reg, count:u64
|
||||
Compare VAL1 and VAL2, if they are the same object (or symbols with the same
|
||||
name) store T into DEST, otherwise, store NIL. In the case of EQ\_N, if the
|
||||
first COUNT "arg" registers are the same object (or symbols with the same
|
||||
name), store T into DEST, otherwise, store NIL.
|
||||
|
||||
- NUM\_GT dest:reg, val1:reg, val2:reg
|
||||
NUM\_GE dest:reg, val1:reg, val2:reg
|
||||
NUM\_EQ dest:reg, val1:reg, val2:reg
|
||||
NUM\_LE dest:reg, val1:reg, val2:reg
|
||||
NUM\_LT dest:reg, val1:reg, val2:reg
|
||||
Compare VAL1 and VAL2, which must be numbers, and compare their values. If
|
||||
they pass, store T into DEST, otherwise store NIL.
|
||||
|
||||
## Mnemonic Conversion Table
|
||||
|
||||
| Mnemonic | Number |
|
||||
|:-----------------|:------:|
|
||||
| STRING\_LIT | 0 |
|
||||
| STRING\_DYN | 1 |
|
||||
| INT | 2 |
|
||||
| FLOAT | 3 |
|
||||
| CONS | 4 |
|
||||
| LSIT\_LIT | 5 |
|
||||
| LSIT\_DYN | 6 |
|
||||
| VECTOR\_LIT | 7 |
|
||||
| VECTOR\_DYN | 8 |
|
||||
| INTERN\_LIT | 9 |
|
||||
| INTERN\_DYN | 10 |
|
||||
| SYMBOL\_NAME | 11 |
|
||||
| MOV | 12 |
|
||||
| FUNCALL | 13 |
|
||||
| RETVAL\_COUNT | 14 |
|
||||
| ENTER\_LEXENV | 15 |
|
||||
| LEAVE\_ELEXENV | 16 |
|
||||
| ENTER\_BLOCK | 17 |
|
||||
| LEAVE\_BLOCK | 18 |
|
||||
| SET\_VALUE | 19 |
|
||||
| SET\_FUNCTION | 20 |
|
||||
| GET\_VALUE | 21 |
|
||||
| GET\_FUNCTION | 22 |
|
||||
| NEWFUNCTION\_LIT | 23 |
|
||||
| NEWFUNCTION\_DYN | 24 |
|
||||
| PUT | 25 |
|
||||
| GET | 26 |
|
||||
| AND | 27 |
|
||||
| OR | 28 |
|
||||
| XOR | 29 |
|
||||
| NOT | 30 |
|
||||
| CJMP | 31 |
|
||||
| CAR | 32 |
|
||||
| CDR | 33 |
|
||||
| SETCAR | 34 |
|
||||
| SETCDR | 35 |
|
||||
| GETELT\_LIT | 36 |
|
||||
| GETELT\_DYN | 37 |
|
||||
| SETELT\_LIT | 38 |
|
||||
| SETELT\_DYN | 39 |
|
||||
| EQ\_TWO | 40 |
|
||||
| EQ\_N | 41 |
|
||||
| NUM\_GT | 42 |
|
||||
| NUM\_GE | 43 |
|
||||
| NUM\_EQ | 44 |
|
||||
| NUM\_LE | 45 |
|
||||
| NUM\_LT | 46 |
|
||||
|
||||
## Examples
|
||||
This:
|
||||
```lisp
|
||||
(format t "Hello World~%")
|
||||
```
|
||||
Compiles into:
|
||||
```text
|
||||
INTERN_LIT val0, "Hello World~%"
|
||||
```
|
||||
|
||||
This:
|
||||
```lisp
|
||||
(defun foo (bar &key baz &rest qux)
|
||||
"FOO each of BAR and BAZ as well as each of QUX."
|
||||
(let (some-val (genval bar))
|
||||
(foo-internal some-val :baz baz qux)))
|
||||
|
||||
(foo 10 :baz 20)
|
||||
```
|
||||
Compiles into:
|
||||
```text
|
||||
NEWFUNCTION val0, 13 ;; not counting this instruction
|
||||
ENTER_LEXENV
|
||||
INTERN_LIT saved0, "foo"
|
||||
ENTER_BLOCK saved0, 9 ;; not counting this instruction
|
||||
INTERN_LIT val0, "genval"
|
||||
;; NOTE bar already in arg0
|
||||
FUNCALL val0 ;; NOTE val0 clobbered here
|
||||
INTERN_LIT val0, "foo-internal"
|
||||
MOV arg0, ret0
|
||||
MOV arg3, arg2
|
||||
MOV arg2, arg1
|
||||
INTERN_LIT arg1, ":baz"
|
||||
FUNCALL val0
|
||||
LEAVE_BLOCK saved0
|
||||
LEAVE_LEXENV
|
||||
|
||||
INTERN_LIT val1, "foo"
|
||||
SET_FUNCTION val1, val0
|
||||
INT arg0, 10
|
||||
INTERN_LIT arg1, ":baz"
|
||||
INT arg2, 20
|
||||
FUNCALL val1
|
||||
```
|
@ -5,7 +5,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS YES)
|
||||
|
||||
project(simple-lisp)
|
||||
|
||||
set(BOOTSTRAP_FILES main.c parse.c ast.c)
|
||||
set(BOOTSTRAP_FILES main.c parse.c ast.c constants.c compile.c)
|
||||
|
||||
foreach(FILE IN LISTS BOOTSTRAP_FILES)
|
||||
list(APPEND REAL_BOOTSTRAP_FILES "bootstrap/${FILE}")
|
||||
|
@ -61,17 +61,6 @@ void ast_deinit_parser() {
|
||||
regfree(&NON_DECIMAL_NUM_REGEX);
|
||||
}
|
||||
|
||||
// vasprintf is nonstandard, open_memstream is POSIX 2008
|
||||
char *compat_vasprintf(const char *fmt, va_list args) {
|
||||
va_list args2;
|
||||
va_copy(args2, args);
|
||||
size_t size = vsnprintf(NULL, 0, fmt, args) + 1;
|
||||
char *buf = malloc(size);
|
||||
vsnprintf(buf, size, fmt, args2);
|
||||
va_end(args2);
|
||||
return buf;
|
||||
}
|
||||
|
||||
static void push_error_list_end(AstErrorList **list, AstErrorList *err) {
|
||||
err->next = NULL;
|
||||
if (!*list) {
|
||||
@ -97,7 +86,8 @@ static void push_build_error(AstErrorList **list, Token *token, size_t off,
|
||||
token->buf_len = 0;
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
n->build.msg = compat_vasprintf(fmt, args);
|
||||
n->build.msg = NULL;
|
||||
vasprintf(&n->build.msg, fmt, args);
|
||||
va_end(args);
|
||||
push_error_list_end(list, n);
|
||||
}
|
||||
@ -201,9 +191,13 @@ static char escape_for_char(char to_escape) {
|
||||
return '\0';
|
||||
}
|
||||
|
||||
static char *escape_string(const char *input, size_t input_len, size_t *out_len) {
|
||||
char *ast_escape_string(const char *input, size_t input_len, size_t *out_len) {
|
||||
size_t out_size = input_len + 1;
|
||||
char *out = malloc(out_size);
|
||||
size_t backup_len;
|
||||
if (!out_len) {
|
||||
out_len = &backup_len;
|
||||
}
|
||||
*out_len = 0;
|
||||
for (size_t i = 0; i < input_len; ++i) {
|
||||
char escape = escape_for_char(input[i]);
|
||||
@ -778,9 +772,9 @@ static void ast_prin1_node_internal(AstNode *node, FILE *stream, int padding,
|
||||
break;
|
||||
case AST_TYPE_STRING: {
|
||||
size_t escaped_len;
|
||||
char *escaped_string = escape_string(((AstStringNode *)node)->value,
|
||||
((AstStringNode *)node)->length,
|
||||
&escaped_len);
|
||||
char *escaped_string = ast_escape_string(((AstStringNode *)node)->value,
|
||||
((AstStringNode *)node)->length,
|
||||
&escaped_len);
|
||||
fputc('"', stream);
|
||||
fwrite(escaped_string, 1, escaped_len, stream);
|
||||
fputc('"', stream);
|
||||
@ -837,6 +831,18 @@ void ast_prin1_node(AstNode *node, FILE *stream) {
|
||||
fputc('\n', stream);
|
||||
}
|
||||
|
||||
char *ast_prin1_node_to_string(AstNode *node, size_t *out_len) {
|
||||
size_t backup_outlen = 0;
|
||||
if (!out_len) {
|
||||
out_len = &backup_outlen;
|
||||
}
|
||||
char *outbuf = NULL;
|
||||
FILE *stream = open_memstream(&outbuf, out_len);
|
||||
ast_prin1_node(node, stream);
|
||||
fclose(stream);
|
||||
return outbuf;
|
||||
}
|
||||
|
||||
AstErrorList *ast_error_list_pop(AstErrorList **list) {
|
||||
AstErrorList *top = *list;
|
||||
if (*list) {
|
||||
|
@ -102,7 +102,9 @@ AstErrorList *ast_error_list_pop(AstErrorList **list);
|
||||
void ast_error_list_free_one(AstErrorList *list);
|
||||
void ast_error_list_free_all(AstErrorList *list);
|
||||
|
||||
char *ast_escape_string(const char *input, size_t input_len, size_t *out_len);
|
||||
void ast_prin1_node(AstNode *node, FILE *stream);
|
||||
char *ast_prin1_node_to_string(AstNode *node, size_t *out_len);
|
||||
void ast_format_error(AstErrorList *err, const char *file_name, FILE *stream);
|
||||
|
||||
#endif
|
||||
|
898
bootstrap/compile.c
Normal file
898
bootstrap/compile.c
Normal file
@ -0,0 +1,898 @@
|
||||
#include "compile.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
static ssize_t byte_compile_form_internal(CompileEnvironment *env,
|
||||
AstNode *form, LispReg *target,
|
||||
FILE *stream, CompileError **err);
|
||||
|
||||
static void push_error_raw(CompileError **list, CompileError *n) {
|
||||
if (list) {
|
||||
n->next = *list;
|
||||
*list = n;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((format(printf, 4, 5)))
|
||||
static void push_error_at_ast(CompileError **list, AstNode *form,
|
||||
CompileErrorType type, const char *fmt, ...) {
|
||||
if (list) {
|
||||
CompileError *err = malloc(sizeof(CompileError));
|
||||
err->type = type;
|
||||
err->context = ast_prin1_node_to_string(form, NULL);
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
err->message = NULL;
|
||||
vasprintf(&err->message, fmt, args);
|
||||
va_end(args);
|
||||
err->line = form->line;
|
||||
err->col = form->col;
|
||||
push_error_raw(list, err);
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((format(printf, 4, 5)))
|
||||
static void push_error_with_ctx(CompileError **list, const char *ctx,
|
||||
CompileErrorType type, const char *fmt, ...) {
|
||||
if (list) {
|
||||
CompileError *err = malloc(sizeof(CompileError));
|
||||
err->type = type;
|
||||
if (ctx) {
|
||||
err->context = strdup(ctx);
|
||||
} else {
|
||||
err->context = NULL;
|
||||
}
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
err->message = NULL;
|
||||
vasprintf(&err->message, fmt, args);
|
||||
va_end(args);
|
||||
err->line = 0; // no position information
|
||||
err->col = 0;
|
||||
push_error_raw(list, err);
|
||||
}
|
||||
}
|
||||
|
||||
CompileFormat COMPILE_FORMAT = COMPILE_FORMAT_BIN;
|
||||
|
||||
typedef enum {
|
||||
ARG_U8,
|
||||
ARG_U16,
|
||||
ARG_U32,
|
||||
ARG_U64,
|
||||
ARG_I8,
|
||||
ARG_I16,
|
||||
ARG_I32,
|
||||
ARG_I64,
|
||||
ARG_REG,
|
||||
ARG_STR,
|
||||
ARG_BYTES,
|
||||
ARG_DOUBLE,
|
||||
} InstArgType;
|
||||
|
||||
static bool emit_next_arg_c(CompileError **err, FILE *stream,
|
||||
va_list args) {
|
||||
InstArgType type = va_arg(args, InstArgType);
|
||||
if (type == ARG_STR || type == ARG_BYTES) {
|
||||
size_t len;
|
||||
if (type == ARG_BYTES) {
|
||||
len = va_arg(args, size_t);
|
||||
}
|
||||
const char *val = va_arg(args, const char *);
|
||||
if (type == ARG_STR) {
|
||||
len = strlen(val);
|
||||
}
|
||||
size_t escaped_len;
|
||||
char *escaped = ast_escape_string(val, len, &escaped_len);
|
||||
fputc('"', stream);
|
||||
fwrite(escaped, 1, escaped_len, stream);
|
||||
fputc('"', stream);
|
||||
free(escaped);
|
||||
return true;
|
||||
} else if (type == ARG_REG) {
|
||||
LispRegType type = va_arg(args, int); // LispRegType promotes to int
|
||||
if (type > N_REGISTTERS) {
|
||||
push_error_with_ctx(err, NULL, COMPILE_ERROR,
|
||||
"unknown register type: %ju", (uintmax_t) type);
|
||||
return false;
|
||||
}
|
||||
uint32_t which = va_arg(args, uint32_t);
|
||||
fprintf(stream, "%s%ju", REGISTER_NAMES[type], (uintmax_t) which);
|
||||
return true;
|
||||
} else if (type == ARG_DOUBLE) {
|
||||
double value = va_arg(args, double);
|
||||
fprintf(stream, "%f", value);
|
||||
return true;
|
||||
}
|
||||
uint64_t val;
|
||||
const char *format;
|
||||
switch (type) {
|
||||
case ARG_U8: // promoted
|
||||
case ARG_U16: // promoted
|
||||
case ARG_U32:
|
||||
val = va_arg(args, uint32_t);
|
||||
format = "%ju";
|
||||
break;
|
||||
case ARG_U64:
|
||||
val = va_arg(args, uint64_t);
|
||||
format = "%ju";
|
||||
break;
|
||||
case ARG_I8:
|
||||
case ARG_I16:
|
||||
case ARG_I32:
|
||||
val = va_arg(args, uint32_t);
|
||||
format = "%jd";
|
||||
break;
|
||||
case ARG_I64:
|
||||
val = va_arg(args, uint64_t);
|
||||
format = "%jd";
|
||||
break;
|
||||
default:
|
||||
// shouldn't happen
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
fprintf(stream, format, (uintmax_t) val);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool emit_next_arg_bin(CompileError **err, FILE *stream,
|
||||
va_list args) {
|
||||
InstArgType type = va_arg(args, InstArgType);
|
||||
if (type == ARG_STR || type == ARG_BYTES) {
|
||||
size_t len;
|
||||
if (type == ARG_BYTES) {
|
||||
len = va_arg(args, size_t);
|
||||
}
|
||||
const char *val = va_arg(args, const char *);
|
||||
if (type == ARG_STR) {
|
||||
len = strlen(val);
|
||||
}
|
||||
fwrite(&len, sizeof(len), 1, stream);
|
||||
fwrite(val, 1, len, stream);
|
||||
return true;
|
||||
} else if (type == ARG_REG) {
|
||||
LispRegType type = va_arg(args, int); // LispRegType promotes to int
|
||||
if (type > N_REGISTTERS) {
|
||||
push_error_with_ctx(err, NULL, COMPILE_ERROR,
|
||||
"unknown register type: %ju", (uintmax_t) type);
|
||||
return false;
|
||||
}
|
||||
uint32_t which = va_arg(args, uint32_t);
|
||||
fwrite(&type, sizeof(type), 1, stream);
|
||||
fwrite(&which, sizeof(which), 1, stream);
|
||||
return true;
|
||||
} else if (type == ARG_DOUBLE) {
|
||||
double val = va_arg(args, double);
|
||||
fwrite(&val, sizeof(val), 1, stream);
|
||||
return true;
|
||||
}
|
||||
size_t size;
|
||||
uint64_t val;
|
||||
switch (type) {
|
||||
case ARG_U8:
|
||||
case ARG_I8:
|
||||
size = 8;
|
||||
val = va_arg(args, uint32_t);
|
||||
break;
|
||||
case ARG_U16:
|
||||
case ARG_I16:
|
||||
size = 16;
|
||||
val = va_arg(args, uint32_t);
|
||||
break;
|
||||
case ARG_U32:
|
||||
case ARG_I32:
|
||||
size = 32;
|
||||
val = va_arg(args, uint32_t);
|
||||
break;
|
||||
case ARG_U64:
|
||||
case ARG_I64:
|
||||
size = 64;
|
||||
val = va_arg(args, uint64_t);
|
||||
break;
|
||||
default:
|
||||
// shouldn't happen
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
fwrite(((void *) &val) + sizeof(uint64_t) - size, size, 1, stream);
|
||||
return false;
|
||||
}
|
||||
|
||||
// false on error, true on success
|
||||
static bool emit_instruction(CompileError **err, FILE *stream,
|
||||
LispInst inst, size_t nargs, ...) {
|
||||
if (inst > N_INSTRUCTIONS) {
|
||||
push_error_with_ctx(err, NULL, COMPILE_ERROR,
|
||||
"unknown instruction: %ju", (intmax_t) inst);
|
||||
return false;
|
||||
}
|
||||
switch (COMPILE_FORMAT) {
|
||||
case COMPILE_FORMAT_ASM:
|
||||
fprintf(stream, "%s%s", INSTRUCTION_NAMES[inst],
|
||||
nargs ? " " : "");
|
||||
break;
|
||||
case COMPILE_FORMAT_BIN:
|
||||
fwrite(&inst, sizeof(inst), 1, stream);
|
||||
break;
|
||||
}
|
||||
bool success = true;
|
||||
va_list args;
|
||||
va_start(args, nargs);
|
||||
for (size_t i = 0; i < nargs; ++i) {
|
||||
switch (COMPILE_FORMAT) {
|
||||
case COMPILE_FORMAT_ASM:
|
||||
success = emit_next_arg_c(err, stream, args);
|
||||
if (i < nargs - 1) {
|
||||
fprintf(stream, ", ");
|
||||
} else {
|
||||
fputc('\n', stream);
|
||||
}
|
||||
break;
|
||||
case COMPILE_FORMAT_BIN:
|
||||
success = emit_next_arg_bin(err, stream, args);
|
||||
break;
|
||||
}
|
||||
if (!success) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
va_end(args);
|
||||
return success;
|
||||
}
|
||||
|
||||
CompileError *compile_error_pop(CompileError **err) {
|
||||
CompileError *retval = *err;
|
||||
if (*err) {
|
||||
*err = (*err)->next;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
void compile_error_free_one(CompileError *err) {
|
||||
free(err->message);
|
||||
free(err->context);
|
||||
free(err);
|
||||
}
|
||||
|
||||
void compile_error_free_all(CompileError **err) {
|
||||
while (*err) {
|
||||
CompileError *next = (*err)->next;
|
||||
compile_error_free_one(*err);
|
||||
*err = next;
|
||||
}
|
||||
}
|
||||
|
||||
void compile_format_error(CompileError *err, const char *file, FILE *stream) {
|
||||
fprintf(stream, "error: ");
|
||||
if (file) {
|
||||
fprintf(stream, "%s: ", file);
|
||||
}
|
||||
if (err->line) {
|
||||
fprintf(stream, "%zu:%zu:", err->line, err->col);
|
||||
}
|
||||
fprintf(stream, "%s", err->message);
|
||||
if (err->context) {
|
||||
fprintf(stream, ":\n%s\n", err->context);
|
||||
} else {
|
||||
fputc('\n', stream);
|
||||
}
|
||||
}
|
||||
|
||||
void destroy_function_entry(FunctionEntry *entry) {
|
||||
free(entry->name);
|
||||
free(entry->doc);
|
||||
for (size_t i = 0; i < entry->nkeys; ++i) {
|
||||
free(entry->keys[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void destroy_variable_entry(VariableEntry *entry) {
|
||||
free(entry->name);
|
||||
free(entry->doc);
|
||||
}
|
||||
|
||||
void destroy_compile_lexenv(CompileLexenv *lexenv) {
|
||||
for (size_t i = 0; i < lexenv->nsymbols; ++i) {
|
||||
free(lexenv->symbols[i].name);
|
||||
}
|
||||
free(lexenv->symbols);
|
||||
for (size_t i = 0; i < lexenv->nvars; ++i) {
|
||||
free(lexenv->vars[i].name);
|
||||
}
|
||||
free(lexenv->vars);
|
||||
free(lexenv);
|
||||
}
|
||||
|
||||
static void environment_enter_lexenv(CompileEnvironment *env) {
|
||||
CompileLexenv *n = malloc(sizeof(CompileLexenv));
|
||||
n->vars = NULL;
|
||||
n->nvars = 0;
|
||||
n->symbols = NULL;
|
||||
n->nsymbols = 0;
|
||||
n->first_avaiable_saved = 0;
|
||||
n->next = env->lexenv_stack;
|
||||
env->lexenv_stack = n;
|
||||
}
|
||||
|
||||
CompileEnvironment *make_compile_environment() {
|
||||
CompileEnvironment *env = malloc(sizeof(CompileEnvironment));
|
||||
env->funcs = NULL;
|
||||
env->nfuncs = 0;
|
||||
env->vars = NULL;
|
||||
env->nvars = 0;
|
||||
env->lexenv_stack = NULL;
|
||||
environment_enter_lexenv(env); // toplevel lexenv
|
||||
return env;
|
||||
}
|
||||
|
||||
void destroy_compile_environment(CompileEnvironment *env) {
|
||||
for (size_t i = 0; i < env->nfuncs; ++i) {
|
||||
destroy_function_entry(&env->funcs[i]);
|
||||
}
|
||||
for (size_t i = 0; i < env->nvars; ++i) {
|
||||
destroy_variable_entry(&env->vars[i]);
|
||||
}
|
||||
while (env->lexenv_stack) {
|
||||
CompileLexenv *next = env->lexenv_stack->next;
|
||||
destroy_compile_lexenv(env->lexenv_stack);
|
||||
env->lexenv_stack = next;
|
||||
}
|
||||
free(env);
|
||||
}
|
||||
|
||||
static bool is_symbol_named(const char *name, AstNode *form) {
|
||||
return form->type == AST_TYPE_SYMBOL &&
|
||||
strcmp(name, ((AstSymbolNode *) form)->name) == 0;
|
||||
}
|
||||
|
||||
static bool is_function_call(AstNode *form) {
|
||||
return form->type == AST_TYPE_LIST && ((AstListNode *) form)->nchildren;
|
||||
}
|
||||
|
||||
static bool is_function_call_named(const char *name, AstNode *form) {
|
||||
return is_function_call(form) &&
|
||||
is_symbol_named(name, ((AstListNode *) form)->children[1]);
|
||||
}
|
||||
|
||||
static FunctionEntry *lookup_function(CompileEnvironment *env,
|
||||
AstSymbolNode *name) {
|
||||
for (size_t i = 0; i < env->nfuncs; ++i) {
|
||||
FunctionEntry *cur = &env->funcs[i];
|
||||
if (cur->name_len == name->name_length &&
|
||||
strcmp(cur->name, name->name) == 0) {
|
||||
return cur;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static VariableEntry *lookup_variable(CompileEnvironment *env,
|
||||
AstSymbolNode *name) {
|
||||
for (size_t i = 0; i < env->nvars; ++i) {
|
||||
VariableEntry *cur = &env->vars[i];
|
||||
if (cur->name_len == name->name_length &&
|
||||
strcmp(cur->name, name->name) == 0) {
|
||||
return cur;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static bool is_property_symbol(AstNode *form) {
|
||||
return form->type == AST_TYPE_SYMBOL &&
|
||||
((AstSymbolNode *) form)->is_property;
|
||||
}
|
||||
|
||||
static bool function_key_ok(AstSymbolNode *key, FunctionEntry *func) {
|
||||
if (func->allow_other_keys) {
|
||||
return true;
|
||||
}
|
||||
for (size_t i = 0; i < func->nkeys; ++i) {
|
||||
if (strcmp(key->name, func->keys[i]) == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// return false only if there is an error (not a warning)
|
||||
static bool function_arguments_ok(AstListNode *form, FunctionEntry *func,
|
||||
CompileError **err) {
|
||||
bool was_ok = true;
|
||||
size_t npositional = form->nchildren - 1;
|
||||
for (size_t i = 1; i < form->nchildren; ++i) {
|
||||
if (is_property_symbol(form->children[i])) {
|
||||
if (!function_key_ok((AstSymbolNode *) form->children[i], func)) {
|
||||
push_error_at_ast(err, form->children[i], COMPILE_WARNING,
|
||||
"unknown key for function %s", func->name);
|
||||
}
|
||||
if (i == form->nchildren - 1) {
|
||||
push_error_at_ast(err, form->children[i], COMPILE_ERROR,
|
||||
"no value provided to key");
|
||||
was_ok = false;
|
||||
}
|
||||
// skip next arg
|
||||
++i;
|
||||
npositional -= 2;
|
||||
}
|
||||
}
|
||||
if (npositional < func->nrequired) {
|
||||
push_error_at_ast(err, (AstNode *) form, COMPILE_ERROR,
|
||||
"function expects at least %zu argument(s), got %zu",
|
||||
func->nrequired, npositional);
|
||||
was_ok = false;
|
||||
} else if (!func->has_rest && npositional > func->nrequired + func->noptional) {
|
||||
push_error_at_ast(err, (AstNode *)form, COMPILE_ERROR,
|
||||
"function expects at most %zu argument(s), got %zu",
|
||||
func->nrequired + func->noptional, npositional);
|
||||
was_ok = false;
|
||||
}
|
||||
return was_ok;
|
||||
}
|
||||
|
||||
static void intern_in_lexenv(CompileLexenv *lexenv, const char *name,
|
||||
LispReg *reg) {
|
||||
if (lexenv) {
|
||||
lexenv->symbols = realloc(lexenv->symbols, sizeof(*lexenv->symbols)
|
||||
* ++(lexenv->nsymbols));
|
||||
lexenv->symbols[lexenv->nsymbols - 1].name = strdup(name);
|
||||
lexenv->symbols[lexenv->nsymbols - 1].reg = *reg;
|
||||
}
|
||||
}
|
||||
|
||||
static LispReg *lookup_symbol_reg(CompileEnvironment *env, const char *name) {
|
||||
CompileLexenv *lexenv = env->lexenv_stack;
|
||||
if (!lexenv) {
|
||||
return NULL;
|
||||
}
|
||||
for (size_t i = 0; i < lexenv->nsymbols; ++i) {
|
||||
if (strcmp(name, lexenv->symbols[i].name) == 0) {
|
||||
return &lexenv->symbols[i].reg;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static LispReg *lookup_local_var(CompileEnvironment *env, const char *name) {
|
||||
CompileLexenv *lexenv = env->lexenv_stack;
|
||||
if (!lexenv) {
|
||||
return NULL;
|
||||
}
|
||||
for (size_t i = 0; i < lexenv->nvars; ++i) {
|
||||
if (strcmp(name, lexenv->vars[i].name) == 0) {
|
||||
return &lexenv->vars[i].reg;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void next_open_saved_reg(CompileEnvironment *env, LispReg *out) {
|
||||
out->type = REG_SAVED;
|
||||
if (env->lexenv_stack) {
|
||||
out->which = env->lexenv_stack->first_avaiable_saved++;
|
||||
} else {
|
||||
out->which = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// return number of instructions emitted, or -1 on error
|
||||
static ssize_t intern_to_register(CompileEnvironment *env, FILE *stream,
|
||||
const char *name, size_t name_len,
|
||||
LispReg *target, CompileError **err) {
|
||||
CompileLexenv *lexenv = env->lexenv_stack;
|
||||
LispReg *cur_reg = lookup_symbol_reg(env, name);
|
||||
if (cur_reg) {
|
||||
if (SAME_REG(cur_reg, target)) {
|
||||
return 0;
|
||||
}
|
||||
if (!emit_instruction(err, stream, INST_MOV, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_REG, cur_reg->type, cur_reg->which)) {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
if (!emit_instruction(err, stream, INST_INTERN_LIT, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_BYTES, name_len, name)) {
|
||||
return -1;
|
||||
}
|
||||
if (target->type == REG_SAVED) {
|
||||
intern_in_lexenv(lexenv, name, target);
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static ssize_t intern_and_save(CompileEnvironment *env, FILE *stream,
|
||||
const char *name, size_t name_len,
|
||||
LispReg *reg, CompileError **err) {
|
||||
CompileLexenv *lexenv = env->lexenv_stack;
|
||||
LispReg *cur_reg = lookup_symbol_reg(env, name);
|
||||
if (cur_reg) {
|
||||
*reg = *cur_reg;
|
||||
return 0;
|
||||
}
|
||||
next_open_saved_reg(env, reg);
|
||||
if (!emit_instruction(err, stream, INST_INTERN_LIT, 2,
|
||||
ARG_REG, reg->type, reg->which,
|
||||
ARG_BYTES, name_len, name)) {
|
||||
return -1;
|
||||
}
|
||||
intern_in_lexenv(lexenv, name, reg);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static ssize_t compile_defun_call(CompileEnvironment *env, AstListNode *form,
|
||||
FILE *stream, CompileError **err) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t compile_devar_call(CompileEnvironment *env, AstListNode *form,
|
||||
FILE *stream, CompileError **err) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct {
|
||||
const char *name;
|
||||
ssize_t (*handler)(CompileEnvironment *env, AstListNode *form, FILE *stream,
|
||||
CompileError **err);
|
||||
} NATIVE_FUNCTIONS[] = {
|
||||
{"defun", compile_defun_call},
|
||||
{"defmacro", compile_defun_call},
|
||||
{"devar", compile_devar_call},
|
||||
{"defconst", compile_devar_call},
|
||||
{"defparam", compile_devar_call},
|
||||
};
|
||||
const size_t N_NATIVE_FUNCTIONS = sizeof(NATIVE_FUNCTIONS) /
|
||||
sizeof(NATIVE_FUNCTIONS[0]);
|
||||
|
||||
static ssize_t compile_string_node(CompileEnvironment *env, AstStringNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
return emit_instruction(err, stream, INST_STRING, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_BYTES, form->length, form->value) ? 1 : -1;
|
||||
}
|
||||
|
||||
static ssize_t compile_int_node(CompileEnvironment *env, AstIntNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
|
||||
return emit_instruction(err, stream, INST_INT, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_U64, form->value) ? 1 : -1;
|
||||
}
|
||||
|
||||
static ssize_t compile_float_node(CompileEnvironment *env, AstFloatNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
|
||||
return emit_instruction(err, stream, INST_FLOAT, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_DOUBLE, form->value) ? 1 : -1;
|
||||
}
|
||||
|
||||
static ssize_t compile_null_node(CompileEnvironment *env, AstNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
return emit_instruction(err, stream, INST_NIL, 1,
|
||||
ARG_REG, target->type, target->which) ? 1 : -1;
|
||||
}
|
||||
|
||||
static ssize_t compile_symbol_node(CompileEnvironment *env, AstSymbolNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
if (strcmp(form->name, "t") == 0) {
|
||||
return emit_instruction(err, stream, INST_T, 1,
|
||||
ARG_REG, target->type, target->which) ? 1 : -1;
|
||||
}
|
||||
LispReg *var_reg = lookup_local_var(env, form->name);
|
||||
if (var_reg) {
|
||||
if (SAME_REG(var_reg, target)) {
|
||||
return 0;
|
||||
}
|
||||
return emit_instruction(err, stream, INST_MOV, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_REG, var_reg->type, var_reg->which) ? 1 : -1;
|
||||
} else {
|
||||
// global variable
|
||||
if (!lookup_variable(env, form)) {
|
||||
// TODO add to queue to check later
|
||||
}
|
||||
LispReg sym_reg;
|
||||
ssize_t ec = intern_and_save(env, stream, form->name, form->name_length,
|
||||
&sym_reg, err);
|
||||
if (ec < 0) {
|
||||
return -1;
|
||||
}
|
||||
emit_instruction(err, stream, INST_GET_VALUE, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_REG, sym_reg.type, sym_reg.which);
|
||||
return ec + 1;
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t compile_quoted_symbol_node(CompileEnvironment *env, AstSymbolNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
return emit_instruction(err, stream, INST_INTERN_LIT, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_BYTES, form->name_length, form->name) ? 1 : -1;
|
||||
}
|
||||
|
||||
static ssize_t compile_quoted_quote_node(CompileEnvironment *env, AstQuoteNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
// return (quote internal-form)
|
||||
LispReg arg_reg = {
|
||||
.type = REG_ARG,
|
||||
.which = 1,
|
||||
};
|
||||
ssize_t ec = byte_compile_form_internal(env, (AstNode *) form->form,
|
||||
&arg_reg, stream, err);
|
||||
if (ec < 0) {
|
||||
return -1;
|
||||
}
|
||||
LispReg quote_sym_reg;
|
||||
ssize_t ec2 = intern_and_save(env, stream, "quote", sizeof("quote") - 1,
|
||||
"e_sym_reg, err);
|
||||
if (ec2 < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (!emit_instruction(err, stream, INST_MOV, 2,
|
||||
ARG_REG, REG_ARG, (uint32_t) 0,
|
||||
ARG_REG, quote_sym_reg.type, quote_sym_reg.which)) {
|
||||
return -1;
|
||||
}
|
||||
if (!emit_instruction(err, stream, INST_LIST, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_U64, (uint64_t) 2)) {
|
||||
return -1;
|
||||
}
|
||||
return ec + ec2 + 2;
|
||||
}
|
||||
|
||||
static ssize_t compile_quote_node(CompileEnvironment *env, AstQuoteNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
switch (form->type) {
|
||||
case AST_QUOTE_NORM:
|
||||
case AST_QUOTE_BACK: {
|
||||
env->quote = form->type;
|
||||
size_t ec = byte_compile_form_internal(env, form->form, target,
|
||||
stream, err);
|
||||
env->quote = AST_QUOTE_NONE;
|
||||
return ec;
|
||||
}
|
||||
case AST_QUOTE_COMMA: {
|
||||
if (env->quote != AST_QUOTE_BACK) {
|
||||
push_error_at_ast(err, (AstNode *)form, COMPILE_ERROR,
|
||||
"comma not inside backquote");
|
||||
return -1;
|
||||
}
|
||||
env->quote = AST_QUOTE_NONE;
|
||||
ssize_t ec = byte_compile_form_internal(env, form->form, target,
|
||||
stream, err);
|
||||
env->quote = AST_QUOTE_BACK;
|
||||
return ec;
|
||||
}
|
||||
case AST_QUOTE_SPLICE:
|
||||
// TODO handle this
|
||||
return 0;
|
||||
case AST_QUOTE_NONE:
|
||||
// shouldn't happen, do nothing
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t compile_quoted_list_node(CompileEnvironment *env, AstListNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
LispReg arg_reg = {
|
||||
.type = REG_ARG,
|
||||
};
|
||||
size_t total_emitted = 0;
|
||||
for (size_t i = 0; i < form->nchildren; ++i) {
|
||||
arg_reg.which = i;
|
||||
ssize_t ec = byte_compile_form_internal(env, form->children[i],
|
||||
&arg_reg, stream, err);
|
||||
if (ec < 0) {
|
||||
return -1;
|
||||
}
|
||||
total_emitted += ec;
|
||||
}
|
||||
if (!emit_instruction(err, stream, INST_LIST, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_U64, form->nchildren)) {
|
||||
return -1;
|
||||
}
|
||||
return total_emitted + 1;
|
||||
}
|
||||
|
||||
static ssize_t compile_vector_node(CompileEnvironment *env, AstVectorNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
env->quote = AST_QUOTE_NORM;
|
||||
LispReg arg_reg = {
|
||||
.type = REG_ARG,
|
||||
};
|
||||
size_t total_emitted = 0;
|
||||
for (size_t i = 0; i < form->nchildren; ++i) {
|
||||
arg_reg.which = i;
|
||||
ssize_t ec = byte_compile_form_internal(env, form->children[i],
|
||||
&arg_reg, stream, err);
|
||||
if (ec < 0) {
|
||||
return -1;
|
||||
}
|
||||
total_emitted += ec;
|
||||
}
|
||||
if (!emit_instruction(err, stream, INST_VECTOR, 2,
|
||||
ARG_REG, target->type, target->which,
|
||||
ARG_U64, form->nchildren)) {
|
||||
return -1;
|
||||
}
|
||||
env->quote = AST_QUOTE_NONE;
|
||||
return total_emitted + 1;
|
||||
}
|
||||
|
||||
static ssize_t compile_atom_node(CompileEnvironment *env, AstNode *form,
|
||||
LispReg *target, FILE *stream,
|
||||
CompileError **err) {
|
||||
switch (form->type) {
|
||||
case AST_TYPE_INT:
|
||||
return compile_int_node(env, (AstIntNode *) form, target, stream, err);
|
||||
case AST_TYPE_FLOAT:
|
||||
return compile_float_node(env, (AstFloatNode *)form, target, stream, err);
|
||||
case AST_TYPE_STRING:
|
||||
return compile_string_node(env, (AstStringNode *) form, target, stream, err);
|
||||
case AST_TYPE_NULL:
|
||||
return compile_null_node(env, form, target, stream, err);
|
||||
case AST_TYPE_SYMBOL:
|
||||
if (env->quote == AST_QUOTE_NONE) {
|
||||
return compile_symbol_node(env, (AstSymbolNode *) form, target,
|
||||
stream, err);
|
||||
} else {
|
||||
return compile_quoted_symbol_node(env, (AstSymbolNode *) form,
|
||||
target, stream, err);
|
||||
}
|
||||
case AST_TYPE_QUOTE:
|
||||
if (env->quote == AST_QUOTE_NONE) {
|
||||
return compile_quote_node(env, (AstQuoteNode *) form, target, stream, err);
|
||||
} else {
|
||||
return compile_quoted_quote_node(env, (AstQuoteNode *) form, target,
|
||||
stream, err);
|
||||
}
|
||||
case AST_TYPE_VECTOR:
|
||||
return compile_vector_node(env, (AstVectorNode *)form, target,
|
||||
stream, err);
|
||||
case AST_TYPE_LIST:
|
||||
if (env->quote == AST_QUOTE_NONE) {
|
||||
return compile_quoted_list_node(env, (AstListNode *) form, target,
|
||||
stream, err);
|
||||
} else {
|
||||
// probably an error, don't do anything though
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t compile_generic_function_call(CompileEnvironment *env,
|
||||
AstListNode *form, FILE *stream,
|
||||
CompileError **err) {
|
||||
size_t emitted_count = 0;
|
||||
AstSymbolNode *name = (AstSymbolNode *) form->children[0];
|
||||
FunctionEntry *entry = lookup_function(env, name);
|
||||
if (!entry) {
|
||||
// TODO add to list to check later
|
||||
} else if (!function_arguments_ok(form, entry, err)) {
|
||||
// arguments invalid, give up compiling form
|
||||
return -1;
|
||||
}
|
||||
// first, thigs that clobber the "arg" registers
|
||||
for (size_t i = 1; i < form->nchildren; ++i) {
|
||||
switch (form->children[i]->type) {
|
||||
case AST_TYPE_QUOTE:
|
||||
case AST_TYPE_VECTOR:
|
||||
case AST_TYPE_LIST: {
|
||||
bool success = true;
|
||||
LispReg reg = {
|
||||
.type = REG_ARG,
|
||||
.which = i - 1,
|
||||
};
|
||||
ssize_t ec = byte_compile_form_internal(env, form->children[i],
|
||||
®, stream, err);
|
||||
if (!success) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
// then all the other types of arguments
|
||||
for (size_t i = 1; i < form->nchildren; ++i) {
|
||||
switch (form->children[i]->type) {
|
||||
case AST_TYPE_SYMBOL:
|
||||
case AST_TYPE_INT:
|
||||
case AST_TYPE_FLOAT:
|
||||
case AST_TYPE_STRING:
|
||||
case AST_TYPE_NULL: {
|
||||
bool success = true;
|
||||
LispReg reg = {
|
||||
.type = REG_ARG,
|
||||
.which = i - 1,
|
||||
};
|
||||
ssize_t ec = byte_compile_form_internal(env, form->children[i],
|
||||
®, stream, err);
|
||||
if (!success) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case AST_TYPE_QUOTE:
|
||||
case AST_TYPE_VECTOR:
|
||||
case AST_TYPE_LIST:
|
||||
break;
|
||||
}
|
||||
if (form->children[i]->type != AST_TYPE_LIST) {
|
||||
}
|
||||
}
|
||||
LispReg name_reg;
|
||||
ssize_t ec = intern_and_save(env, stream, name->name, name->name_length,
|
||||
&name_reg, err);
|
||||
if (ec == -1) {
|
||||
return -1;
|
||||
}
|
||||
emit_instruction(err, stream, INST_FUNCALL, 2,
|
||||
ARG_REG, name_reg.type, name_reg.which,
|
||||
ARG_U64, form->nchildren - 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t byte_compile_form_internal(CompileEnvironment *env,
|
||||
AstNode *form, LispReg *target,
|
||||
FILE *stream, CompileError **err) {
|
||||
if (form->type != AST_TYPE_LIST) {
|
||||
if (!target) {
|
||||
// no use in compiling a literal if the value will just be thrown out
|
||||
return 0;
|
||||
}
|
||||
return compile_atom_node(env, form, target, stream, err);
|
||||
} else if (env->quote != AST_QUOTE_NONE) {
|
||||
return compile_quoted_list_node(env, (AstListNode *) form, target,
|
||||
stream, err);
|
||||
} else if (is_function_call(form)) {
|
||||
for (size_t i = 0; i < N_NATIVE_FUNCTIONS; ++i) {
|
||||
if (is_function_call_named(NATIVE_FUNCTIONS[i].name, form)) {
|
||||
return NATIVE_FUNCTIONS[i].handler(env, (AstListNode *) form,
|
||||
stream, err);
|
||||
}
|
||||
}
|
||||
return compile_generic_function_call(env, (AstListNode *) form, stream, err);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ssize_t byte_compile_form(CompileEnvironment *env, AstNode *form, FILE *stream,
|
||||
CompileError **err) {
|
||||
env->first_available_var = 0;
|
||||
env->quote = AST_QUOTE_NONE;
|
||||
size_t ninst = byte_compile_form_internal(env, form, NULL, stream, err);
|
||||
// reverse err
|
||||
if (err) {
|
||||
CompileError *cur_err = *err;
|
||||
CompileError *prev_err = NULL;
|
||||
while (cur_err) {
|
||||
CompileError *next = cur_err->next;
|
||||
cur_err->next = prev_err;
|
||||
prev_err = cur_err;
|
||||
cur_err = next;
|
||||
}
|
||||
*err = prev_err;
|
||||
}
|
||||
return ninst;
|
||||
}
|
107
bootstrap/compile.h
Normal file
107
bootstrap/compile.h
Normal file
@ -0,0 +1,107 @@
|
||||
#ifndef INCLUDED_COMPILE_H
|
||||
#define INCLUDED_COMPILE_H
|
||||
|
||||
#include "ast.h"
|
||||
#include "constants.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
typedef enum {
|
||||
COMPILE_FORMAT_ASM,
|
||||
COMPILE_FORMAT_BIN,
|
||||
} CompileFormat;
|
||||
|
||||
extern CompileFormat COMPILE_FORMAT;
|
||||
|
||||
typedef enum {
|
||||
COMPILE_WARNING,
|
||||
COMPILE_ERROR,
|
||||
} CompileErrorType;
|
||||
|
||||
typedef struct _CompileError {
|
||||
struct _CompileError *next;
|
||||
CompileErrorType type;
|
||||
// this will be zero if there is no position information
|
||||
size_t line;
|
||||
size_t col;
|
||||
char *message;
|
||||
char *context;
|
||||
} CompileError;
|
||||
|
||||
CompileError *compile_error_pop(CompileError **err);
|
||||
void compile_error_free_one(CompileError *err);
|
||||
void compile_error_free_all(CompileError **err);
|
||||
void compile_format_error(CompileError *err, const char *file, FILE *stream);
|
||||
|
||||
typedef struct {
|
||||
size_t line;
|
||||
size_t col;
|
||||
char *name;
|
||||
size_t name_len;
|
||||
size_t nrequired;
|
||||
size_t noptional;
|
||||
size_t nkeys;
|
||||
char **keys;
|
||||
|
||||
bool allow_other_keys;
|
||||
bool has_rest;
|
||||
|
||||
char *doc;
|
||||
size_t doc_len;
|
||||
} FunctionEntry;
|
||||
|
||||
void destroy_function_entry(FunctionEntry *entry);
|
||||
|
||||
typedef struct {
|
||||
size_t line;
|
||||
size_t col;
|
||||
char *name;
|
||||
size_t name_len;
|
||||
bool is_const;
|
||||
|
||||
char *doc;
|
||||
size_t doc_len;
|
||||
} VariableEntry;
|
||||
|
||||
void destroy_variable_entry(VariableEntry *entry);
|
||||
|
||||
typedef struct _CompileLexenv {
|
||||
struct _CompileLexenv *next;
|
||||
// local variables
|
||||
struct {
|
||||
char *name;
|
||||
LispReg reg;
|
||||
} *vars;
|
||||
size_t nvars;
|
||||
// symbols interned in this lexenv
|
||||
struct {
|
||||
char *name;
|
||||
LispReg reg;
|
||||
} *symbols;
|
||||
uint32_t first_avaiable_saved;
|
||||
size_t nsymbols;
|
||||
} CompileLexenv;
|
||||
|
||||
void destroy_compile_lexenv(CompileLexenv *lexenv);
|
||||
|
||||
typedef struct {
|
||||
size_t nfuncs;
|
||||
FunctionEntry *funcs;
|
||||
size_t nvars;
|
||||
VariableEntry *vars;
|
||||
size_t nlexenv;
|
||||
CompileLexenv *lexenv_stack;
|
||||
|
||||
// the minimum var register that is safe to use
|
||||
uint32_t first_available_var;
|
||||
AstQuoteType quote;
|
||||
} CompileEnvironment;
|
||||
|
||||
CompileEnvironment *make_compile_environment(void);
|
||||
void destroy_compile_environment(CompileEnvironment *env);
|
||||
|
||||
ssize_t byte_compile_form(CompileEnvironment *env, AstNode *form, FILE *stream,
|
||||
CompileError **err);
|
||||
|
||||
#endif
|
59
bootstrap/constants.c
Normal file
59
bootstrap/constants.c
Normal file
@ -0,0 +1,59 @@
|
||||
#include "constants.h"
|
||||
|
||||
const char *INSTRUCTION_NAMES[] = {
|
||||
[INST_NIL] = "NIL",
|
||||
[INST_T] = "T",
|
||||
[INST_STRING] = "STRING",
|
||||
[INST_INT] = "INT",
|
||||
[INST_FLOAT] = "FLOAT",
|
||||
[INST_CONS] = "CONS",
|
||||
[INST_LIST] = "LIST",
|
||||
[INST_VECTOR] = "VECTOR",
|
||||
[INST_INTERN_LIT] = "INTERN_LIT",
|
||||
[INST_INTERN_DYN] = "INTERN_DYN",
|
||||
[INST_SYMBOL_NAME] = "SYMBOL_NAME",
|
||||
[INST_MOV] = "MOV",
|
||||
[INST_FUNCALL] = "FUNCALL",
|
||||
[INST_RETVAL_COUNT] = "RETVAL_COUNT",
|
||||
[INST_ENTER_LEXENV] = "ENTER_LEXENV",
|
||||
[INST_LEAVE_ELEXENV] = "LEAVE_ELEXENV",
|
||||
[INST_ENTER_BLOCK] = "ENTER_BLOCK",
|
||||
[INST_LEAVE_BLOCK] = "LEAVE_BLOCK",
|
||||
[INST_SET_VALUE] = "SET_VALUE",
|
||||
[INST_SET_FUNCTION] = "SET_FUNCTION",
|
||||
[INST_GET_VALUE] = "GET_VALUE",
|
||||
[INST_GET_FUNCTION] = "GET_FUNCTION",
|
||||
[INST_NEWFUNCTION_LIT] = "NEWFUNCTION_LIT",
|
||||
[INST_NEWFUNCTION_DYN] = "NEWFUNCTION_DYN",
|
||||
[INST_PUT] = "PUT",
|
||||
[INST_GET] = "GET",
|
||||
[INST_AND] = "AND",
|
||||
[INST_OR] = "OR",
|
||||
[INST_XOR] = "XOR",
|
||||
[INST_NOT] = "NOT",
|
||||
[INST_CJMP] = "CJMP",
|
||||
[INST_CAR] = "CAR",
|
||||
[INST_CDR] = "CDR",
|
||||
[INST_SETCAR] = "SETCAR",
|
||||
[INST_SETCDR] = "SETCDR",
|
||||
[INST_GETELT_LIT] = "GETELT_LIT",
|
||||
[INST_GETELT_DYN] = "GETELT_DYN",
|
||||
[INST_SETELT_LIT] = "SETELT_LIT",
|
||||
[INST_SETELT_DYN] = "SETELT_DYN",
|
||||
[INST_EQ_TWO] = "EQ_TWO",
|
||||
[INST_EQ_N] = "EQ_N",
|
||||
[INST_NUM_GT] = "NUM_GT",
|
||||
[INST_NUM_GE] = "NUM_GE",
|
||||
[INST_NUM_EQ] = "NUM_EQ",
|
||||
[INST_NUM_LE] = "NUM_LE",
|
||||
[INST_NUM_LT] = "NUM_LT",
|
||||
};
|
||||
|
||||
const char *REGISTER_NAMES[] = {
|
||||
[REG_VAL] = "val",
|
||||
[REG_SAVED] = "saved",
|
||||
[REG_ARG] = "arg",
|
||||
[REG_RET] = "ret",
|
||||
[REG_LEXENV] = "lexenv",
|
||||
[REG_BLOCK] = "block",
|
||||
};
|
77
bootstrap/constants.h
Normal file
77
bootstrap/constants.h
Normal file
@ -0,0 +1,77 @@
|
||||
#ifndef INCLUDED_CONSTANTS_H
|
||||
#define INCLUDED_CONSTANTS_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef uint16_t LispInst;
|
||||
|
||||
|
||||
#define INST_NIL ((LispInst) 0)
|
||||
#define INST_T ((LispInst) 1)
|
||||
#define INST_STRING ((LispInst) 2)
|
||||
#define INST_INT ((LispInst) 3)
|
||||
#define INST_FLOAT ((LispInst) 4)
|
||||
#define INST_CONS ((LispInst) 5)
|
||||
#define INST_LIST ((LispInst) 6)
|
||||
#define INST_VECTOR ((LispInst) 7)
|
||||
#define INST_INTERN_LIT ((LispInst) 8)
|
||||
#define INST_INTERN_DYN ((LispInst) 9)
|
||||
#define INST_SYMBOL_NAME ((LispInst) 10)
|
||||
#define INST_MOV ((LispInst) 11)
|
||||
#define INST_FUNCALL ((LispInst) 12)
|
||||
#define INST_RETVAL_COUNT ((LispInst) 13)
|
||||
#define INST_ENTER_LEXENV ((LispInst) 14)
|
||||
#define INST_LEAVE_ELEXENV ((LispInst) 15)
|
||||
#define INST_ENTER_BLOCK ((LispInst) 16)
|
||||
#define INST_LEAVE_BLOCK ((LispInst) 17)
|
||||
#define INST_SET_VALUE ((LispInst) 18)
|
||||
#define INST_SET_FUNCTION ((LispInst) 19)
|
||||
#define INST_GET_VALUE ((LispInst) 20)
|
||||
#define INST_GET_FUNCTION ((LispInst) 21)
|
||||
#define INST_NEWFUNCTION_LIT ((LispInst) 22)
|
||||
#define INST_NEWFUNCTION_DYN ((LispInst) 23)
|
||||
#define INST_PUT ((LispInst) 24)
|
||||
#define INST_GET ((LispInst) 25)
|
||||
#define INST_AND ((LispInst) 26)
|
||||
#define INST_OR ((LispInst) 27)
|
||||
#define INST_XOR ((LispInst) 28)
|
||||
#define INST_NOT ((LispInst) 29)
|
||||
#define INST_CJMP ((LispInst) 30)
|
||||
#define INST_CAR ((LispInst) 31)
|
||||
#define INST_CDR ((LispInst) 32)
|
||||
#define INST_SETCAR ((LispInst) 33)
|
||||
#define INST_SETCDR ((LispInst) 34)
|
||||
#define INST_GETELT_LIT ((LispInst) 35)
|
||||
#define INST_GETELT_DYN ((LispInst) 36)
|
||||
#define INST_SETELT_LIT ((LispInst) 37)
|
||||
#define INST_SETELT_DYN ((LispInst) 38)
|
||||
#define INST_EQ_TWO ((LispInst) 39)
|
||||
#define INST_EQ_N ((LispInst) 40)
|
||||
#define INST_NUM_GT ((LispInst) 41)
|
||||
#define INST_NUM_GE ((LispInst) 42)
|
||||
#define INST_NUM_EQ ((LispInst) 43)
|
||||
#define INST_NUM_LE ((LispInst) 44)
|
||||
#define INST_NUM_LT ((LispInst) 45)
|
||||
#define N_INSTRUCTIONS ((LispInst) 46)
|
||||
|
||||
extern const char *INSTRUCTION_NAMES[];
|
||||
|
||||
typedef uint8_t LispRegType;
|
||||
typedef struct {
|
||||
LispRegType type;
|
||||
uint32_t which;
|
||||
} LispReg;
|
||||
|
||||
#define REG_VAL ((LispRegType) 0)
|
||||
#define REG_SAVED ((LispRegType) 1)
|
||||
#define REG_ARG ((LispRegType) 2)
|
||||
#define REG_RET ((LispRegType) 3)
|
||||
#define REG_LEXENV ((LispRegType) 4)
|
||||
#define REG_BLOCK ((LispRegType) 5)
|
||||
#define N_REGISTTERS 6
|
||||
|
||||
extern const char *REGISTER_NAMES[];
|
||||
|
||||
#define SAME_REG(r1, r2) ((r1)->type == (r2)->type && (r1)->which == (r2)->which)
|
||||
|
||||
#endif
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include "parse.h"
|
||||
#include "ast.h"
|
||||
#include "compile.h"
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
ast_init_parser();
|
||||
@ -10,19 +11,28 @@ int main(int argc, const char **argv) {
|
||||
perror("fopen");
|
||||
}
|
||||
TokenStream *stream = make_token_stream(file);
|
||||
AstErrorList *errs;
|
||||
COMPILE_FORMAT = COMPILE_FORMAT_ASM;
|
||||
CompileEnvironment *env = make_compile_environment();
|
||||
AstErrorList *ast_errs;
|
||||
CompileError *comp_errs;
|
||||
while (!token_stream_is_eof(stream)) {
|
||||
AstNode *node = ast_next_toplevel(stream, &errs);
|
||||
if (node) {
|
||||
ast_prin1_node(node, stdout);
|
||||
}
|
||||
while (errs) {
|
||||
AstErrorList *err = ast_error_list_pop(&errs);
|
||||
ast_format_error(err, "bootstrap/test.sl", stderr);
|
||||
AstNode *node = ast_next_toplevel(stream, &ast_errs);
|
||||
while (ast_errs) {
|
||||
AstErrorList *err = ast_error_list_pop(&ast_errs);
|
||||
ast_format_error(err, "test.sl", stderr);
|
||||
ast_error_list_free_one(err);
|
||||
}
|
||||
if (node) {
|
||||
ssize_t ninst = byte_compile_form(env, node, stdout, &comp_errs);
|
||||
while (comp_errs) {
|
||||
CompileError *err = compile_error_pop(&comp_errs);
|
||||
compile_format_error(err, "test.sl", stderr);
|
||||
compile_error_free_one(err);
|
||||
}
|
||||
}
|
||||
destroy_ast_node(node);
|
||||
}
|
||||
destroy_compile_environment(env);
|
||||
destroy_token_stream(stream);
|
||||
ast_deinit_parser();
|
||||
return 0;
|
||||
|
@ -1 +1 @@
|
||||
'('a `(,a '(,a)))
|
||||
(format t '(abc) [a])
|
Loading…
Reference in New Issue
Block a user