Skip to content

Commit c7bf6e1

Browse files
add partial parser code
1 parent 32a214a commit c7bf6e1

File tree

8 files changed

+373
-1823
lines changed

8 files changed

+373
-1823
lines changed

.vscode/settings.json

+7-3
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
{
22
"cSpell.words": [
3+
"allocs",
4+
"defop",
5+
"eofp",
36
"eqcmp",
7+
"intof",
8+
"numof",
9+
"stringof",
410
"tinobsy"
511
],
612
"files.associations": {
713
"*.tmpl": "jinja-html",
814
"*.html": "html",
9-
"functional": "cpp",
1015
"array": "cpp",
1116
"string": "cpp",
12-
"string_view": "cpp",
13-
"cstdio": "cpp"
17+
"string_view": "cpp"
1418
}
1519
}

pickle.cpp

+224-46
Original file line numberDiff line numberDiff line change
@@ -35,26 +35,24 @@ char escape(char c) {
3535
}
3636
}
3737

38-
bool needs_escape(char c) {
39-
return strchr("{}\b\t\n\v\f\r\a\\\"", c) != NULL;
40-
}
41-
42-
void free_payload(object* o) { free(o->as_ptr); }
38+
// Releases the heap allocation held in the object's as_ptr payload.
// Passed to the object_type constructor for string_type and symbol_type.
static void free_payload(object* o) {
    free(o->as_ptr);
}
39+
// Mark callback for the primitive types: hands back only the object's car
// cell; the vm argument is not used.
static object* mark_car_only(tinobsy::vm* _, object* o) {
    (void)_;
    return car(o);
}
4340

4441
// ------------------------ core types -----------------
4542
// these will later be swapped for actual objects
4643

4744
// cons = car, cdr
4845
const object_type cons_type("cons", tinobsy::markcons, NULL, NULL);
46+
const object_type obj_type("object", tinobsy::markcons, NULL, NULL);
4947
// --------- primitive/ish types ---------------
50-
const object_type string_type("string", NULL, free_payload, NULL);
51-
const object_type symbol_type("symbol", NULL, free_payload, NULL);
52-
const object_type c_function_type("c_function", NULL, NULL, NULL);
53-
const object_type integer_type("int", NULL, NULL, NULL);
54-
const object_type float_type("float", NULL, NULL, NULL);
48+
const object_type string_type("string", mark_car_only, free_payload, NULL);
49+
const object_type symbol_type("symbol", mark_car_only, free_payload, NULL);
50+
const object_type c_function_type("c_function", mark_car_only, NULL, NULL);
51+
const object_type integer_type("int", mark_car_only, NULL, NULL);
52+
const object_type float_type("float", mark_car_only, NULL, NULL);
5553
const object_type* primitives[] = { &string_type, &symbol_type, &c_function_type, &integer_type, &float_type, NULL };
5654

57-
void pickle::mark_globals() {
55+
void pvm::mark_globals() {
5856
this->markobject(this->queue);
5957
this->markobject(this->globals);
6058
this->markobject(this->function_registry);
@@ -99,9 +97,9 @@ object* delassoc(object** list, object* key) {
9997
return NULL;
10098
}
10199

102-
// ---------- EVAL ENGINE --------------------------------------------
100+
// ---------- STACK MACHINE --------------------------------------------
103101

104-
void pickle::start_thread() {
102+
void pvm::start_thread() {
105103
// thread is list of (data stack, next instruction, instruction stack)
106104
object* new_thread = this->cons(nil, this->cons(nil, nil));
107105
if (!this->queue) {
@@ -116,15 +114,15 @@ void pickle::start_thread() {
116114
cdr(last) = this->queue;
117115
}
118116

119-
void pickle::step() {
117+
void pvm::step() {
120118
next_inst:
121119
if (!this->queue) return;
122120
object* next_type = car(cdr(this->curr_thread()));
123121
object* op = this->pop_inst();
124122
if (!op) {
125123
object* last = this->queue;
126124
if (cdr(last) == last) {
127-
// last thread: nothing to do
125+
// last thread and nothing to do
128126
this->queue = nil;
129127
return;
130128
}
@@ -136,46 +134,216 @@ void pickle::step() {
136134
object* type = car(op);
137135
if (eqcmp(type, next_type) != 0) goto next_inst;
138136
object* inst_name = car(cdr(op));
139-
object* inst_payload = cdr(cdr(op));
137+
object* cookie = cdr(cdr(op));
140138
object* pair = assoc(this->function_registry, inst_name);
141139
ASSERT(pair, "Unknown instruction %s", this->stringof(inst_name));
142-
next_type = this->fptr(cdr(pair))(this, inst_payload);
140+
next_type = this->fptr(cdr(pair))(this, cookie, next_type);
143141
car(cdr(this->curr_thread())) = next_type;
144142
this->queue = cdr(this->queue);
145143
}
146144

147145
//--------------- PARSER --------------------------------------
148146

147+
// Parser cursor over a character buffer. parse() fills it with a string's
// text, a starting index of 0, and strlen() of that text.
typedef struct {
148+
// text being parsed
const char* data;
149+
// index of the current character within data
size_t i;
150+
// number of characters in data
size_t len;
151+
} pstate;
152+
153+
// The shorthand macros below all expand to expressions on a variable named
// `s` of type pstate*, which must be in scope wherever they are used.
// pos: the current index (an lvalue).
#define pos (s->i)
154+
// restore: assignment prefix; write `restore <index>;` to set the index.
#define restore pos =
155+
// advance: compound-assignment prefix; write `advance <count>;` to move forward.
#define advance pos +=
156+
// next: post-increments the index by one character.
#define next pos++
157+
// look: the character at the current index (no bounds check).
#define look (s->data[pos])
158+
// at(z): pointer to the character at index z.
#define at(z) (&s->data[z])
159+
// here: pointer to the character at the current index.
#define here at(pos)
160+
// eofp: true once the index has reached or passed len.
#define eofp (pos >= s->len)
161+
// test(f): applies the predicate f to the current character.
#define test(f) (f(look))
162+
// chomp(str): if the text at the current index starts with str, advances past
// it and yields the new index; otherwise yields false. str is evaluated up to
// three times. The result is the new index, so it is only reliably truthy when
// str is non-empty.
#define chomp(str) (!strncmp(here, str, strlen(str)) ? advance strlen(str) : false)
163+
164+
// Appends the single character c to the heap string *b, growing it by one.
//
// *b may be NULL, which is treated as the empty string. On return *b is a
// NUL-terminated buffer owned by the caller (release with free()).
// If the allocation fails the buffer is left exactly as it was and the
// character is dropped, so *b never ends up dangling.
// Still not memory efficient: the buffer is resized on every call.
static void bufadd(char** b, char c) {
    size_t old_len = *b ? strlen(*b) : 0;
    // +1 for the new character, +1 for the terminator
    char* grown = (char*)realloc(*b, old_len + 2);
    if (!grown) return;
    grown[old_len] = c;
    grown[old_len + 1] = '\0';
    *b = grown;
}
170+
// Appends at most n characters of c to the heap string *b.
//
// Copying stops early at a NUL inside the first n characters of c; a negative
// n appends all of c (the same rules as printf's "%.*s").
// *b may be NULL, which is treated as the empty string. On return *b is a
// NUL-terminated buffer owned by the caller (release with free()).
// If the allocation fails the buffer is left exactly as it was.
static void bufcat(char** b, const char* c, int n) {
    size_t old_len = *b ? strlen(*b) : 0;
    size_t add_len;
    if (n < 0) {
        add_len = strlen(c);
    } else {
        const char* nul = (const char*)memchr(c, '\0', (size_t)n);
        add_len = nul ? (size_t)(nul - c) : (size_t)n;
    }
    char* grown = (char*)realloc(*b, old_len + add_len + 1);
    if (!grown) return;
    memcpy(grown + old_len, c, add_len);
    grown[old_len + add_len] = '\0';
    *b = grown;
}
175+
176+
177+
// Scans ONE token starting at the cursor s->i and returns it as a VM object.
//
//   vm      - used to allocate the resulting symbol/number/string objects
//   s       - parse cursor; advanced past whatever was consumed
//   error   - set to true when the token is malformed; the returned object is
//             then a string holding the error message
//   special - receives the character when the cursor is on one of (){}[];
//             nothing is returned for it and the cursor is NOT advanced
//             (NOTE(review): left as the original had it - confirm whether the
//             future caller expects to consume the bracket itself)
//
// Token kinds: a run of letters -> symbol; a number -> integer or float;
// horizontal whitespace -> the symbol SPACE; '#' -> the symbol "#";
// "##..." line comment and "###...###" block comment -> string of the comment
// text; quoted text -> string; newline + indented lines -> string holding the
// block's lines; any other punctuation -> one-character symbol.
// Returns nil at end of input, leaving *error and *special untouched.
static object* do_parse(pvm* vm, pstate* s, bool* error, char* special) {
    // End of input: nothing to scan. Handled up front because strchr() below
    // would otherwise match the terminating NUL.
    if (eofp) return nil;
    char c = look;
    char* b = NULL;   // token text being accumulated
    char* b2 = NULL;  // delimiter text: block-comment fence or block indent
    object* result = nil;
    if (isalpha((unsigned char)c)) {
        // identifier: a maximal run of letters
        size_t first = pos;
        while (!eofp && isalpha((unsigned char)look)) next;
        bufcat(&b, at(first), (int)(pos - first));
        result = vm->sym(b);
    }
    else if (isdigit((unsigned char)c)) {
        // %n is not counted in sscanf's return value, so a successful scan
        // returns 1. Scan both ways and keep the integer unless the float
        // reading consumed more text (i.e. there is a fraction or exponent).
        double d = 0;
        int64_t n = 0;
        int dlen = 0;
        int nlen = 0;
        bool got_float = sscanf(here, "%lg%n", &d, &dlen) == 1;
        bool got_int = sscanf(here, "%" SCNi64 "%n", &n, &nlen) == 1;
        if (got_int && (!got_float || nlen >= dlen)) {
            result = vm->integer(n);
            advance (size_t)nlen;
        }
        else if (got_float) {
            result = vm->number(d);
            advance (size_t)dlen;
        }
        else {
            *error = true;
            result = vm->string("scanf error");
        }
    }
    else if (isspace((unsigned char)c) && c != '\n') {
        result = vm->sym("SPACE");
        // test the CURRENT character, not the first one, so the run stops at a newline
        while (isspace((unsigned char)look) && look != '\n') next;
    }
    else if (c == '#') {
        // get comment or 1-character # operator
        next;
        if (look != '#') {
            // it's a # operator
            result = vm->sym("#");
        } else {
            next;
            if (look != '#') {
                // it's a line comment: everything up to, not including, the newline
                while (!eofp && look != '\n') {
                    bufadd(&b, look);
                    next;
                }
                result = vm->string(b ? b : "");
            } else {
                // it's a block comment: the fence is three or more '#', and
                // the identical fence closes it
                bufcat(&b2, "###", 3);
                next;
                while (look == '#') {
                    bufadd(&b2, '#');
                    next;
                }
                // track the close explicitly: a fence that ends exactly at
                // end of input is still a properly terminated comment
                bool closed = false;
                while (!eofp) {
                    if (chomp(b2)) {
                        closed = true;
                        break;
                    }
                    bufadd(&b, look);
                    next;
                }
                if (!closed) {
                    *error = true;
                    result = vm->string("error: unterminated block comment");
                    goto done;
                }
                result = vm->string(b ? b : "");
            }
        }
    }
    else if (c == '"' || c == '\'') {
        char quote = c;
        bool closed = false;
        next; // opening quote
        while (!eofp && look != '\n') {
            char ch = look;
            if (ch == quote) {
                // consume the closing quote so the next token starts after it
                closed = true;
                next;
                break;
            }
            if (ch == '\\') {
                next;
                if (eofp) break; // lone backslash at end of input
                // translate the character AFTER the backslash
                ch = unescape(look);
            }
            if (ch) bufadd(&b, ch);
            next;
        }
        if (!closed) {
            *error = true;
            result = vm->string("error: unclosed string");
        }
        else result = vm->string(b ? b : "");
    }
    else if (c == '\n') {
        // parser block: skip the newline and any blank lines, leaving the
        // indent of the first non-blank line in b2
        for (;;) {
            next; // eat newline
            while (isspace((unsigned char)look) && look != '\n') {
                bufadd(&b2, look);
                next;
            }
            if (look != '\n') break;
            free(b2);
            b2 = NULL;
        }
        if (!b2 || eofp) {
            // No indented line follows, so there is no block content.
            // NOTE(review): yields an empty string instead of dereferencing
            // the NULL indent; confirm the token wanted for a plain newline.
            result = vm->string("");
            goto done;
        }
        // validate indent: every indent character must be the same
        for (const char* ip = b2; *ip; ip++) {
            if (*ip != *b2) {
                *error = true;
                result = vm->string("error: mix of spaces and tabs indenting block");
                goto done;
            }
        }
        for (bool in_block = true; in_block;) {
            // get one line
            while (!eofp && look != '\n') {
                bufadd(&b, look);
                next;
            }
            bufadd(&b, '\n');
            if (eofp) break;
            next; // eat the newline so the indent check starts at column 0
            // the following line must begin with the block indent; blank
            // lines inside the block are kept as empty lines
            while (!chomp(b2)) {
                bool has_indent = false;
                while (isspace((unsigned char)look) && look != '\n') {
                    has_indent = true;
                    next;
                }
                if (look == '\n') {
                    next;
                    bufadd(&b, '\n');
                    continue;
                }
                // not a blank line
                if (has_indent && !eofp) {
                    result = vm->string("error: unindent does not match previous indent");
                    *error = true;
                    goto done;
                }
                // completely unindented, or end of input: block is finished
                in_block = false;
                break;
            }
        }
        result = vm->string(b ? b : "");
    }
    else if (strchr("(){}[]", c)) {
        *special = c;
    }
    else if (ispunct((unsigned char)c)) {
        // must test for other punctuation last to allow other special cases to take precedence
        bufadd(&b, c);
        next; // consume the operator character, like every other token kind
        result = vm->sym(b);
    }
    else {
        *error = true;
        result = vm->string("unknown parser error");
    }
    done:
    free(b);
    free(b2);
    return result;
}
318+
149319
// Can be called by the program
150-
void parse(pickle* vm, object* args, object* env, object* cont, object* fail_cont) {
320+
// VM instruction: pops a string from the data stack, scans one token from it
// with do_parse(), and pushes the result back on the data stack.
//
//   cookie, inst_type - unused
//
// Returns nil on success. Returns the symbol "error" (with an error-message
// string pushed instead of a token) when the popped value is missing or not a
// string, when do_parse() reports an error, or when the text starts with a
// bracket character, which is not handled yet.
object* parse(pvm* vm, object* cookie, object* inst_type) {
    (void)cookie;
    (void)inst_type;
    DBG("parsing");
    object* string = vm->pop();
    // guard against nil before looking at ->type
    // NOTE(review): assumes pop() can yield nil on an empty stack - confirm
    if (string == nil || string->type != &string_type) {
        vm->push_data(vm->string("error: non string to parse()"));
        return vm->sym("error");
    }
    const char* str = vm->stringof(string);
    pstate s = { .data = str, .i = 0, .len = strlen(str) };
    bool error = false;
    char special = 0;
    object* result = do_parse(vm, &s, &error, &special);
    if (special) {
        result = vm->string("unknown syntax error");
        error = true;
    }
    vm->push_data(result);
    return error ? vm->sym("error") : nil;
}
172340

173-
static object* get_best_match(pickle* vm, object* ast, object** env) {
341+
// Placeholder: matching is not implemented, so this always yields NULL and
// leaves *env untouched.
static object* get_best_match(pvm* vm, object* ast, object** env) {
    (void)vm;
    (void)ast;
    (void)env;
    return NULL;
}
176344

177345
// Eval(list) ::= apply_first_pattern(list), then eval(remaining list), else list if no patterns match
178-
void eval(pickle* vm, object* args, object* env, object* cont, object* fail_cont) {
346+
object* eval(pvm* vm, object* cookie, object* inst_type) {
179347
// object* ast = car(args);
180348
// // returns Match object: 0=pattern, 1=handler body, 2=match details for splice; and updates env with bindings
181349
// object* oldenv = env;
@@ -207,16 +375,15 @@ void eval(pickle* vm, object* args, object* env, object* cont, object* fail_cont
207375
// }
208376
}
209377

210-
void splice_match(pickle* vm, object* args, object* env, object* cont, object* fail_cont) {
378+
// VM instruction stub: not implemented yet.
// Returns nil explicitly, because flowing off the end of a function that
// returns object* is undefined behaviour.
object* splice_match(pvm* vm, object* cookie, object* inst_type) {
    (void)vm;
    (void)cookie;
    (void)inst_type;
    // TODO(sm);
    return nil;
}
213381

214382
// ------------------- Circular-reference-proof object dumper -----------------------
215383
// ---------- (based on https://stackoverflow.com/a/78169673/23626926) --------------
216384

217-
static void make_refs_list(pickle* vm, object* obj, object** alist) {
385+
static void make_refs_list(pvm* vm, object* obj, object** alist) {
218386
again:
219-
DBG();
220387
if (obj == NULL || obj->type != &cons_type) return;
221388
object* entry = assoc(*alist, obj);
222389
if (entry) {
@@ -231,7 +398,7 @@ static void make_refs_list(pickle* vm, object* obj, object** alist) {
231398

232399
// returns zero if the object doesn't need a #N# marker
233400
// otherwise returns N (negative if not first time)
234-
static int64_t reffed(pickle* vm, object* obj, object* alist, int64_t* counter) {
401+
static int64_t reffed(pvm* vm, object* obj, object* alist, int64_t* counter) {
235402
object* entry = assoc(alist, obj);
236403
if (entry) {
237404
int64_t value = vm->intof(cdr(entry));
@@ -251,13 +418,24 @@ static int64_t reffed(pickle* vm, object* obj, object* alist, int64_t* counter)
251418
return 0;
252419
}
253420

254-
static void print_with_refs(pickle* vm, object* obj, object* alist, int64_t* counter) {
421+
static void print_with_refs(pvm* vm, object* obj, object* alist, int64_t* counter) {
255422
if (obj == nil) {
256423
printf("NIL");
257424
return;
258425
}
259426
#define PRINTTYPE(t, f, fmt) else if (obj->type == t) printf(fmt, obj->f)
260-
PRINTTYPE(&string_type, as_chars, "\"%s\"");
427+
else if (obj->type == &string_type) {
428+
putchar('"');
429+
for (char* c = obj->as_chars; *c; c++) {
430+
char e = escape(*c);
431+
if (e != *c) {
432+
putchar('\\');
433+
putchar(e);
434+
}
435+
else putchar(*c);
436+
}
437+
putchar('"');
438+
}
261439
PRINTTYPE(&symbol_type, as_chars, strchr(obj->as_chars, ' ') ? "#|%s|" : "%s");
262440
PRINTTYPE(&integer_type, as_big_int, "%" PRId64);
263441
PRINTTYPE(&float_type, as_double, "%lg");
@@ -301,7 +479,7 @@ static void print_with_refs(pickle* vm, object* obj, object* alist, int64_t* cou
301479
}
302480
}
303481

304-
void pickle::dump(object* obj) {
482+
void pvm::dump(object* obj) {
305483
object* alist = NULL;
306484
int64_t counter = 1;
307485
make_refs_list(this, obj, &alist);

0 commit comments

Comments
 (0)