From 0ff1d40842390da36908c7ffce62f2cf33b173b9 Mon Sep 17 00:00:00 2001 From: c Date: Mon, 22 Jan 2024 13:30:47 -0500 Subject: Not sure. --- examples/namespaces.halk | 11 +++ src/include/parser.h | 21 ++++- src/include/token.h | 36 ++++---- src/lexer.c | 42 ++++----- src/parser.c | 65 ++++++++------ test/include/test.h | 2 + test/parser.c | 219 ++++++++++++++++------------------------------- 7 files changed, 182 insertions(+), 214 deletions(-) create mode 100644 examples/namespaces.halk diff --git a/examples/namespaces.halk b/examples/namespaces.halk new file mode 100644 index 0000000..c8511be --- /dev/null +++ b/examples/namespaces.halk @@ -0,0 +1,11 @@ +:nsp:people = { + ` Get a person's age at a specific year. ` + :int:get_age.:nsp:person,:int:year = -.year, person:birthyear +} + +:nsp:john = { + :int:birthyear = 2000; + :str:name = "John Doe"; +} + +print.people:get_age.john; diff --git a/src/include/parser.h b/src/include/parser.h index 59cd0ae..eae09d4 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -5,14 +5,27 @@ #include "tree.h" #include "token.h" +typedef enum PARSER_STATE { + PARSER_STATE_BLOCK, + PARSER_STATE_EXPR, + PARSER_STATE_LINT, + PARSER_STATE_LSTR, + PARSER_STATE_TAG, + PARSER_STATE_DARG, + PARSER_STATE_CARG, + PARSER_STATE_DEF, + PARSER_STATE_CALL, +} parser_state_t; + typedef struct PARSER { + /* What the parser's looking at. */ + parser_state_t state; + /* The token list being consumed. */ token_t* token; /* The AST being produced. */ tree_t* tree; - - /* Pointer to the part of the tree the parser is currently working on. */ } parser_t; /* Creates a new parser. */ @@ -46,7 +59,7 @@ tree_t* parser_parse_lstr(parser_t* parser); /* Return the tree for an expression.*/ tree_t* parser_parse_expr(parser_t* parser); -/* Return the tree for an expression. */ +/* Return the tree for a block. */ tree_t* parser_parse_block(parser_t* parser); /* Return the tree for a definition's arguments.
*/ @@ -62,7 +75,7 @@ tree_t* parser_parse_carg(parser_t* parser); tree_t* parser_parse_call(parser_t* parser); /* Parse. */ -tree_t* parser_parse(parser_t* parser); +void parser_parse(parser_t* parser); /* Parse with the given parser. */ void parser_run(parser_t* parser); diff --git a/src/include/token.h b/src/include/token.h index f95f066..b4dd74d 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -4,24 +4,24 @@ #include "util.h" typedef enum TOKEN_TYPE { - TOKEN_UNKNOWN, - TOKEN_CHAR, - TOKEN_STR, - TOKEN_EXPR_END, - TOKEN_SET, - TOKEN_LGROUP, - TOKEN_RGROUP, - TOKEN_APPLY, - TOKEN_LIST_DELIM, - TOKEN_TAG, - TOKEN_NAMESPACE_DELIM, - TOKEN_LBLOCK, - TOKEN_RBLOCK, - TOKEN_RLIST, - TOKEN_LLIST, - TOKEN_ESC, - TOKEN_KWD, - TOKEN_INT + TOKEN_TYPE_UNKNOWN, + TOKEN_TYPE_CHAR, + TOKEN_TYPE_STR, + TOKEN_TYPE_EXPR_END, + TOKEN_TYPE_SET, + TOKEN_TYPE_LGROUP, + TOKEN_TYPE_RGROUP, + TOKEN_TYPE_APPLY, + TOKEN_TYPE_LIST_DELIM, + TOKEN_TYPE_TAG, + TOKEN_TYPE_NAMESPACE_DELIM, + TOKEN_TYPE_LBLOCK, + TOKEN_TYPE_RBLOCK, + TOKEN_TYPE_RLIST, + TOKEN_TYPE_LLIST, + TOKEN_TYPE_ESC, + TOKEN_TYPE_KWD, + TOKEN_TYPE_INT } token_type_t; /* Token struct. 
*/ diff --git a/src/lexer.c b/src/lexer.c index ce7dcc0..a89c9ad 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -58,7 +58,7 @@ void lexer_add_current_char_to_last_token(lexer_t* lexer, int type) { void lexer_do_reg(lexer_t* lexer) { switch (*lexer->src) { case SYNTAX_APPLY: - lexer_add_current_char(lexer, TOKEN_APPLY); + lexer_add_current_char(lexer, TOKEN_TYPE_APPLY); break; case SYNTAX_TAG_DELIM: lexer->state = LEXER_STATE_TAG; @@ -67,46 +67,46 @@ void lexer_do_reg(lexer_t* lexer) { lexer_add_current_char(lexer, TOKEN_NAMESPACE_DELIM); break; */ case SYNTAX_SET: - lexer_add_current_char(lexer, TOKEN_SET); + lexer_add_current_char(lexer, TOKEN_TYPE_SET); break; case SYNTAX_LLIST: - lexer_add_current_char(lexer, TOKEN_LLIST); + lexer_add_current_char(lexer, TOKEN_TYPE_LLIST); break; case SYNTAX_RLIST: - lexer_add_current_char(lexer, TOKEN_RLIST); + lexer_add_current_char(lexer, TOKEN_TYPE_RLIST); break; case SYNTAX_LGROUP: - lexer_add_current_char(lexer, TOKEN_LGROUP); + lexer_add_current_char(lexer, TOKEN_TYPE_LGROUP); break; case SYNTAX_RGROUP: - lexer_add_current_char(lexer, TOKEN_RGROUP); + lexer_add_current_char(lexer, TOKEN_TYPE_RGROUP); break; case SYNTAX_LBLOCK: - lexer_add_current_char(lexer, TOKEN_LBLOCK); + lexer_add_current_char(lexer, TOKEN_TYPE_LBLOCK); break; case SYNTAX_RBLOCK: - lexer_add_current_char(lexer, TOKEN_RBLOCK); + lexer_add_current_char(lexer, TOKEN_TYPE_RBLOCK); break; case SYNTAX_EXPR_END: - lexer_add_current_char(lexer, TOKEN_EXPR_END); + lexer_add_current_char(lexer, TOKEN_TYPE_EXPR_END); break; case SYNTAX_LIST_DELIM: - lexer_add_current_char(lexer, TOKEN_LIST_DELIM); + lexer_add_current_char(lexer, TOKEN_TYPE_LIST_DELIM); break; case SYNTAX_STR_DELIM: lexer->state = LEXER_STATE_STR; break; default: if (isdigit(*lexer->src)) { - lexer_add_current_char(lexer, TOKEN_INT); + lexer_add_current_char(lexer, TOKEN_TYPE_INT); if (isdigit(*(lexer->src + 1))) { lexer->state = LEXER_STATE_INT; } } else if (strchr(SYNTAX_KWD_CHARS, *lexer->src)) 
{ - lexer_add_current_char(lexer, TOKEN_KWD); + lexer_add_current_char(lexer, TOKEN_TYPE_KWD); if (strchr(SYNTAX_KWD_CHARS, *(lexer->src + 1))) { lexer->state = LEXER_STATE_KWD; } } else { - lexer_add_current_char(lexer, TOKEN_UNKNOWN); + lexer_add_current_char(lexer, TOKEN_TYPE_UNKNOWN); lexer->state = LEXER_STATE_REG; } } @@ -115,21 +115,21 @@ void lexer_do_reg(lexer_t* lexer) { void lexer_do_tag(lexer_t* lexer) { switch (*lexer->src) { case SYNTAX_LIST_DELIM: - lexer_add_current_char(lexer, TOKEN_LIST_DELIM); + lexer_add_current_char(lexer, TOKEN_TYPE_LIST_DELIM); lexer->state = LEXER_STATE_REG; break; case SYNTAX_SET: - lexer_add_current_char(lexer, TOKEN_SET); + lexer_add_current_char(lexer, TOKEN_TYPE_SET); lexer->state = LEXER_STATE_REG; break; case SYNTAX_APPLY: - lexer_add_current_char(lexer, TOKEN_APPLY); + lexer_add_current_char(lexer, TOKEN_TYPE_APPLY); lexer->state = LEXER_STATE_REG; break; case SYNTAX_TAG_DELIM: - lexer_add_token(lexer, token_init(TOKEN_TAG, '\0')); + lexer_add_token(lexer, token_init(TOKEN_TYPE_TAG, '\0')); break; - default: lexer_add_current_char_to_last_token(lexer, TOKEN_TAG); + default: lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_TAG); } } @@ -137,13 +137,13 @@ void lexer_do_str(lexer_t* lexer) { if (*lexer->src == SYNTAX_STR_DELIM) { lexer->state = LEXER_STATE_REG; } else { - lexer_add_current_char_to_last_token(lexer, TOKEN_STR); + lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_STR); } } void lexer_do_int(lexer_t* lexer) { if (isdigit(*lexer->src)) { - lexer_add_current_char_to_last_token(lexer, TOKEN_INT); + lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_INT); ! 
isdigit(*(lexer->src + 1)) && ( lexer->state = LEXER_STATE_REG ); } else { log_err("int state at non-int token"); @@ -152,7 +152,7 @@ void lexer_do_int(lexer_t* lexer) { void lexer_do_kwd(lexer_t* lexer) { if (strchr(SYNTAX_KWD_CHARS, *lexer->src)) { - lexer_add_current_char_to_last_token(lexer, TOKEN_KWD); + lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_KWD); ! strchr(SYNTAX_KWD_CHARS, *(lexer->src + 1)) && ( lexer->state = LEXER_STATE_REG ); } else { diff --git a/src/parser.c b/src/parser.c index 0359c80..bb945db 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,7 +16,9 @@ void parser_destroy(parser_t* parser) { int parser_nxt_token(parser_t* parser) { if (parser->token->nxt) { - parser->token = parser->token->nxt; + token_t* nxt = parser->token->nxt; + free(parser->token); + parser->token = nxt; return 1; } else { return 0; @@ -60,19 +62,19 @@ tree_t* parser_parse_expr(parser_t* parser) { expr = tree_init(TREE_TYPE_EXPR); switch (parser->token->type) { - case TOKEN_INT: + case TOKEN_TYPE_INT: expr->data.expr.val = parser_parse_lint(parser); break; - case TOKEN_STR: + case TOKEN_TYPE_STR: expr->data.expr.val = parser_parse_lstr(parser); break; - case TOKEN_KWD: + case TOKEN_TYPE_KWD: expr->data.expr.val = parser_parse_call(parser); break; - case TOKEN_TAG: + case TOKEN_TYPE_TAG: expr->data.expr.val = parser_parse_def(parser); break; - case TOKEN_LBLOCK: + case TOKEN_TYPE_LBLOCK: parser_nxt_token(parser); expr->data.expr.val = parser_parse_block(parser); break; @@ -85,26 +87,28 @@ tree_t* parser_parse_expr(parser_t* parser) { } tree_t* parser_parse_block(parser_t* parser) { - if ( - ! parser->token || - parser->token->type == TOKEN_EXPR_END || - parser->token->type == TOKEN_RBLOCK - ) { return NULL; } - tree_t* block; block = tree_init(TREE_TYPE_BLOCK); - block->data.block.val = parser_parse_expr(parser); - block->data.block.nxt = parser_nxt_token(parser) ? 
- parser_parse_block(parser) : - NULL; + if (!parser->token) { + block->data.block.val = NULL; + block->data.block.nxt = NULL; + } else { + switch (parser->token->type) { + case TOKEN_TYPE_INT: + block->data.block.val = parser_parse_lint(parser); + break; + } + } + + block->data.block.nxt = NULL; return block; } tree_t* parser_parse_tag(parser_t* parser) { - if (parser->token->type != TOKEN_TAG) { return NULL; } + if (parser->token->type != TOKEN_TYPE_TAG) { return NULL; } tree_t* tag; @@ -126,13 +130,13 @@ tree_t* parser_parse_darg(parser_t* parser) { darg = tree_init(TREE_TYPE_DARG); - if (parser->token->type != TOKEN_TAG) { return NULL; } + if (parser->token->type != TOKEN_TYPE_TAG) { return NULL; } darg->data.darg.tag = parser_parse_tag(parser); log_war("%d", parser->token->type); - if (parser->token->type != TOKEN_LIST_DELIM) { + if (parser->token->type != TOKEN_TYPE_LIST_DELIM) { darg->data.darg.nxt = NULL; } else { parser_nxt_token(parser) && @@ -149,10 +153,10 @@ tree_t* parser_parse_def(parser_t* parser) { def->data.def.tag = parser_parse_tag(parser); - parser->token->type == TOKEN_APPLY && + parser->token->type == TOKEN_TYPE_APPLY && parser_nxt_token(parser) && ( def->data.def.arg = parser_parse_darg(parser) ); - parser->token->type == TOKEN_SET && + parser->token->type == TOKEN_TYPE_SET && parser_nxt_token(parser) && ( def->data.def.val = parser_parse_expr(parser) ); @@ -166,7 +170,7 @@ tree_t* parser_parse_carg(parser_t* parser) { carg->data.carg.val = parser_parse_expr(parser); carg->data.carg.nxt = ( - parser_nxt_token_match(parser, TOKEN_LIST_DELIM) && + parser_nxt_token_match(parser, TOKEN_TYPE_LIST_DELIM) && parser_nxt_token(parser) ) ? parser_parse_carg(parser) : @@ -183,7 +187,7 @@ tree_t* parser_parse_call(parser_t* parser) { call->data.call.target = parser->token->val; parser->token->val = NULL; call->data.call.arg = ( - parser_nxt_token_match(parser, TOKEN_APPLY) && parser_nxt_token(parser) ? 
+ parser_nxt_token_match(parser, TOKEN_TYPE_APPLY) && parser_nxt_token(parser) ? parser_parse_carg(parser) : NULL ); @@ -191,10 +195,19 @@ tree_t* parser_parse_call(parser_t* parser) { return call; } -tree_t* parser_parse(parser_t* parser) { - return parser_parse_block(parser); +void parser_parse(parser_t* parser) { + while (parser->token) { + switch (parser->token->type) { + case TOKEN_TYPE_INT: + parser->tree->data.block.val = parser_parse_lint(parser); + break; + } + + parser_nxt_token(parser); + } + } void parser_run(parser_t* parser) { - parser->tree = parser_parse(parser); + parser->tree = parser_parse_block(parser); } diff --git a/test/include/test.h b/test/include/test.h index 9a2aa5a..c5fd97e 100644 --- a/test/include/test.h +++ b/test/include/test.h @@ -6,6 +6,8 @@ extern unsigned int TESTS_RUN; extern unsigned int TESTS_PASSED; +#define TEST_INIT unsigned int TESTS_RUN = 0, TESTS_PASSED = 0; + #define ASSERT(EXPR) \ TESTS_RUN++; \ (EXPR && ++TESTS_PASSED) ? \ diff --git a/test/parser.c b/test/parser.c index 78e4066..db296e6 100644 --- a/test/parser.c +++ b/test/parser.c @@ -4,163 +4,92 @@ #include "../src/include/tree.h" #include "../src/include/parser.h" -unsigned int TESTS_RUN = 0, TESTS_PASSED = 0; - -#define PARSER_SETUP(SRC) \ - pp = pp_init(SRC); \ - pp_run(pp); \ - lexer = lexer_init(pp->psrc); \ - lexer_run(lexer); \ - parser = parser_init(lexer->tokenl); \ +TEST_INIT + +void test_simple_empty() { + tree_t* tree; + pp_t* pp; + lexer_t* lexer; + parser_t* parser; + + char src[] = " "; + + /* + + [block] + val: + NULL + nxt: + NULL + + */ + + tree = tree_init(TREE_TYPE_BLOCK); + tree->data.block.val = NULL; + tree->data.block.nxt = NULL; + + pp = pp_init(src); + pp_run(pp); + + lexer = lexer_init(pp->psrc); + lexer_run(lexer); + + parser = parser_init(lexer->tokenl); parser_run(parser); -int main(int argc, char** argv) { - tree_t* tree_0; + ASSERT(tree_cmp(parser->tree, tree) == 1); + + pp_destroy(pp); + parser_destroy(parser); + 
lexer_destroy(lexer); +} + +void test_single_lint() { + tree_t* tree; pp_t* pp; lexer_t* lexer; parser_t* parser; - /* Simple empty block. */ - char src_0[] = ""; + char src[] = "1"; - /* + /* - [block] + [block] + val: + [lint] val: - NULL - nxt: - NULL - - */ - - tree_0 = tree_init(TREE_TYPE_BLOCK); - tree_0->data.block.val = NULL; - tree_0->data.block.nxt = NULL; - - PARSER_SETUP(src_0); - - tree_print(tree_0, 0); - tree_print(parser->tree, 0); - - ASSERT(tree_cmp(parser->tree, tree_0) == 1); - - /* More complicated tree. */ - char src_1[] = "" \ - ":int:f = {" \ - "a.b;" \ - "c.d" - "}"; - /* - - [block] - val: - [expression] - val: - [def] - tag: - [tag] - val: - "int" - nxt: - [tag] - val: - "f" - nxt: - NULL - arg: - NULL - val: - [expression] - val: - [block] - val: - [expression] - val: - [call] - target: - "a" - arg: - [carg] - val: - [call] - target: - "b" - arg: - NULL - nxt: - NULL - nxt: - [block] - val: - [expression] - val: - [call] - target: - "c" - arg: - [carg] - val: - [call] - target: - "d" - nxt: - NULL - nxt: - NULL - nxt: - NULL - nxt: - NULL - - */ - - tree_0 = tree_init(TREE_TYPE_BLOCK); - tree_t* treep_00 = tree_0->data.block.val = tree_init(TREE_TYPE_EXPR); - tree_t* treep_01 = treep_00->data.expr.val = tree_init(TREE_TYPE_DEF); - tree_t* treep_02 = treep_01->data.def.tag = tree_init(TREE_TYPE_TAG); - treep_02->data.tag.val = "int"; - tree_t* treep_03 = treep_02->data.tag.nxt = tree_init(TREE_TYPE_TAG); - treep_03->data.tag.val = "f"; - treep_03->data.tag.nxt = NULL; - treep_01->data.def.arg = NULL; - tree_t* treep_04 = treep_01->data.def.val = tree_init(TREE_TYPE_EXPR); - tree_t* treep_05 = treep_04->data.expr.val = tree_init(TREE_TYPE_BLOCK); - tree_t* treep_06 = treep_05->data.block.val = tree_init(TREE_TYPE_EXPR); - tree_t* treep_07 = treep_06->data.expr.val = tree_init(TREE_TYPE_CALL); - treep_07->data.call.target = "a"; - tree_t* treep_08 = treep_07->data.call.arg = tree_init(TREE_TYPE_CARG); - tree_t* treep_09 = 
treep_08->data.carg.val = tree_init(TREE_TYPE_EXPR); - tree_t* treep_10 = treep_09->data.expr.val = tree_init(TREE_TYPE_CALL); - treep_10->data.call.target = "b"; - treep_10->data.call.arg = NULL; - treep_08->data.carg.nxt = NULL; - tree_t* treep_11 = treep_05->data.block.nxt = tree_init(TREE_TYPE_BLOCK); - tree_t* treep_12 = treep_11->data.block.val = tree_init(TREE_TYPE_EXPR); - tree_t* treep_13 = treep_12->data.expr.val = tree_init(TREE_TYPE_CALL); - treep_13->data.call.target = "c"; - tree_t* treep_14 = treep_13->data.call.arg = tree_init(TREE_TYPE_CARG); - tree_t* treep_15 = treep_14->data.carg.val = tree_init(TREE_TYPE_EXPR); - tree_t* treep_16 = treep_15->data.expr.val = tree_init(TREE_TYPE_CALL); - treep_16->data.call.target = "d"; - treep_16->data.call.arg = NULL; - treep_14->data.carg.nxt = NULL; - treep_11->data.block.nxt = NULL; - tree_0->data.block.nxt = NULL; - - pp = pp_init(src_0); - pp_run(pp); - - lexer = lexer_init(pp->psrc); - lexer_run(lexer); - - parser = parser_init(lexer->tokenl); - parser_run(parser); - - ASSERT(tree_cmp(tree_0, parser->tree)); + 1 + nxt: + NULL + + */ + tree = tree_init(TREE_TYPE_BLOCK); + tree->data.block.val = tree_init(TREE_TYPE_LINT); + tree->data.block.val->data.lint.val = 1; + tree->data.block.nxt = NULL; + + pp = pp_init(src); + pp_run(pp); + + lexer = lexer_init(pp->psrc); + lexer_run(lexer); + + parser = parser_init(lexer->tokenl); + parser_run(parser); + + ASSERT(tree_cmp(parser->tree, tree) == 1); + + token_destroy(lexer->tokenl); + lexer_destroy(lexer); pp_destroy(pp); + tree_destroy(parser->tree); parser_destroy(parser); - lexer_destroy(lexer); +} + +int main(int argc, char** argv) { + test_simple_empty(); + test_single_lint(); TEST_REPORT; -- cgit v1.2.3