aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples/namespaces.halk11
-rw-r--r--src/include/parser.h21
-rw-r--r--src/include/token.h36
-rw-r--r--src/lexer.c42
-rw-r--r--src/parser.c65
-rw-r--r--test/include/test.h2
-rw-r--r--test/parser.c219
7 files changed, 182 insertions, 214 deletions
diff --git a/examples/namespaces.halk b/examples/namespaces.halk
new file mode 100644
index 0000000..c8511be
--- /dev/null
+++ b/examples/namespaces.halk
@@ -0,0 +1,11 @@
+:nsp:people = {
+ ` Get a person's age at a specific year. `
+ :int:get_age.:nsp:person,:int:year = -.year, person:birthyear
+}
+
+:nsp:john = {
+ :int:birthyear = 2000;
+ :str:name = "John Doe";
+}
+
+print.people:get_age.john;
diff --git a/src/include/parser.h b/src/include/parser.h
index 59cd0ae..eae09d4 100644
--- a/src/include/parser.h
+++ b/src/include/parser.h
@@ -5,14 +5,27 @@
#include "tree.h"
#include "token.h"
+typedef enum PARSER_STATE {
+ PARSER_STATE_BLOCK,
+ PARSER_STATE_EXPR,
+ PARSER_STATE_LINT,
+ PARSER_STATE_LSTR,
+ PARSER_STATE_TAG,
+ PARSER_STATE_DARG,
+ PARSER_STATE_CARG,
+ PARSER_STATE_DEF,
+ PARSER_STATE_CALL,
+} parser_state_t;
+
typedef struct PARSER {
+ /* What the parser's looking at. */
+ parser_state_t state;
+
/* The token list being consumed. */
token_t* token;
/* The AST being produced. */
tree_t* tree;
-
- /* Pointer to the part of the tree the parser is currently working on. */
} parser_t;
/* Creates a new parser. */
@@ -46,7 +59,7 @@ tree_t* parser_parse_lstr(parser_t* parser);
/* Return the tree for an expression.*/
tree_t* parser_parse_expr(parser_t* parser);
-/* Return the tree for an expression. */
+/* Return the tree for a block. */
tree_t* parser_parse_block(parser_t* parser);
/* Return the tree for a definition's arguments. */
@@ -62,7 +75,7 @@ tree_t* parser_parse_carg(parser_t* parser);
tree_t* parser_parse_call(parser_t* parser);
/* Parse. */
-tree_t* parser_parse(parser_t* parser);
+void parser_parse(parser_t* parser);
/* Parse with the given parser. */
void parser_run(parser_t* parser);
diff --git a/src/include/token.h b/src/include/token.h
index f95f066..b4dd74d 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -4,24 +4,24 @@
#include "util.h"
typedef enum TOKEN_TYPE {
- TOKEN_UNKNOWN,
- TOKEN_CHAR,
- TOKEN_STR,
- TOKEN_EXPR_END,
- TOKEN_SET,
- TOKEN_LGROUP,
- TOKEN_RGROUP,
- TOKEN_APPLY,
- TOKEN_LIST_DELIM,
- TOKEN_TAG,
- TOKEN_NAMESPACE_DELIM,
- TOKEN_LBLOCK,
- TOKEN_RBLOCK,
- TOKEN_RLIST,
- TOKEN_LLIST,
- TOKEN_ESC,
- TOKEN_KWD,
- TOKEN_INT
+ TOKEN_TYPE_UNKNOWN,
+ TOKEN_TYPE_CHAR,
+ TOKEN_TYPE_STR,
+ TOKEN_TYPE_EXPR_END,
+ TOKEN_TYPE_SET,
+ TOKEN_TYPE_LGROUP,
+ TOKEN_TYPE_RGROUP,
+ TOKEN_TYPE_APPLY,
+ TOKEN_TYPE_LIST_DELIM,
+ TOKEN_TYPE_TAG,
+ TOKEN_TYPE_NAMESPACE_DELIM,
+ TOKEN_TYPE_LBLOCK,
+ TOKEN_TYPE_RBLOCK,
+ TOKEN_TYPE_RLIST,
+ TOKEN_TYPE_LLIST,
+ TOKEN_TYPE_ESC,
+ TOKEN_TYPE_KWD,
+ TOKEN_TYPE_INT
} token_type_t;
/* Token struct. */
diff --git a/src/lexer.c b/src/lexer.c
index ce7dcc0..a89c9ad 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -58,7 +58,7 @@ void lexer_add_current_char_to_last_token(lexer_t* lexer, int type) {
void lexer_do_reg(lexer_t* lexer) {
switch (*lexer->src) {
case SYNTAX_APPLY:
- lexer_add_current_char(lexer, TOKEN_APPLY);
+ lexer_add_current_char(lexer, TOKEN_TYPE_APPLY);
break;
case SYNTAX_TAG_DELIM:
lexer->state = LEXER_STATE_TAG;
@@ -67,46 +67,46 @@ void lexer_do_reg(lexer_t* lexer) {
lexer_add_current_char(lexer, TOKEN_NAMESPACE_DELIM);
break; */
case SYNTAX_SET:
- lexer_add_current_char(lexer, TOKEN_SET);
+ lexer_add_current_char(lexer, TOKEN_TYPE_SET);
break;
case SYNTAX_LLIST:
- lexer_add_current_char(lexer, TOKEN_LLIST);
+ lexer_add_current_char(lexer, TOKEN_TYPE_LLIST);
break;
case SYNTAX_RLIST:
- lexer_add_current_char(lexer, TOKEN_RLIST);
+ lexer_add_current_char(lexer, TOKEN_TYPE_RLIST);
break;
case SYNTAX_LGROUP:
- lexer_add_current_char(lexer, TOKEN_LGROUP);
+ lexer_add_current_char(lexer, TOKEN_TYPE_LGROUP);
break;
case SYNTAX_RGROUP:
- lexer_add_current_char(lexer, TOKEN_RGROUP);
+ lexer_add_current_char(lexer, TOKEN_TYPE_RGROUP);
break;
case SYNTAX_LBLOCK:
- lexer_add_current_char(lexer, TOKEN_LBLOCK);
+ lexer_add_current_char(lexer, TOKEN_TYPE_LBLOCK);
break;
case SYNTAX_RBLOCK:
- lexer_add_current_char(lexer, TOKEN_RBLOCK);
+ lexer_add_current_char(lexer, TOKEN_TYPE_RBLOCK);
break;
case SYNTAX_EXPR_END:
- lexer_add_current_char(lexer, TOKEN_EXPR_END);
+ lexer_add_current_char(lexer, TOKEN_TYPE_EXPR_END);
break;
case SYNTAX_LIST_DELIM:
- lexer_add_current_char(lexer, TOKEN_LIST_DELIM);
+ lexer_add_current_char(lexer, TOKEN_TYPE_LIST_DELIM);
break;
case SYNTAX_STR_DELIM:
lexer->state = LEXER_STATE_STR;
break;
default:
if (isdigit(*lexer->src)) {
- lexer_add_current_char(lexer, TOKEN_INT);
+ lexer_add_current_char(lexer, TOKEN_TYPE_INT);
if (isdigit(*(lexer->src + 1))) { lexer->state = LEXER_STATE_INT; }
} else if (strchr(SYNTAX_KWD_CHARS, *lexer->src)) {
- lexer_add_current_char(lexer, TOKEN_KWD);
+ lexer_add_current_char(lexer, TOKEN_TYPE_KWD);
if (strchr(SYNTAX_KWD_CHARS, *(lexer->src + 1))) { lexer->state = LEXER_STATE_KWD; }
} else {
- lexer_add_current_char(lexer, TOKEN_UNKNOWN);
+ lexer_add_current_char(lexer, TOKEN_TYPE_UNKNOWN);
lexer->state = LEXER_STATE_REG;
}
}
@@ -115,21 +115,21 @@ void lexer_do_reg(lexer_t* lexer) {
void lexer_do_tag(lexer_t* lexer) {
switch (*lexer->src) {
case SYNTAX_LIST_DELIM:
- lexer_add_current_char(lexer, TOKEN_LIST_DELIM);
+ lexer_add_current_char(lexer, TOKEN_TYPE_LIST_DELIM);
lexer->state = LEXER_STATE_REG;
break;
case SYNTAX_SET:
- lexer_add_current_char(lexer, TOKEN_SET);
+ lexer_add_current_char(lexer, TOKEN_TYPE_SET);
lexer->state = LEXER_STATE_REG;
break;
case SYNTAX_APPLY:
- lexer_add_current_char(lexer, TOKEN_APPLY);
+ lexer_add_current_char(lexer, TOKEN_TYPE_APPLY);
lexer->state = LEXER_STATE_REG;
break;
case SYNTAX_TAG_DELIM:
- lexer_add_token(lexer, token_init(TOKEN_TAG, '\0'));
+ lexer_add_token(lexer, token_init(TOKEN_TYPE_TAG, '\0'));
break;
- default: lexer_add_current_char_to_last_token(lexer, TOKEN_TAG);
+ default: lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_TAG);
}
}
@@ -137,13 +137,13 @@ void lexer_do_str(lexer_t* lexer) {
if (*lexer->src == SYNTAX_STR_DELIM) {
lexer->state = LEXER_STATE_REG;
} else {
- lexer_add_current_char_to_last_token(lexer, TOKEN_STR);
+ lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_STR);
}
}
void lexer_do_int(lexer_t* lexer) {
if (isdigit(*lexer->src)) {
- lexer_add_current_char_to_last_token(lexer, TOKEN_INT);
+ lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_INT);
! isdigit(*(lexer->src + 1)) && ( lexer->state = LEXER_STATE_REG );
} else {
log_err("int state at non-int token");
@@ -152,7 +152,7 @@ void lexer_do_int(lexer_t* lexer) {
void lexer_do_kwd(lexer_t* lexer) {
if (strchr(SYNTAX_KWD_CHARS, *lexer->src)) {
- lexer_add_current_char_to_last_token(lexer, TOKEN_KWD);
+ lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_KWD);
! strchr(SYNTAX_KWD_CHARS, *(lexer->src + 1)) &&
( lexer->state = LEXER_STATE_REG );
} else {
diff --git a/src/parser.c b/src/parser.c
index 0359c80..bb945db 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -16,7 +16,9 @@ void parser_destroy(parser_t* parser) {
int parser_nxt_token(parser_t* parser) {
if (parser->token->nxt) {
- parser->token = parser->token->nxt;
+ token_t* nxt = parser->token->nxt;
+ free(parser->token);
+ parser->token = nxt;
return 1;
} else {
return 0;
@@ -60,19 +62,19 @@ tree_t* parser_parse_expr(parser_t* parser) {
expr = tree_init(TREE_TYPE_EXPR);
switch (parser->token->type) {
- case TOKEN_INT:
+ case TOKEN_TYPE_INT:
expr->data.expr.val = parser_parse_lint(parser);
break;
- case TOKEN_STR:
+ case TOKEN_TYPE_STR:
expr->data.expr.val = parser_parse_lstr(parser);
break;
- case TOKEN_KWD:
+ case TOKEN_TYPE_KWD:
expr->data.expr.val = parser_parse_call(parser);
break;
- case TOKEN_TAG:
+ case TOKEN_TYPE_TAG:
expr->data.expr.val = parser_parse_def(parser);
break;
- case TOKEN_LBLOCK:
+ case TOKEN_TYPE_LBLOCK:
parser_nxt_token(parser);
expr->data.expr.val = parser_parse_block(parser);
break;
@@ -85,26 +87,28 @@ tree_t* parser_parse_expr(parser_t* parser) {
}
tree_t* parser_parse_block(parser_t* parser) {
- if (
- ! parser->token ||
- parser->token->type == TOKEN_EXPR_END ||
- parser->token->type == TOKEN_RBLOCK
- ) { return NULL; }
-
tree_t* block;
block = tree_init(TREE_TYPE_BLOCK);
- block->data.block.val = parser_parse_expr(parser);
- block->data.block.nxt = parser_nxt_token(parser) ?
- parser_parse_block(parser) :
- NULL;
+ if (!parser->token) {
+ block->data.block.val = NULL;
+ block->data.block.nxt = NULL;
+ } else {
+ switch (parser->token->type) {
+ case TOKEN_TYPE_INT:
+ block->data.block.val = parser_parse_lint(parser);
+ break;
+ }
+ }
+
+ block->data.block.nxt = NULL;
return block;
}
tree_t* parser_parse_tag(parser_t* parser) {
- if (parser->token->type != TOKEN_TAG) { return NULL; }
+ if (parser->token->type != TOKEN_TYPE_TAG) { return NULL; }
tree_t* tag;
@@ -126,13 +130,13 @@ tree_t* parser_parse_darg(parser_t* parser) {
darg = tree_init(TREE_TYPE_DARG);
- if (parser->token->type != TOKEN_TAG) { return NULL; }
+ if (parser->token->type != TOKEN_TYPE_TAG) { return NULL; }
darg->data.darg.tag = parser_parse_tag(parser);
log_war("%d", parser->token->type);
- if (parser->token->type != TOKEN_LIST_DELIM) {
+ if (parser->token->type != TOKEN_TYPE_LIST_DELIM) {
darg->data.darg.nxt = NULL;
} else {
parser_nxt_token(parser) &&
@@ -149,10 +153,10 @@ tree_t* parser_parse_def(parser_t* parser) {
def->data.def.tag = parser_parse_tag(parser);
- parser->token->type == TOKEN_APPLY &&
+ parser->token->type == TOKEN_TYPE_APPLY &&
parser_nxt_token(parser) &&
( def->data.def.arg = parser_parse_darg(parser) );
- parser->token->type == TOKEN_SET &&
+ parser->token->type == TOKEN_TYPE_SET &&
parser_nxt_token(parser) &&
( def->data.def.val = parser_parse_expr(parser) );
@@ -166,7 +170,7 @@ tree_t* parser_parse_carg(parser_t* parser) {
carg->data.carg.val = parser_parse_expr(parser);
carg->data.carg.nxt = (
- parser_nxt_token_match(parser, TOKEN_LIST_DELIM) &&
+ parser_nxt_token_match(parser, TOKEN_TYPE_LIST_DELIM) &&
parser_nxt_token(parser)
) ?
parser_parse_carg(parser) :
@@ -183,7 +187,7 @@ tree_t* parser_parse_call(parser_t* parser) {
call->data.call.target = parser->token->val;
parser->token->val = NULL;
call->data.call.arg = (
- parser_nxt_token_match(parser, TOKEN_APPLY) && parser_nxt_token(parser) ?
+ parser_nxt_token_match(parser, TOKEN_TYPE_APPLY) && parser_nxt_token(parser) ?
parser_parse_carg(parser) :
NULL
);
@@ -191,10 +195,19 @@ tree_t* parser_parse_call(parser_t* parser) {
return call;
}
-tree_t* parser_parse(parser_t* parser) {
- return parser_parse_block(parser);
+void parser_parse(parser_t* parser) {
+ while (parser->token) {
+ switch (parser->token->type) {
+ case TOKEN_TYPE_INT:
+ parser->tree->data.block.val = parser_parse_lint(parser);
+ break;
+ }
+
+ parser_nxt_token(parser);
+ }
+
}
void parser_run(parser_t* parser) {
- parser->tree = parser_parse(parser);
+ parser->tree = parser_parse_block(parser);
}
diff --git a/test/include/test.h b/test/include/test.h
index 9a2aa5a..c5fd97e 100644
--- a/test/include/test.h
+++ b/test/include/test.h
@@ -6,6 +6,8 @@
extern unsigned int TESTS_RUN;
extern unsigned int TESTS_PASSED;
+#define TEST_INIT unsigned int TESTS_RUN = 0, TESTS_PASSED = 0;
+
#define ASSERT(EXPR) \
TESTS_RUN++; \
(EXPR && ++TESTS_PASSED) ? \
diff --git a/test/parser.c b/test/parser.c
index 78e4066..db296e6 100644
--- a/test/parser.c
+++ b/test/parser.c
@@ -4,163 +4,92 @@
#include "../src/include/tree.h"
#include "../src/include/parser.h"
-unsigned int TESTS_RUN = 0, TESTS_PASSED = 0;
-
-#define PARSER_SETUP(SRC) \
- pp = pp_init(SRC); \
- pp_run(pp); \
- lexer = lexer_init(pp->psrc); \
- lexer_run(lexer); \
- parser = parser_init(lexer->tokenl); \
+TEST_INIT
+
+void test_simple_empty() {
+ tree_t* tree;
+ pp_t* pp;
+ lexer_t* lexer;
+ parser_t* parser;
+
+ char src[] = " ";
+
+ /*
+
+ [block]
+ val:
+ NULL
+ nxt:
+ NULL
+
+ */
+
+ tree = tree_init(TREE_TYPE_BLOCK);
+ tree->data.block.val = NULL;
+ tree->data.block.nxt = NULL;
+
+ pp = pp_init(src);
+ pp_run(pp);
+
+ lexer = lexer_init(pp->psrc);
+ lexer_run(lexer);
+
+ parser = parser_init(lexer->tokenl);
parser_run(parser);
-int main(int argc, char** argv) {
- tree_t* tree_0;
+ ASSERT(tree_cmp(parser->tree, tree) == 1);
+
+ pp_destroy(pp);
+ parser_destroy(parser);
+ lexer_destroy(lexer);
+}
+
+void test_single_lint() {
+ tree_t* tree;
pp_t* pp;
lexer_t* lexer;
parser_t* parser;
- /* Simple empty block. */
- char src_0[] = "";
+ char src[] = "1";
- /*
+ /*
- [block]
+ [block]
+ val:
+ [lint]
val:
- NULL
- nxt:
- NULL
-
- */
-
- tree_0 = tree_init(TREE_TYPE_BLOCK);
- tree_0->data.block.val = NULL;
- tree_0->data.block.nxt = NULL;
-
- PARSER_SETUP(src_0);
-
- tree_print(tree_0, 0);
- tree_print(parser->tree, 0);
-
- ASSERT(tree_cmp(parser->tree, tree_0) == 1);
-
- /* More complicated tree. */
- char src_1[] = "" \
- ":int:f = {" \
- "a.b;" \
- "c.d"
- "}";
- /*
-
- [block]
- val:
- [expression]
- val:
- [def]
- tag:
- [tag]
- val:
- "int"
- nxt:
- [tag]
- val:
- "f"
- nxt:
- NULL
- arg:
- NULL
- val:
- [expression]
- val:
- [block]
- val:
- [expression]
- val:
- [call]
- target:
- "a"
- arg:
- [carg]
- val:
- [call]
- target:
- "b"
- arg:
- NULL
- nxt:
- NULL
- nxt:
- [block]
- val:
- [expression]
- val:
- [call]
- target:
- "c"
- arg:
- [carg]
- val:
- [call]
- target:
- "d"
- nxt:
- NULL
- nxt:
- NULL
- nxt:
- NULL
- nxt:
- NULL
-
- */
-
- tree_0 = tree_init(TREE_TYPE_BLOCK);
- tree_t* treep_00 = tree_0->data.block.val = tree_init(TREE_TYPE_EXPR);
- tree_t* treep_01 = treep_00->data.expr.val = tree_init(TREE_TYPE_DEF);
- tree_t* treep_02 = treep_01->data.def.tag = tree_init(TREE_TYPE_TAG);
- treep_02->data.tag.val = "int";
- tree_t* treep_03 = treep_02->data.tag.nxt = tree_init(TREE_TYPE_TAG);
- treep_03->data.tag.val = "f";
- treep_03->data.tag.nxt = NULL;
- treep_01->data.def.arg = NULL;
- tree_t* treep_04 = treep_01->data.def.val = tree_init(TREE_TYPE_EXPR);
- tree_t* treep_05 = treep_04->data.expr.val = tree_init(TREE_TYPE_BLOCK);
- tree_t* treep_06 = treep_05->data.block.val = tree_init(TREE_TYPE_EXPR);
- tree_t* treep_07 = treep_06->data.expr.val = tree_init(TREE_TYPE_CALL);
- treep_07->data.call.target = "a";
- tree_t* treep_08 = treep_07->data.call.arg = tree_init(TREE_TYPE_CARG);
- tree_t* treep_09 = treep_08->data.carg.val = tree_init(TREE_TYPE_EXPR);
- tree_t* treep_10 = treep_09->data.expr.val = tree_init(TREE_TYPE_CALL);
- treep_10->data.call.target = "b";
- treep_10->data.call.arg = NULL;
- treep_08->data.carg.nxt = NULL;
- tree_t* treep_11 = treep_05->data.block.nxt = tree_init(TREE_TYPE_BLOCK);
- tree_t* treep_12 = treep_11->data.block.val = tree_init(TREE_TYPE_EXPR);
- tree_t* treep_13 = treep_12->data.expr.val = tree_init(TREE_TYPE_CALL);
- treep_13->data.call.target = "c";
- tree_t* treep_14 = treep_13->data.call.arg = tree_init(TREE_TYPE_CARG);
- tree_t* treep_15 = treep_14->data.carg.val = tree_init(TREE_TYPE_EXPR);
- tree_t* treep_16 = treep_15->data.expr.val = tree_init(TREE_TYPE_CALL);
- treep_16->data.call.target = "d";
- treep_16->data.call.arg = NULL;
- treep_14->data.carg.nxt = NULL;
- treep_11->data.block.nxt = NULL;
- tree_0->data.block.nxt = NULL;
-
- pp = pp_init(src_0);
- pp_run(pp);
-
- lexer = lexer_init(pp->psrc);
- lexer_run(lexer);
-
- parser = parser_init(lexer->tokenl);
- parser_run(parser);
-
- ASSERT(tree_cmp(tree_0, parser->tree));
+ 1
+ nxt:
+ NULL
+
+ */
+ tree = tree_init(TREE_TYPE_BLOCK);
+ tree->data.block.val = tree_init(TREE_TYPE_LINT);
+ tree->data.block.val->data.lint.val = 1;
+ tree->data.block.nxt = NULL;
+
+ pp = pp_init(src);
+ pp_run(pp);
+
+ lexer = lexer_init(pp->psrc);
+ lexer_run(lexer);
+
+ parser = parser_init(lexer->tokenl);
+ parser_run(parser);
+
+ ASSERT(tree_cmp(parser->tree, tree) == 1);
+
+ token_destroy(lexer->tokenl);
+ lexer_destroy(lexer);
pp_destroy(pp);
+ tree_destroy(parser->tree);
parser_destroy(parser);
- lexer_destroy(lexer);
+}
+
+int main(int argc, char** argv) {
+ test_simple_empty();
+ test_single_lint();
TEST_REPORT;