From 0ff1d40842390da36908c7ffce62f2cf33b173b9 Mon Sep 17 00:00:00 2001 From: c Date: Mon, 22 Jan 2024 13:30:47 -0500 Subject: Not sure. --- src/include/parser.h | 21 +++++++++++++---- src/include/token.h | 36 ++++++++++++++--------------- src/lexer.c | 42 ++++++++++++++++----------------- src/parser.c | 65 +++++++++++++++++++++++++++++++--------------------- 4 files changed, 95 insertions(+), 69 deletions(-) (limited to 'src') diff --git a/src/include/parser.h b/src/include/parser.h index 59cd0ae..eae09d4 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -5,14 +5,27 @@ #include "tree.h" #include "token.h" +typedef enum PARSER_STATE { + PARSER_STATE_BLOCK, + PARSER_STATE_EXPR, + PARSER_STATE_LINT, + PARSER_STATE_LSTR, + PARSER_STATE_TAG, + PARSER_STATE_DARG, + PARSER_STATE_CARG, + PARSER_STATE_DEF, + PARSER_STATE_CALL, +} parser_state_t; + typedef struct PARSER { + /* What the parser's looking at. */ + parser_state_t state; + /* The token list being consumed. */ token_t* token; /* The AST being produced. */ tree_t* tree; - - /* Pointer to the part of the tree the parser is currently working on. */ } parser_t; /* Creates a new parser. */ @@ -46,7 +59,7 @@ tree_t* parser_parse_lstr(parser_t* parser); /* Return the tree for an expression.*/ tree_t* parser_parse_expr(parser_t* parser); -/* Return the tree for an expression. */ +/* Return the tree for a block. */ tree_t* parser_parse_block(parser_t* parser); /* Return the tree for a definition's arguments. */ @@ -62,7 +75,7 @@ tree_t* parser_parse_carg(parser_t* parser); tree_t* parser_parse_call(parser_t* parser); /* Parse. */ -tree_t* parser_parse(parser_t* parser); +void parser_parse(parser_t* parser); /* Parse with the given parser. 
*/ void parser_run(parser_t* parser); diff --git a/src/include/token.h b/src/include/token.h index f95f066..b4dd74d 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -4,24 +4,24 @@ #include "util.h" typedef enum TOKEN_TYPE { - TOKEN_UNKNOWN, - TOKEN_CHAR, - TOKEN_STR, - TOKEN_EXPR_END, - TOKEN_SET, - TOKEN_LGROUP, - TOKEN_RGROUP, - TOKEN_APPLY, - TOKEN_LIST_DELIM, - TOKEN_TAG, - TOKEN_NAMESPACE_DELIM, - TOKEN_LBLOCK, - TOKEN_RBLOCK, - TOKEN_RLIST, - TOKEN_LLIST, - TOKEN_ESC, - TOKEN_KWD, - TOKEN_INT + TOKEN_TYPE_UNKNOWN, + TOKEN_TYPE_CHAR, + TOKEN_TYPE_STR, + TOKEN_TYPE_EXPR_END, + TOKEN_TYPE_SET, + TOKEN_TYPE_LGROUP, + TOKEN_TYPE_RGROUP, + TOKEN_TYPE_APPLY, + TOKEN_TYPE_LIST_DELIM, + TOKEN_TYPE_TAG, + TOKEN_TYPE_NAMESPACE_DELIM, + TOKEN_TYPE_LBLOCK, + TOKEN_TYPE_RBLOCK, + TOKEN_TYPE_RLIST, + TOKEN_TYPE_LLIST, + TOKEN_TYPE_ESC, + TOKEN_TYPE_KWD, + TOKEN_TYPE_INT } token_type_t; /* Token struct. */ diff --git a/src/lexer.c b/src/lexer.c index ce7dcc0..a89c9ad 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -58,7 +58,7 @@ void lexer_add_current_char_to_last_token(lexer_t* lexer, int type) { void lexer_do_reg(lexer_t* lexer) { switch (*lexer->src) { case SYNTAX_APPLY: - lexer_add_current_char(lexer, TOKEN_APPLY); + lexer_add_current_char(lexer, TOKEN_TYPE_APPLY); break; case SYNTAX_TAG_DELIM: lexer->state = LEXER_STATE_TAG; @@ -67,46 +67,46 @@ void lexer_do_reg(lexer_t* lexer) { lexer_add_current_char(lexer, TOKEN_NAMESPACE_DELIM); break; */ case SYNTAX_SET: - lexer_add_current_char(lexer, TOKEN_SET); + lexer_add_current_char(lexer, TOKEN_TYPE_SET); break; case SYNTAX_LLIST: - lexer_add_current_char(lexer, TOKEN_LLIST); + lexer_add_current_char(lexer, TOKEN_TYPE_LLIST); break; case SYNTAX_RLIST: - lexer_add_current_char(lexer, TOKEN_RLIST); + lexer_add_current_char(lexer, TOKEN_TYPE_RLIST); break; case SYNTAX_LGROUP: - lexer_add_current_char(lexer, TOKEN_LGROUP); + lexer_add_current_char(lexer, TOKEN_TYPE_LGROUP); break; case SYNTAX_RGROUP: - 
lexer_add_current_char(lexer, TOKEN_RGROUP); + lexer_add_current_char(lexer, TOKEN_TYPE_RGROUP); break; case SYNTAX_LBLOCK: - lexer_add_current_char(lexer, TOKEN_LBLOCK); + lexer_add_current_char(lexer, TOKEN_TYPE_LBLOCK); break; case SYNTAX_RBLOCK: - lexer_add_current_char(lexer, TOKEN_RBLOCK); + lexer_add_current_char(lexer, TOKEN_TYPE_RBLOCK); break; case SYNTAX_EXPR_END: - lexer_add_current_char(lexer, TOKEN_EXPR_END); + lexer_add_current_char(lexer, TOKEN_TYPE_EXPR_END); break; case SYNTAX_LIST_DELIM: - lexer_add_current_char(lexer, TOKEN_LIST_DELIM); + lexer_add_current_char(lexer, TOKEN_TYPE_LIST_DELIM); break; case SYNTAX_STR_DELIM: lexer->state = LEXER_STATE_STR; break; default: if (isdigit(*lexer->src)) { - lexer_add_current_char(lexer, TOKEN_INT); + lexer_add_current_char(lexer, TOKEN_TYPE_INT); if (isdigit(*(lexer->src + 1))) { lexer->state = LEXER_STATE_INT; } } else if (strchr(SYNTAX_KWD_CHARS, *lexer->src)) { - lexer_add_current_char(lexer, TOKEN_KWD); + lexer_add_current_char(lexer, TOKEN_TYPE_KWD); if (strchr(SYNTAX_KWD_CHARS, *(lexer->src + 1))) { lexer->state = LEXER_STATE_KWD; } } else { - lexer_add_current_char(lexer, TOKEN_UNKNOWN); + lexer_add_current_char(lexer, TOKEN_TYPE_UNKNOWN); lexer->state = LEXER_STATE_REG; } } @@ -115,21 +115,21 @@ void lexer_do_reg(lexer_t* lexer) { void lexer_do_tag(lexer_t* lexer) { switch (*lexer->src) { case SYNTAX_LIST_DELIM: - lexer_add_current_char(lexer, TOKEN_LIST_DELIM); + lexer_add_current_char(lexer, TOKEN_TYPE_LIST_DELIM); lexer->state = LEXER_STATE_REG; break; case SYNTAX_SET: - lexer_add_current_char(lexer, TOKEN_SET); + lexer_add_current_char(lexer, TOKEN_TYPE_SET); lexer->state = LEXER_STATE_REG; break; case SYNTAX_APPLY: - lexer_add_current_char(lexer, TOKEN_APPLY); + lexer_add_current_char(lexer, TOKEN_TYPE_APPLY); lexer->state = LEXER_STATE_REG; break; case SYNTAX_TAG_DELIM: - lexer_add_token(lexer, token_init(TOKEN_TAG, '\0')); + lexer_add_token(lexer, token_init(TOKEN_TYPE_TAG, '\0')); break; - 
default: lexer_add_current_char_to_last_token(lexer, TOKEN_TAG); + default: lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_TAG); } } @@ -137,13 +137,13 @@ void lexer_do_str(lexer_t* lexer) { if (*lexer->src == SYNTAX_STR_DELIM) { lexer->state = LEXER_STATE_REG; } else { - lexer_add_current_char_to_last_token(lexer, TOKEN_STR); + lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_STR); } } void lexer_do_int(lexer_t* lexer) { if (isdigit(*lexer->src)) { - lexer_add_current_char_to_last_token(lexer, TOKEN_INT); + lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_INT); ! isdigit(*(lexer->src + 1)) && ( lexer->state = LEXER_STATE_REG ); } else { log_err("int state at non-int token"); @@ -152,7 +152,7 @@ void lexer_do_int(lexer_t* lexer) { void lexer_do_kwd(lexer_t* lexer) { if (strchr(SYNTAX_KWD_CHARS, *lexer->src)) { - lexer_add_current_char_to_last_token(lexer, TOKEN_KWD); + lexer_add_current_char_to_last_token(lexer, TOKEN_TYPE_KWD); ! strchr(SYNTAX_KWD_CHARS, *(lexer->src + 1)) && ( lexer->state = LEXER_STATE_REG ); } else { diff --git a/src/parser.c b/src/parser.c index 0359c80..bb945db 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,7 +16,9 @@ void parser_destroy(parser_t* parser) { int parser_nxt_token(parser_t* parser) { if (parser->token->nxt) { - parser->token = parser->token->nxt; + token_t* nxt = parser->token->nxt; + free(parser->token); + parser->token = nxt; return 1; } else { return 0; @@ -60,19 +62,19 @@ tree_t* parser_parse_expr(parser_t* parser) { expr = tree_init(TREE_TYPE_EXPR); switch (parser->token->type) { - case TOKEN_INT: + case TOKEN_TYPE_INT: expr->data.expr.val = parser_parse_lint(parser); break; - case TOKEN_STR: + case TOKEN_TYPE_STR: expr->data.expr.val = parser_parse_lstr(parser); break; - case TOKEN_KWD: + case TOKEN_TYPE_KWD: expr->data.expr.val = parser_parse_call(parser); break; - case TOKEN_TAG: + case TOKEN_TYPE_TAG: expr->data.expr.val = parser_parse_def(parser); break; - case TOKEN_LBLOCK: + case 
TOKEN_TYPE_LBLOCK: parser_nxt_token(parser); expr->data.expr.val = parser_parse_block(parser); break; @@ -85,26 +87,33 @@ tree_t* parser_parse_expr(parser_t* parser) { } +/* Build a block node. An integer token becomes the block's value via parser_parse_lint(); with no token, or any other token type, the value is NULL. The block is never chained: nxt is always NULL. */ tree_t* parser_parse_block(parser_t* parser) { - if ( - ! parser->token || - parser->token->type == TOKEN_EXPR_END || - parser->token->type == TOKEN_RBLOCK - ) { return NULL; } - tree_t* block; block = tree_init(TREE_TYPE_BLOCK); - block->data.block.val = parser_parse_expr(parser); - block->data.block.nxt = parser_nxt_token(parser) ? - parser_parse_block(parser) : - NULL; + if (!parser->token) { + block->data.block.val = NULL; + block->data.block.nxt = NULL; + } else { + switch (parser->token->type) { + case TOKEN_TYPE_INT: + block->data.block.val = parser_parse_lint(parser); + break; + default: + /* Any other token type yields an empty block rather than an unset value. */ + block->data.block.val = NULL; + break; + } + } + + block->data.block.nxt = NULL; return block; } tree_t* parser_parse_tag(parser_t* parser) { - if (parser->token->type != TOKEN_TAG) { return NULL; } + if (parser->token->type != TOKEN_TYPE_TAG) { return NULL; } tree_t* tag; @@ -126,13 +130,13 @@ tree_t* parser_parse_darg(parser_t* parser) { darg = tree_init(TREE_TYPE_DARG); - if (parser->token->type != TOKEN_TAG) { return NULL; } + if (parser->token->type != TOKEN_TYPE_TAG) { return NULL; } darg->data.darg.tag = parser_parse_tag(parser); log_war("%d", parser->token->type); - if (parser->token->type != TOKEN_LIST_DELIM) { + if (parser->token->type != TOKEN_TYPE_LIST_DELIM) { darg->data.darg.nxt = NULL; } else { parser_nxt_token(parser) && @@ -149,10 +153,10 @@ tree_t* parser_parse_def(parser_t* parser) { def->data.def.tag = parser_parse_tag(parser); - parser->token->type == TOKEN_APPLY && + parser->token->type == TOKEN_TYPE_APPLY && parser_nxt_token(parser) && ( def->data.def.arg = parser_parse_darg(parser) ); - parser->token->type == TOKEN_SET && + parser->token->type == TOKEN_TYPE_SET && parser_nxt_token(parser) && ( def->data.def.val = parser_parse_expr(parser) ); @@ -166,7 +170,7 @@ tree_t* parser_parse_carg(parser_t* parser) { carg->data.carg.val = 
parser_parse_expr(parser); carg->data.carg.nxt = ( - parser_nxt_token_match(parser, TOKEN_LIST_DELIM) && + parser_nxt_token_match(parser, TOKEN_TYPE_LIST_DELIM) && parser_nxt_token(parser) ) ? parser_parse_carg(parser) : @@ -183,7 +187,7 @@ tree_t* parser_parse_call(parser_t* parser) { call->data.call.target = parser->token->val; parser->token->val = NULL; call->data.call.arg = ( - parser_nxt_token_match(parser, TOKEN_APPLY) && parser_nxt_token(parser) ? + parser_nxt_token_match(parser, TOKEN_TYPE_APPLY) && parser_nxt_token(parser) ? parser_parse_carg(parser) : NULL ); @@ -191,10 +195,24 @@ tree_t* parser_parse_call(parser_t* parser) { return call; } -tree_t* parser_parse(parser_t* parser) { - return parser_parse_block(parser); +/* Walk the remaining tokens, storing the parser_parse_lint() result for each integer token in parser->tree->data.block.val (later results overwrite earlier ones). */ +void parser_parse(parser_t* parser) { + while (parser->token) { + switch (parser->token->type) { + case TOKEN_TYPE_INT: + parser->tree->data.block.val = parser_parse_lint(parser); + break; + default: + /* Other token types are ignored. */ + break; + } + + /* parser_nxt_token() returns 0 without advancing at the last token; stop there instead of spinning forever. */ + if (!parser_nxt_token(parser)) { break; } + } + } void parser_run(parser_t* parser) { - parser->tree = parser_parse(parser); + parser->tree = parser_parse_block(parser); } -- cgit v1.2.3