diff options
author | s-over-4 | 2023-06-23 01:57:04 -0400 |
---|---|---|
committer | s-over-4 | 2023-06-23 01:57:04 -0400 |
commit | e850a08fa7a763140b9c86308cfdff9bae421c2e (patch) | |
tree | 0042d0bba020391f956ed95160dca8375d9c64db | |
parent | 76e952ec1756deae78d9a88a67b97eff3550959e (diff) |
parser groundwork
-rw-r--r-- | src/include/parser.h | 53 | ||||
-rw-r--r-- | src/parser.c | 173 |
2 files changed, 57 insertions, 169 deletions
diff --git a/src/include/parser.h b/src/include/parser.h index 2811469..32cd4bc 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -1,45 +1,52 @@ #ifndef PARSER_H #define PARSER_H -#include "token.h" -#include "tree.h" #include "lexer.h" - +#include "tree.h" typedef struct PARSER_STRUC { - lexer_t* lexer; - token_t* token; + lexer_t* lexer; // lexer used by the parser + token_t* token; // current token } parser_t; -// initialize a parser parser_t* parser_init(lexer_t* lexer); void parser_destroy(parser_t* parser); -// check for expected token, or throw syntax error +// expect token, or die void parser_token_expect(parser_t* parser, int (*expected_token)(token_t*)); // do the parse tree_t* parser_parse(parser_t* parser); -tree_t* parser_parse_token(parser_t* parser); -// parse hunks -tree_t* parser_parse_hunk(parser_t* parser); -tree_t* parser_parse_hunks(parser_t* parser); +// parse statements +// - end in semicolon or ) or } +// - can contain one expression +// - return what they evaluate to +tree_t* parser_parse_statements(parser_t* parser); +tree_t* parser_parse_statement(parser_t* parser); + +// parse expressions +// - in parenthetical +// - contain only one statement +// - return what that statement evaluates to +// - adds layer of scope) +// basically a statement, but with a layer of scope +tree_t* parser_parse_exprs(parser_t* parser); +tree_t* parser_parse_expr(parser_t* parser); + +// parse blocks +// - in curly brackets +// - many statements +// - return what last statement evaluates to +tree_t* parser_parse_blocks(parser_t* parser); +tree_t* parser_parse_block(parser_t* parser); + +// blocks contain many statements, any of which may contain any number of expressions -// leaves of the tree -tree_t* parser_parse_prim(parser_t* parser); -tree_t* parser_parse_subtree(parser_t* parser); tree_t* parser_parse_def(parser_t* parser); tree_t* parser_parse_call(parser_t* parser); -// primitives -tree_t* parser_parse_prim_str(parser_t* parser); -tree_t* parser_parse_prim_int(parser_t* parser); -tree_t* parser_parse_prim_unkwn(parser_t* parser); - -// defs -tree_t* parser_parse_def_tags(parser_t* parser); -int parser_is_def_tag(char* maybe_tag); - +tree_t* parser_parse_type_str(parser_t* parser); +tree_t* parser_parse_type_int(parser_t* parser); #endif diff --git a/src/parser.c b/src/parser.c index c106132..d2720b7 100644 --- a/src/parser.c +++ b/src/parser.c @@ -2,173 +2,54 @@ #include <stdio.h> #include <string.h> - +#include "include/token.h" +#include "include/tree.h" #include "include/util.h" +#include "include/lexer.h" #include "include/parser.h" -#include "include/token.h" - -// initialize a parser parser_t* parser_init(lexer_t* lexer) { - parser_t* parser = calloc(1, sizeof(struct PARSER_STRUC)); + parser_t* parser; + parser = calloc(1, sizeof(struct PARSER_STRUC)); parser->lexer = lexer; parser->token = lexer_get_next_token(lexer); return parser; } -// check for expected token, or throw syntax error -void parser_token_expect(parser_t* parser, int token_type) { - if (parser->token->type == token_type) { - log_inf(strcat("Got expected token", lexer_get_c_as_string(parser->lexer))); - parser->token = lexer_get_next_token(parser->lexer); - } else { - die("Unexpected token type: [%d]", token_type); - } -} - -// creates the abstract syntax tree -tree_t* parser_parse(parser_t* parser) { - return parser_parse_hunks(parser); -} - -tree_t* parser_parse_token(parser_t* parser) { - if (parser_is_def_tag(parser->token->value)) { - return parser_parse_def(parser); - } else { - return parser_parse_call(parser); - } -} - -// parse a single chunk -tree_t* parser_parse_chunk(parser_t* parser) { - switch (parser->token->type) { - case TOKEN_KEYWORD: { - return parser_parse_keyword(parser); - break; - } - default: { - log_inf("Skipping non-keyword token"); - lexer_next(parser->lexer); - break; - } - } -} +void parser_destroy(parser_t* parser) { free(parser); } -// parse a list of chunks -tree_t* parser_parse_chunks(parser_t* parser) { - tree_t* subtree = tree_init(TREE_SUBTREE); - subtree->data.subtree.val = calloc(1, sizeof(struct TREE_STRUC)); - - tree_t* chunk = parser_parse_chunk(parser); - subtree->data.subtree.val[0] = chunk; - - while (parser->token->type == TOKEN_END) { - parser_check_expect(parser, TOKEN_END); - - tree_t* chunk = parser_parse_chunk(parser); - subtree->data.subtree.size += 1; - subtree->data.subtree.val = realloc( - subtree->data.subtree.val, - subtree->data.subtree.size * sizeof(struct TREE_STRUC) +void parser_token_expect(parser_t* parser, int (*expected_token)(token_t*)) { + expected_token(parser->token)? + parser->token = lexer_get_next_token(parser->lexer): + die( + "unexpected token!\n\ttype: [%s]\n\tvalue: [%s]", + token_get_type(parser->token->type), + parser->token->value ); - subtree->data.subtree.val[subtree->data.subtree.size - 1] = chunk; - - } - - return subtree; } -tree_t* parser_parse_expr(parser_t* parser) { - switch (parser->token->type) { - case TOKEN_STR: - parser_parse_str(parser); - break; - - default: - log_war("Skipping unknown token"); - lexer_next(parser->lexer); - - } -}; +tree_t* parser_parse(parser_t* parser) { return parser_parse_statements(parser); } -tree_t* parser_parse_fac(parser_t* parser) {}; +tree_t* parser_parse_statements(parser_t* parser) { + tree_t* comp; + tree_t* statement; -tree_t* parser_parse_term(parser_t* parser) {}; + comp = tree_init(TREE_COMP); + comp->data.comp.value = calloc(1, sizeof(struct TREE_STRUC*)); -tree_t* parser_parse_fn_call(parser_t* parser) {}; + statement = parser_parse_statement(parser); -tree_t* parser_parse_fn_def(parser_t* parser) {}; + comp->data.comp.value[0] = statement; + comp->data.comp.size += 1; -tree_t* parser_parse_var(parser_t* parser) { - char* token_val = parser->token->value; + while (parser->token->type == TOKEN_STMNT_END) { + statement = parser_parse_statement(parser); - parser_check_expect(parser, TOKEN_KEYWORD); // var name or fn name - - // check if function - if (parser->token->type == TOKEN_FN_APPLY) { - return parser_parse_fn_call(parser); + comp->data.comp.value[0] = statement; + comp->data.comp.size += 1; } - - tree_t* var = tree_init(TREE_VAR); - var->data.var.name = token_val; - return var; -}; - -tree_t* parser_parse_var_def(parser_t* parser) { - int var_is_const; - parser_check_expect(parser, TOKEN_KEYWORD); // let - char* var_name = parser->token->value; // set variable name - parser_check_expect(parser, TOKEN_KEYWORD); // expect variable name & advance - - if (parser->token->type == TOKEN_DEFINE_CONST) { // check either constant or mutable variable - var_is_const = 1; - parser_check_expect(parser, TOKEN_DEFINE_CONST); - } else { - var_is_const = 0; - parser_check_expect(parser, TOKEN_DEFINE_MUT); - } - - tree_t* var_val = parser_parse_expr(parser); // set the value - tree_t* var_def = tree_init(TREE_VAR_DEF); // create the var, as a subtree - - var_def->data.var_def.name = var_name; - var_def->data.var_def.val = var_val; - var_def->data.var_def.is_const = &var_is_const; - - return var_def; -}; - -tree_t* parser_parse_str(parser_t* parser) { - log_inf("Entered str"); - tree_t* str = tree_init(TREE_STR); - str->data.str.val = parser->token->value; - parser_check_expect(parser, TOKEN_STR); - - return str; -}; - -tree_t* parser_parse_keyword(parser_t* parser) { - if (strcmp(parser->token->value, "let")) { - return parser_parse_var_def(parser); - } else if (strcmp(parser->token->value, "fn")) { - return parser_parse_fn_def(parser); - } else { - return parser_parse_var(parser); // assume attempting to call - // variable/function; - // TODO: differentiate b/w the - // two on call; may be a syntax - // change :P just to make the - // interpreter easier - } -} - - -int parser_is_def_tag(char* maybe_tag) { - if (strcmp(maybe_tag, "int") || - strcmp(maybe_tag, "str")) { - return 1; - } else { return 0; } + return comp; } |