From 668e0fb0f4fc4bdd990d9ab349da445960d5764e Mon Sep 17 00:00:00 2001 From: c+1 Date: Thu, 5 Oct 2023 11:02:22 -0400 Subject: redesign the lexer, some mem leaks :( --- src/lexer.c | 167 +++++++++++++----------------------------------------------- 1 file changed, 35 insertions(+), 132 deletions(-) (limited to 'src/lexer.c') diff --git a/src/lexer.c b/src/lexer.c index 8374a90..e9475b6 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -5,12 +5,15 @@ #include "include/lexer.h" -lexer_t* lexer_init(char* content) { - lexer_t* lexer = calloc(1, sizeof(struct LEXER_STRUC)); +lexer_t* lexer_init(char* src) { + lexer_t* lexer; - lexer->content = content; - lexer->i = 0; - lexer->c = content[lexer->i]; + lexer = emalloc(sizeof(struct LEXER_STRUC)); + + lexer->src = src; + lexer->state = LEXER_STATE_REG; + lexer->tokenl = NULL; + lexer->tokenc = 0; return lexer; } @@ -19,144 +22,44 @@ void lexer_destroy(lexer_t* lexer) { free(lexer); } -void lexer_next(lexer_t* lexer) { - if (LEXER_VALID) { - lexer->i ++; - lexer->c = lexer->content[lexer->i]; - } -} +void lexer_add_token(lexer_t* lexer, token_t* token) { + token_t* t; -void lexer_pass(lexer_t* lexer) { - while (char_can_ignore(&lexer->c)) { - lexer_next(lexer); - } -} + t = token_last(lexer->tokenl)->nxt; + t = token; -token_t* lexer_get_next_token(lexer_t* lexer) { - while (LEXER_VALID) { - - if (char_can_ignore(&lexer->c)) { lexer_pass(lexer); } - if (char_could_start_int(&lexer->c)) { return lexer_next_token(lexer, TOKEN_PRIM_INT); } - if (char_could_start_keyword(&lexer->c)) { return lexer_collect(lexer, token_char_kywrd, 0, 0, TOKEN_KEYWORD); } - - switch (lexer->c) { - case SYNTAX_STR_DELIM: - return lexer_collect(lexer, token_char_quote, 1, 1, TOKEN_PRIM_STR); - break; - case SYNTAX_EXPR_END: - return lexer_next_token(lexer, TOKEN_STMNT_END); - break; - case SYNTAX_SET: - return lexer_next_token(lexer, TOKEN_DEF_SET); - break; - case SYNTAX_LGROUP: - return lexer_next_token(lexer, TOKEN_LGROUP); - break; - case SYNTAX_RGROUP: - return lexer_next_token(lexer, TOKEN_RGROUP); - break; - case SYNTAX_APPLY: - return lexer_next_token(lexer, TOKEN_FN_APPLY); - break; - case SYNTAX_LIST_DELIM: - return lexer_next_token(lexer, TOKEN_LIST_DELIM); - break; - case SYNTAX_TAG_DELIM: - return lexer_collect(lexer, token_char_kywrd, 1, 0, TOKEN_DEF_TAG); - break; - case SYNTAX_NAMESPACE_DELIM: - return lexer_next_token(lexer, TOKEN_NAMESPACE_DELIM); - break; - case SYNTAX_LBLOCK: - return lexer_next_token(lexer, TOKEN_BLOCK_START); - break; - case SYNTAX_RBLOCK: - return lexer_next_token(lexer, TOKEN_BLOCK_END); - break; - case SYNTAX_LLIST: - return lexer_next_token(lexer, TOKEN_ARRAY_START); - break; - case SYNTAX_RLIST: - return lexer_next_token(lexer, TOKEN_ARRAY_END); - break; - case '\0': - case EOF: - return token_init(TOKEN_EOF, lexer_get_c_as_string(lexer)); - break; - default: - return lexer_next_token(lexer, TOKEN_UNKNOWN); - break; - } - } + log_inf("token/v:%s\t/t:%d", token->val, token->type); - return NULL; + lexer->tokenc ++; } -token_t* lexer_next_token(lexer_t* lexer, int token_type) { - token_t* token = token_init(token_type, lexer_get_c_as_string(lexer)); - lexer_next(lexer); - return token; -} - -char* lexer_get_c_as_string(lexer_t* lexer) { - char* str; /* the string to return */ - - str = malloc(2); - str[0] = lexer->c; - str[1] = '\0'; - - return str; -} - -token_t* lexer_collect(lexer_t* lexer, int (*end_char)(char), int fskip, int lskip, int type) { - size_t len; /* length of collected token so far */ - char* token; /* collected token so far */ - - len = 0; - token = calloc(len, sizeof(char)); - - if (fskip) { lexer_next(lexer); } +void lexer_add_current_char(lexer_t* lexer, int type) { + char* c; /* get the current character as a string */ + token_t* t; /* the token to be added */ - while (end_char(lexer->c)) { - char* current; + c = ecalloc(2, sizeof(char)); + c[0] = *lexer->src; + c[1] = '\0'; - current = lexer_get_c_as_string(lexer); - token = realloc( - token, - (len + sizeof(current)) - ); + t = token_init(type, c); - memcpy(token + len, current, sizeof(char) * strlen(current)); - len += strlen(current) * sizeof(char); - lexer_next(lexer); - - free(current); - } - - if (lskip) { lexer_next(lexer); } - - token[len] = '\0'; /* terminate */ - - return token_init(type, token); + lexer_add_token(lexer, t); } -lexer_t* lexer_run(lexer_t* lexer) { - while (1) { - token_t* token; - char* type; - - token = lexer_get_next_token(lexer); - type = token_get_type(token->type); - - log_inf("type: %s\t\tval:%s", type, token->value); - - if (token->type == TOKEN_EOF) { - token_destroy(token); +void lexer_do_reg(lexer_t* lexer) { + switch (*lexer->src) { + case SYNTAX_APPLY: + lexer_add_current_char(lexer, TOKEN_APPLY); break; - } - - token_destroy(token); + default: + lexer_add_current_char(lexer, TOKEN_UNKNOWN); + } +} - return lexer; +void lexer_run(lexer_t* lexer) { + while (*lexer->src) { + if (lexer->state == LEXER_STATE_REG) { lexer_do_reg(lexer); } + lexer->src ++; + } } -- cgit v1.2.3