From 6fc8f91e0d96ae4b4ee59ea562574cc04fdf8abf Mon Sep 17 00:00:00 2001 From: c+1 Date: Sat, 21 Oct 2023 09:10:58 -0400 Subject: ⬣ --- src/include/lexer.h | 11 ++++++++-- src/include/token.h | 7 ++++++- src/include/tree.h | 46 +++++++++++++--------------------------- src/lexer.c | 60 ++++++++++++++++++++++++++++++++--------------------- src/token.c | 25 ++++++++++++++++++++-- 5 files changed, 88 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/include/lexer.h b/src/include/lexer.h index b2bf9eb..83ace59 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -16,8 +16,10 @@ typedef struct LEXER_STRUC { enum LEXER_STATE { /* normal 1-character token */ LEXER_STATE_REG, - /* character */ - LEXER_STATE_CHR, + /* definition tag */ + LEXER_STATE_TAG, + /* escaped character in string */ + LEXER_STATE_ESC, /* string */ LEXER_STATE_STR, /* definition */ @@ -46,8 +48,13 @@ void lexer_add_token(lexer_t* lexer, token_t* token); lexer_do_reg() */ void lexer_add_current_char(lexer_t* lexer, int type); +/* add first character of lexer's src to the value of the last token in tokenl, if it exists. otherwise, create new token and add it */ +void lexer_add_current_char_to_last_token(lexer_t* lexer, int type); + /* handle regular state */ void lexer_do_reg(lexer_t*); +/* handle definition tag state*/ +void lexer_do_tag(lexer_t*); /* handle character state */ void lexer_do_chr(lexer_t*); /* handle string state */ diff --git a/src/include/token.h b/src/include/token.h index 5a3a36c..a186fa9 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -34,11 +34,16 @@ typedef struct TOKEN_STRUC { } token_t; /* creates a token */ -token_t* token_init(int type, char* val); +token_t* token_init(int type, char val); /* destroys a token **and all tokens contained in nxt** **Make sure to set the nxt of any parent tokens to NULL** */ void token_destroy(token_t* token); /* return pointer to the last token */ token_t* token_last(token_t* token); +/* add a character to the token value */ +void token_add_char(token_t*, char); + +/* print a token -- for debugging purposes */ +void token_print(token_t* token); #endif diff --git a/src/include/tree.h b/src/include/tree.h index a2b71da..88287a4 100644 --- a/src/include/tree.h +++ b/src/include/tree.h @@ -4,44 +4,26 @@ #include typedef struct TREE_STRUC { - enum { - TREE_COMP, - TREE_DEF, - TREE_CALL, - TREE_TYPE_STR, + enum TREE_TYPE { TREE_TYPE_INT, + TREE_TYPE_STR, + TREE_TYPE_DEF, + TREE_TYPE_CAL, + TREE_TYPE_COND, } type; union { - struct { // === "COMPOUND" === - struct TREE_STRUC** value; - size_t size; - } comp; - - struct { // === DEFINITIONS === - char* type; // the definition type - char** tags; // the definition tags - size_t tags_size; // the number of tags - char* name; // the definition name - struct TREE_STRUC** args; // the arguments the definition will accept - size_t args_size; // the number of arguments - struct TREE_STRUC* value; // value of definition - } def; - - struct { // === CALLS === - char* target; // name of definition being called - struct TREE_STRUC** args; // arguments passed to definition - size_t args_size; // the number of arguments - } call; + struct { + int val; + } tree_int_t; - // === TYPES === - struct { // strings - char* value; - } type_str; + struct { + char* val; + } tree_str_t; - struct { // integers - int value; - } type_int; + struct { + char* id; + } tree_def_t; } data; } tree_t; diff --git a/src/lexer.c b/src/lexer.c index ba0e8e1..7f36b98 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -36,31 +36,33 @@ void lexer_add_token(lexer_t* lexer, token_t* token) { lexer->tokenl_last = token; } - log_inf("token/v:%s\t/t:%d", token->val, token->type); lexer->tokenc ++; } void lexer_add_current_char(lexer_t* lexer, int type) { - char* c; /* get the current character as a string */ token_t* t; /* the token to be added */ - c = ecalloc(2, sizeof(char)); - c[0] = *lexer->src; - c[1] = '\0'; - - t = token_init(type, c); + t = token_init(type, *lexer->src); lexer_add_token(lexer, t); } +void lexer_add_current_char_to_last_token(lexer_t* lexer, int type) { + if (lexer->tokenl_last && lexer->tokenl_last->type == type) { + token_add_char(lexer->tokenl_last, *lexer->src); + } else { + lexer_add_current_char(lexer, type); + } +} + void lexer_do_reg(lexer_t* lexer) { switch (*lexer->src) { case SYNTAX_APPLY: lexer_add_current_char(lexer, TOKEN_APPLY); break; case SYNTAX_TAG_DELIM: - lexer_add_current_char(lexer, TOKEN_TAG_DELIM); + lexer->state = LEXER_STATE_TAG; break; case SYNTAX_NAMESPACE_DELIM: lexer_add_current_char(lexer, TOKEN_NAMESPACE_DELIM); @@ -83,39 +85,49 @@ void lexer_do_reg(lexer_t* lexer) { case SYNTAX_EXPR_END: lexer_add_current_char(lexer, TOKEN_EXPR_END); break; - case SYNTAX_STR_DELIM: - lexer_add_current_char(lexer, TOKEN_STR_DELIM); - break; - case SYNTAX_CHAR_DELIM: - lexer_add_current_char(lexer, TOKEN_CHAR_DELIM); - break; case SYNTAX_LIST_DELIM: lexer_add_current_char(lexer, TOKEN_LIST_DELIM); break; + case SYNTAX_STR_DELIM: + lexer->state = LEXER_STATE_STR; + break; default: lexer_add_current_char(lexer, TOKEN_UNKNOWN); } } -void lexer_do_chr(lexer_t* lexer) { - if (*lexer->src == '\'') { - lexer->state = LEXER_STATE_REG; - } else { - token_t* t; - - t = token_init(TOKEN_CHAR, *lexer->src); - - lexer_add_token(lexer, t); +void lexer_do_tag(lexer_t* lexer) { + switch (*lexer->src) { + case SYNTAX_SET: + lexer_add_current_char(lexer, TOKEN_SET); + lexer->state = LEXER_STATE_REG; + break; + case SYNTAX_APPLY: + lexer_add_current_char(lexer, TOKEN_APPLY); + lexer->state = LEXER_STATE_REG; + break; + case SYNTAX_TAG_DELIM: + lexer_add_token(lexer, token_init(TOKEN_TAG, '\0')); + break; + default: lexer_add_current_char_to_last_token(lexer, TOKEN_TAG); } } void lexer_do_str(lexer_t* lexer) { - + if (*lexer->src == SYNTAX_STR_DELIM) { + lexer->state = LEXER_STATE_REG; + } else { + lexer_add_current_char_to_last_token(lexer, TOKEN_STR); + } } void lexer_run(lexer_t* lexer) { while (*lexer->src) { if (lexer->state == LEXER_STATE_REG) { lexer_do_reg(lexer); } + else if (lexer->state == LEXER_STATE_TAG) { lexer_do_tag(lexer); } + else if (lexer->state == LEXER_STATE_STR) { lexer_do_str(lexer); } lexer->src ++; } + + token_print(lexer->tokenl); } diff --git a/src/token.c b/src/token.c index ece32f4..935f23e 100644 --- a/src/token.c +++ b/src/token.c @@ -2,12 +2,14 @@ #include "include/token.h" -token_t* token_init(int type, char* val) { +token_t* token_init(int type, char val) { token_t* token; token = emalloc(sizeof(struct TOKEN_STRUC)); token->type = type; - token->val = val; + token->val = emalloc(2); + *token->val = val; + token->val[1] = '\0'; token->nxt = NULL; return token; @@ -32,3 +34,22 @@ token_t* token_last(token_t* token) { return t; } + +void token_add_char(token_t* token, char c) { + size_t orig; + + orig = strlen(token->val); + + token->val = erealloc(token->val, orig + sizeof c + 1); + token->val[orig] = c; + token->val[orig + 1] = '\0'; +} + +void token_print(token_t* token) { + + log_dbg("token/t=%d\t/v=%s", token->type, token->val); + + if (token->nxt) { + token_print(token->nxt); + } +} -- cgit v1.2.3