From fb2efbd868556da31d128c755beeefd37b0178c3 Mon Sep 17 00:00:00 2001 From: c+1 Date: Sat, 28 Oct 2023 09:59:56 -0400 Subject: fixed numbers. Added keywords. --- src/include/syntax.h | 5 ++++- src/include/token.h | 1 + src/include/tree.h | 5 +++++ src/lexer.c | 22 +++++++++++++++++++--- 4 files changed, 29 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/include/syntax.h b/src/include/syntax.h index b4410c5..777fea4 100644 --- a/src/include/syntax.h +++ b/src/include/syntax.h @@ -14,11 +14,14 @@ #define SYNTAX_APPLY '.' #define SYNTAX_LIST_DELIM ',' #define SYNTAX_TAG_DELIM ':' -#define SYNTAX_NAMESPACE_DELIM '/' +#define SYNTAX_NAMESPACE_DELIM '!' #define SYNTAX_LBLOCK '{' #define SYNTAX_RBLOCK '}' #define SYNTAX_RLIST '[' #define SYNTAX_LLIST ']' #define SYNTAX_ESC '\\' +/* characters that can appear in keywords */ +#define SYNTAX_KWD_CHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_~|&+-/*<>=%^$@?" + #endif diff --git a/src/include/token.h b/src/include/token.h index 129ed76..9e64c97 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -24,6 +24,7 @@ typedef struct TOKEN_STRUC { TOKEN_RLIST, TOKEN_LLIST, TOKEN_ESC, + TOKEN_KWD, TOKEN_INT } type; diff --git a/src/include/tree.h b/src/include/tree.h index a240a18..108f646 100644 --- a/src/include/tree.h +++ b/src/include/tree.h @@ -27,6 +27,11 @@ typedef struct TREE_T_STRUCT { arg_t* arg; struct TREE_T_STRUCT* val; } tree_def; + + struct { + char* id; + arg_t* arg; + } tree_cal; } data; } tree_t; diff --git a/src/lexer.c b/src/lexer.c index d52029c..613e161 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -94,12 +94,16 @@ void lexer_do_reg(lexer_t* lexer) { default: if (isdigit(*lexer->src)) { lexer_add_current_char(lexer, TOKEN_INT); - lexer->state = LEXER_STATE_INT; + if (isdigit(*(lexer->src + 1))) { lexer->state = LEXER_STATE_INT; } + + } else if (strchr(SYNTAX_KWD_CHARS, *lexer->src)) { + lexer_add_current_char(lexer, TOKEN_KWD); + if (strchr(SYNTAX_KWD_CHARS, *(lexer->src + 1))) { lexer->state = LEXER_STATE_KWD; } + } else { lexer_add_current_char(lexer, TOKEN_UNKNOWN); lexer->state = LEXER_STATE_REG; } - } } @@ -133,8 +137,18 @@ void lexer_do_int(lexer_t* lexer) { lexer_add_current_char_to_last_token(lexer, TOKEN_INT); if (! isdigit(*(lexer->src + 1))) { lexer->state = LEXER_STATE_REG; } } else { - log_err("???"); + log_err("int state at non-int token"); + } +} + +void lexer_do_kwd(lexer_t* lexer) { + if (strchr(SYNTAX_KWD_CHARS, *lexer->src)) { + lexer_add_current_char_to_last_token(lexer, TOKEN_KWD); + if (! strchr(SYNTAX_KWD_CHARS, *(lexer->src + 1))) { lexer->state = LEXER_STATE_REG; } + } else { + log_err("keyword state at non-keyword token"); } + } void lexer_run(lexer_t* lexer) { @@ -143,8 +157,10 @@ void lexer_run(lexer_t* lexer) { else if (lexer->state == LEXER_STATE_TAG) { lexer_do_tag(lexer); } else if (lexer->state == LEXER_STATE_STR) { lexer_do_str(lexer); } else if (lexer->state == LEXER_STATE_INT) { lexer_do_int(lexer); } + else if (lexer->state == LEXER_STATE_KWD) { lexer_do_kwd(lexer); } lexer->src ++; } + /* print tokens *AFTER* they've been discovered */ token_print(lexer->tokenl); } -- cgit v1.2.3