diff options
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/lexer.h | 82 | ||||
-rw-r--r-- | src/include/token.h | 78 |
2 files changed, 77 insertions, 83 deletions
diff --git a/src/include/lexer.h b/src/include/lexer.h
index 8d4f75f..173c57d 100644
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@@ -9,46 +9,52 @@
 
 /* the lexer struct */
 typedef struct LEXER_STRUC {
-    /* current character in content */
-    char c;
-
-    /* index of c */
-    unsigned int i;
-
     /* source being read */
-    char* content;
+    char* src;
+
+    /* what the lexer is looking at right now */
+    enum LEXER_STATE {
+        /* normal 1-character token */
+        LEXER_STATE_REG,
+        /* character */
+        LEXER_STATE_CHR,
+        /* string */
+        LEXER_STATE_STR,
+        /* definition */
+        LEXER_STATE_DEF,
+        /* call */
+        LEXER_STATE_CAL
+    } state;
+
+    /* the linked list of tokens generated */
+    token_t* tokenl;
+    int tokenc;
 } lexer_t;
 
-
-/* create lexer from source code */
-extern lexer_t* lexer_init (char* content);
-
-/* destroy the lexer */
-extern void lexer_destroy (lexer_t* lexer);
-
-/* move lexer forward one char */
-extern void lexer_next (lexer_t* lexer);
-
-/* skip useless characters */
-extern void lexer_pass (lexer_t* lexer);
-
-/* create tokens */
-extern token_t* lexer_get_next_token (lexer_t* lexer);
-
-/* create token and move 1 char */
-extern token_t* lexer_next_token (lexer_t* lexer, int token_type);
-
-/* create string from lexer->c */
-extern char* lexer_get_c_as_string (lexer_t* lexer);
-
-/*
-    int fskip: skip first char?
-
-    int lskip: skip last char?
-*/
-extern token_t* lexer_collect (lexer_t* lexer, int (*end_char)(char), int fskip, int lskip, int type);
-
-/* run lexer from source */
-lexer_t* lexer_run(lexer_t*);
+/* create lexer from source */
+lexer_t* lexer_init (char* src);
+
+/* destroy lexer **but not src or tokenl** */
+void lexer_destroy (lexer_t* lexer);
+
+/* add token to tokenv */
+void lexer_add_token(lexer_t* lexer, token_t* token);
+/* add the current character as a token to tokenl -- utility function for
+   lexer_do_reg() */
+void lexer_add_current_char(lexer_t* lexer, int type);
+
+/* handle regular state */
+void lexer_do_reg(lexer_t*);
+/* handle character state */
+void lexer_do_chr(lexer_t*);
+/* handle string state */
+void lexer_do_str(lexer_t*);
+/* handle definition state */
+void lexer_do_def(lexer_t*);
+/* handle call state */
+void lexer_do_cal(lexer_t*);
+
+/* run lexer */
+void lexer_run(lexer_t*);
 
 #endif
diff --git a/src/include/token.h b/src/include/token.h
index 1a307cd..802f13d 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -1,56 +1,44 @@
 #ifndef TOKEN_H
 #define TOKEN_H
 
-#define TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS "+-/*abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"
-#define TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS_LEN 57
-#define TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS "1234567890_-"
-#define TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS_LEN 12
-#define TOKEN_CHAR_IGNORE " \t\n\r"
-#define TOKEN_CHAR_IGNORE_LEN 4
-#define TOKEN_CHAR_FIRST_CHAR_INT "0123456789"
-
+#include "util.h"
 
+/* token struct */
 typedef struct TOKEN_STRUC {
-    enum TOKEN_ENUM {
-        TOKEN_KEYWORD, // keyword
-        TOKEN_PRIM_STR, // "string"
-        TOKEN_PRIM_INT, // 42
-        TOKEN_COMM, // `comment`
-        TOKEN_STMNT_END, // ;
-        TOKEN_LGROUP, // (
-        TOKEN_RGROUP, // )
-        TOKEN_DIRECTIVE, // #DIRECTIVE#
-        TOKEN_FN_APPLY, // .
-        TOKEN_LIST_DELIM, // ,
-        TOKEN_DEF_TAG, // def:def
-        TOKEN_BLOCK_START, // {
-        TOKEN_BLOCK_END, // }
-        TOKEN_NAMESPACE_DELIM, // /
-        TOKEN_ARRAY_START, // [
-        TOKEN_ARRAY_END, // ]
-        TOKEN_DEF_SET, // =
-        TOKEN_UNKNOWN, // ???
-        TOKEN_EOF, // \0
-    } type;
-
-    char* value;
+    /* token type */
+    enum TOKEN_TYPE {
+        TOKEN_UNKNOWN,
+        TOKEN_CHAR_DELIM,
+        TOKEN_STR_DELIM,
+        TOKEN_COMMENT_DELIM,
+        TOKEN_EXPR_END,
+        TOKEN_SET,
+        TOKEN_LGROUP,
+        TOKEN_RGROUP,
+        TOKEN_APPLY,
+        TOKEN_LIST_DELIM,
+        TOKEN_TAG_DELIM,
+        TOKEN_NAMESPACE_DELIM,
+        TOKEN_LBLOCK,
+        TOKEN_RBLOCK,
+        TOKEN_RLIST,
+        TOKEN_LLIST,
+        TOKEN_ESC
+    } type;
+
+    /* token value */
+    char* val;
+
+    /* next token */
+    struct TOKEN_STRUC* nxt;
 } token_t;
 
+/* creates a token */
 token_t* token_init(int type, char* val);
-
-char* token_get_type(int type);
-
-int char_could_start_keyword(char* character);
-int char_could_split_keyword(char* character);
-int char_could_start_int(char* character);
-int char_can_ignore(char* character);
-
-int token_char_quote(char c);
-int token_char_grave(char c);
-int token_char_pound(char c);
-int token_char_colon(char c);
-int token_char_kywrd(char c);
-
+/* destroys a token **and all tokens contained in nxt** */
 void token_destroy(token_t* token);
 
+/* return pointer to the last token */
+token_t* token_last(token_t* token);
+
 #endif