aboutsummaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorc+12023-10-05 11:02:22 -0400
committerc+12023-10-05 11:02:22 -0400
commit668e0fb0f4fc4bdd990d9ab349da445960d5764e (patch)
treea6411de1b7806d6cb91d84158cd7bc513dee235b /src/include
parent4d9dd289951589ddf408fdec62245b20cfe199c7 (diff)
redesign the lexer, some mem leaks :(
Diffstat (limited to 'src/include')
-rw-r--r--src/include/lexer.h82
-rw-r--r--src/include/token.h78
2 files changed, 77 insertions, 83 deletions
diff --git a/src/include/lexer.h b/src/include/lexer.h
index 8d4f75f..173c57d 100644
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@@ -9,46 +9,52 @@
/* the lexer struct */
typedef struct LEXER_STRUC {
- /* current character in content */
- char c;
-
- /* index of c */
- unsigned int i;
-
/* source being read */
- char* content;
+ char* src;
+
+ /* what the lexer is looking at right now */
+ enum LEXER_STATE {
+ /* normal 1-character token */
+ LEXER_STATE_REG,
+ /* character */
+ LEXER_STATE_CHR,
+ /* string */
+ LEXER_STATE_STR,
+ /* definition */
+ LEXER_STATE_DEF,
+ /* call */
+ LEXER_STATE_CAL
+ } state;
+
+ /* the linked list of tokens generated */
+ token_t* tokenl;
+ int tokenc;
} lexer_t;
-
-/* create lexer from source code */
-extern lexer_t* lexer_init (char* content);
-
-/* destroy the lexer */
-extern void lexer_destroy (lexer_t* lexer);
-
-/* move lexer forward one char */
-extern void lexer_next (lexer_t* lexer);
-
-/* skip useless characters */
-extern void lexer_pass (lexer_t* lexer);
-
-/* create tokens */
-extern token_t* lexer_get_next_token (lexer_t* lexer);
-
-/* create token and move 1 char */
-extern token_t* lexer_next_token (lexer_t* lexer, int token_type);
-
-/* create string from lexer->c */
-extern char* lexer_get_c_as_string (lexer_t* lexer);
-
-/*
- int fskip: skip first char?
-
- int lskip: skip last char?
-*/
-extern token_t* lexer_collect (lexer_t* lexer, int (*end_char)(char), int fskip, int lskip, int type);
-
-/* run lexer from source */
-lexer_t* lexer_run(lexer_t*);
+/* create lexer from source */
+lexer_t* lexer_init (char* src);
+
+/* destroy lexer **but not src or tokenl** */
+void lexer_destroy (lexer_t* lexer);
+
+/* add token to tokenl */
+void lexer_add_token(lexer_t* lexer, token_t* token);
+/* add the current character as a token to tokenl -- utility function for
+ lexer_do_reg() */
+void lexer_add_current_char(lexer_t* lexer, int type);
+
+/* handle regular state */
+void lexer_do_reg(lexer_t*);
+/* handle character state */
+void lexer_do_chr(lexer_t*);
+/* handle string state */
+void lexer_do_str(lexer_t*);
+/* handle definition state */
+void lexer_do_def(lexer_t*);
+/* handle call state */
+void lexer_do_cal(lexer_t*);
+
+/* run lexer */
+void lexer_run(lexer_t*);
#endif
diff --git a/src/include/token.h b/src/include/token.h
index 1a307cd..802f13d 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -1,56 +1,44 @@
#ifndef TOKEN_H
#define TOKEN_H
-#define TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS "+-/*abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"
-#define TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS_LEN 57
-#define TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS "1234567890_-"
-#define TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS_LEN 12
-#define TOKEN_CHAR_IGNORE " \t\n\r"
-#define TOKEN_CHAR_IGNORE_LEN 4
-#define TOKEN_CHAR_FIRST_CHAR_INT "0123456789"
-
+#include "util.h"
+/* token struct */
typedef struct TOKEN_STRUC {
- enum TOKEN_ENUM {
- TOKEN_KEYWORD, // keyword
- TOKEN_PRIM_STR, // "string"
- TOKEN_PRIM_INT, // 42
- TOKEN_COMM, // `comment`
- TOKEN_STMNT_END, // ;
- TOKEN_LGROUP, // (
- TOKEN_RGROUP, // )
- TOKEN_DIRECTIVE, // #DIRECTIVE#
- TOKEN_FN_APPLY, // .
- TOKEN_LIST_DELIM, // ,
- TOKEN_DEF_TAG, // def:def
- TOKEN_BLOCK_START, // {
- TOKEN_BLOCK_END, // }
- TOKEN_NAMESPACE_DELIM, // /
- TOKEN_ARRAY_START, // [
- TOKEN_ARRAY_END, // ]
- TOKEN_DEF_SET, // =
- TOKEN_UNKNOWN, // ???
- TOKEN_EOF, // \0
- } type;
-
- char* value;
+ /* token type */
+ enum TOKEN_TYPE {
+ TOKEN_UNKNOWN,
+ TOKEN_CHAR_DELIM,
+ TOKEN_STR_DELIM,
+ TOKEN_COMMENT_DELIM,
+ TOKEN_EXPR_END,
+ TOKEN_SET,
+ TOKEN_LGROUP,
+ TOKEN_RGROUP,
+ TOKEN_APPLY,
+ TOKEN_LIST_DELIM,
+ TOKEN_TAG_DELIM,
+ TOKEN_NAMESPACE_DELIM,
+ TOKEN_LBLOCK,
+ TOKEN_RBLOCK,
+ TOKEN_RLIST,
+ TOKEN_LLIST,
+ TOKEN_ESC
+ } type;
+
+ /* token value */
+ char* val;
+
+ /* next token */
+ struct TOKEN_STRUC* nxt;
} token_t;
+/* creates a token */
token_t* token_init(int type, char* val);
-
-char* token_get_type(int type);
-
-int char_could_start_keyword(char* character);
-int char_could_split_keyword(char* character);
-int char_could_start_int(char* character);
-int char_can_ignore(char* character);
-
-int token_char_quote(char c);
-int token_char_grave(char c);
-int token_char_pound(char c);
-int token_char_colon(char c);
-int token_char_kywrd(char c);
-
+/* destroys a token **and all tokens contained in nxt** */
void token_destroy(token_t* token);
+/* return pointer to the last token */
+token_t* token_last(token_t* token);
+
#endif