From c0a2fd32d69afa8c9c5b31fa763c81fffe7f99f6 Mon Sep 17 00:00:00 2001 From: s-over-4 Date: Sat, 10 Jun 2023 18:30:31 -0400 Subject: ah --- src/include/lexer.h | 3 +- src/include/token.h | 2 +- src/lexer.c | 102 +++++++++++++--------------------------------------- src/main.c | 2 +- 4 files changed, 28 insertions(+), 81 deletions(-) (limited to 'src') diff --git a/src/include/lexer.h b/src/include/lexer.h index a86f5bb..853a35c 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -28,8 +28,7 @@ extern char* lexer_get_c_as_string (lexer_t* lexer); // collectors extern token_t* lexer_get_arr (lexer_t* lexer); -extern token_t* lexer_get_str (lexer_t* lexer); -extern token_t* lexer_get_com (lexer_t* lexer); +extern token_t* lexer_collect (lexer_t* lexer, char end_char, int fskip, int lskip, int type); // special def collectors extern token_t* lexer_get_directive (lexer_t* lexer); diff --git a/src/include/token.h b/src/include/token.h index 8964ccd..b58233d 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -10,7 +10,7 @@ #define TOKEN_CHAR_FIRST_CHAR_INT "0123456789" typedef struct TOKEN_STRUC { - enum { + enum TOKEN_ENUM { TOKEN_KEYWORD, // keyword TOKEN_PRIM_STR_DELIM, // ' TOKEN_PRIM_STR, // 'string' diff --git a/src/lexer.c b/src/lexer.c index e5d3c24..d137a07 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -53,10 +53,10 @@ token_t* lexer_get_next_token(lexer_t* lexer) { switch (lexer->c) { case '\'': - return lexer_get_str(lexer); + return lexer_collect(lexer, '\'', 1, 1, TOKEN_PRIM_STR); break; case '`': - return lexer_get_com(lexer); + return lexer_collect(lexer, '`', 1, 1, TOKEN_COMM); break; case ';': return lexer_next_token( @@ -90,7 +90,8 @@ token_t* lexer_get_next_token(lexer_t* lexer) { ) ); break; case '#': - return lexer_get_directive(lexer); break; + return lexer_collect(lexer, '#', 1, 1, TOKEN_DIRECTIVE); + break; case '.': return lexer_next_token( lexer, token_init( @@ -133,7 +134,9 @@ token_t* lexer_get_next_token(lexer_t* lexer) { lexer_get_c_as_string(lexer) ) ); break; - case '[': return lexer_get_arr(lexer); break; + case '[': + return lexer_collect(lexer, ']', 1, 1, TOKEN_PRIM_STR); + break; case '\0': return token_init(TOKEN_EOF, lexer_get_c_as_string(lexer)); break; default: log_err("Unrecognized token"); @@ -159,90 +162,37 @@ char* lexer_get_c_as_string(lexer_t* lexer) { return str; } -// TODO: abstract away this kind of thing -token_t* lexer_get_arr(lexer_t* lexer) { - lexer_next(lexer); // skip opening [ - char* array_so_far = calloc(1, sizeof(char)); - array_so_far[0] = '\0'; - - while (lexer->c != ']') { - char* current = lexer_get_c_as_string(lexer); - array_so_far = realloc( - array_so_far, - (strlen(array_so_far) + strlen(current) * sizeof(char)) - ); - - strcat(array_so_far, current); - lexer_next(lexer); - } - - lexer_next(lexer); // skip over closing ] - - return token_init(TOKEN_PRIM_STR, array_so_far); // return the collected array -} - -token_t* lexer_get_str(lexer_t* lexer) { - lexer_next(lexer); - char* str_so_far = calloc(1, sizeof(char)); - str_so_far[0] = '\0'; - - while (lexer->c != '\'') { - char* current = lexer_get_c_as_string(lexer); - str_so_far = realloc( - str_so_far, - (strlen(str_so_far) + strlen(current) * sizeof(char)) - ); - - strcat(str_so_far, current); - lexer_next(lexer); +// fskip: skip first char? +// lskip: skip last char? +token_t* lexer_collect(lexer_t* lexer, char end_char, int fskip, int lskip, int type) { + if (fskip) { + lexer_next(lexer); // skip over starting character, e.g. opening quote or grave } - lexer_next(lexer); // skip over closing ' - - return token_init(TOKEN_PRIM_STR, str_so_far); // return the collected string -} - -token_t* lexer_get_com(lexer_t* lexer) { - lexer_next(lexer); + size_t len = 0; // length of collected token so far + char* token = calloc(len, sizeof(char)); + token[0] = '\0'; - char* comment_so_far = calloc(1, sizeof(char)); - comment_so_far[0] = '\0'; - - while (lexer->c != '`') { + while (lexer->c != end_char) { char* current = lexer_get_c_as_string(lexer); - comment_so_far = realloc( - comment_so_far, - (strlen(comment_so_far) + strlen(current) * sizeof(char)) + printf("[%p, %ld]\n", token, (strlen(token) + strlen(current) * sizeof(char))); + token = realloc( + token, + (len + strlen(current) * sizeof(char)) ); - strcat(comment_so_far, current); + memcpy(token + len, current, strlen(current) * sizeof(char)); + len += strlen(current) * sizeof(char); lexer_next(lexer); } - lexer_next(lexer); // skip over closing ` - - return token_init(TOKEN_COMM, comment_so_far); -} - -token_t* lexer_get_directive(lexer_t* lexer) { - lexer_next(lexer); - char* directive_so_far = calloc(1, sizeof(char)); - directive_so_far[0] = '\0'; - - while (lexer->c != '#') { - char* current = lexer_get_c_as_string(lexer); - directive_so_far = realloc( - directive_so_far, - (strlen(directive_so_far) + strlen(current) * sizeof(char)) - ); - - strcat(directive_so_far, current); + if (lskip) { lexer_next(lexer); } - lexer_next(lexer); + token[len] = '\0'; // null terminate - return token_init(TOKEN_DIRECTIVE, directive_so_far); + return token_init(type, token); } token_t* lexer_get_keyword(lexer_t* lexer) { @@ -259,7 +209,5 @@ token_t* lexer_get_keyword(lexer_t* lexer) { lexer_next(lexer); } - return token_init(TOKEN_KEYWORD, keyword_so_far); } - diff --git a/src/main.c b/src/main.c index 755a4e8..040ef73 100644 --- a/src/main.c +++ b/src/main.c @@ -13,7 +13,7 @@ int main(int argc, char* argv[]) { long fsource_size; char *source; - fsource = fopen ("examples/hello.halk", "rb"); + fsource = fopen ("examples/hello.halk", "rb"); if (!fsource) { log_err("Source file not found"); exit(1); -- cgit v1.2.3