From c0a2fd32d69afa8c9c5b31fa763c81fffe7f99f6 Mon Sep 17 00:00:00 2001
From: s-over-4
Date: Sat, 10 Jun 2023 18:30:31 -0400
Subject: ah

---
 src/lexer.c | 102 +++++++++++++++---------------------------------------------
 1 file changed, 25 insertions(+), 77 deletions(-)

(limited to 'src/lexer.c')

diff --git a/src/lexer.c b/src/lexer.c
index e5d3c24..d137a07 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -53,10 +53,10 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
 
   switch (lexer->c) {
     case '\'':
-      return lexer_get_str(lexer);
+      return lexer_collect(lexer, '\'', 1, 1, TOKEN_PRIM_STR);
       break;
     case '`':
-      return lexer_get_com(lexer);
+      return lexer_collect(lexer, '`', 1, 1, TOKEN_COMM);
       break;
     case ';':
       return lexer_next_token(
@@ -90,7 +90,8 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
         )
       ); break;
     case '#':
-      return lexer_get_directive(lexer); break;
+      return lexer_collect(lexer, '#', 1, 1, TOKEN_DIRECTIVE);
+      break;
     case '.':
       return lexer_next_token(
         lexer, token_init(
@@ -133,7 +134,9 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
           lexer_get_c_as_string(lexer)
         )
       ); break;
-    case '[': return lexer_get_arr(lexer); break;
+    case '[':
+      return lexer_collect(lexer, ']', 1, 1, TOKEN_PRIM_STR);
+      break;
     case '\0': return token_init(TOKEN_EOF, lexer_get_c_as_string(lexer)); break;
     default: log_err("Unrecognized token");
 
@@ -159,90 +162,37 @@ char* lexer_get_c_as_string(lexer_t* lexer) {
   return str;
 }
 
-// TODO: abstract away this kind of thing
-token_t* lexer_get_arr(lexer_t* lexer) {
-  lexer_next(lexer); // skip opening [
-  char* array_so_far = calloc(1, sizeof(char));
-  array_so_far[0] = '\0';
-
-  while (lexer->c != ']') {
-    char* current = lexer_get_c_as_string(lexer);
-    array_so_far = realloc(
-      array_so_far,
-      (strlen(array_so_far) + strlen(current) * sizeof(char))
-    );
-
-    strcat(array_so_far, current);
-    lexer_next(lexer);
-  }
-
-  lexer_next(lexer); // skip over closing ]
-
-  return token_init(TOKEN_PRIM_STR, array_so_far); // return the collected array
-}
-
-token_t* lexer_get_str(lexer_t* lexer) {
-  lexer_next(lexer);
-  char* str_so_far = calloc(1, sizeof(char));
-  str_so_far[0] = '\0';
-
-  while (lexer->c != '\'') {
-    char* current = lexer_get_c_as_string(lexer);
-    str_so_far = realloc(
-      str_so_far,
-      (strlen(str_so_far) + strlen(current) * sizeof(char))
-    );
-
-    strcat(str_so_far, current);
-    lexer_next(lexer);
+// fskip: skip first char?
+// lskip: skip last char?
+token_t* lexer_collect(lexer_t* lexer, char end_char, int fskip, int lskip, int type) {
+  if (fskip) {
+    lexer_next(lexer); // skip over starting character, e.g. opening quote or grave
   }
 
-  lexer_next(lexer); // skip over closing '
-
-  return token_init(TOKEN_PRIM_STR, str_so_far); // return the collected string
-}
-
-token_t* lexer_get_com(lexer_t* lexer) {
-  lexer_next(lexer);
+  size_t len = 0; // length of collected token so far
+  char* token = calloc(1, sizeof(char)); // room for the terminator alone
+  token[0] = '\0';
 
-  char* comment_so_far = calloc(1, sizeof(char));
-  comment_so_far[0] = '\0';
-
-  while (lexer->c != '`') {
+  while (lexer->c != end_char) {
     char* current = lexer_get_c_as_string(lexer);
-    comment_so_far = realloc(
-      comment_so_far,
-      (strlen(comment_so_far) + strlen(current) * sizeof(char))
+    // grow the buffer: collected text + current character + null terminator
+    token = realloc(
+      token,
+      len + strlen(current) + 1
     );
 
-    strcat(comment_so_far, current);
+    memcpy(token + len, current, strlen(current));
+    len += strlen(current);
     lexer_next(lexer);
   }
 
-  lexer_next(lexer); // skip over closing `
-
-  return token_init(TOKEN_COMM, comment_so_far);
-}
-
-token_t* lexer_get_directive(lexer_t* lexer) {
-  lexer_next(lexer);
-  char* directive_so_far = calloc(1, sizeof(char));
-  directive_so_far[0] = '\0';
-
-  while (lexer->c != '#') {
-    char* current = lexer_get_c_as_string(lexer);
-    directive_so_far = realloc(
-      directive_so_far,
-      (strlen(directive_so_far) + strlen(current) * sizeof(char))
-    );
-
-    strcat(directive_so_far, current);
+  if (lskip) {
    lexer_next(lexer);
  }
 
-  lexer_next(lexer);
+  token[len] = '\0'; // null terminate
 
-  return token_init(TOKEN_DIRECTIVE, directive_so_far);
+  return token_init(type, token);
 }
 
 token_t* lexer_get_keyword(lexer_t* lexer) {
@@ -259,7 +209,5 @@ token_t* lexer_get_keyword(lexer_t* lexer) {
     lexer_next(lexer);
   }
 
-
   return token_init(TOKEN_KEYWORD, keyword_so_far);
 }
-
--
cgit v1.2.3
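The patch above folds four near-identical collectors (lexer_get_arr, lexer_get_str, lexer_get_com, lexer_get_directive) into the single lexer_collect helper, parameterized by end_char, the fskip/lskip flags, and the token type. Its core is a heap buffer grown with realloc on each append and kept null-terminated. The sketch below shows that grow-and-terminate pattern in isolation, with the buffer always sized one byte larger than the collected text; collect_until and the sample main are illustrative names only, not part of this repository.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Collect characters from src until delim (or end of string) into a
 * heap buffer that stays null-terminated after every append.
 * Illustrative sketch; not a function from the repository. */
static char* collect_until(const char* src, char delim) {
  size_t len = 0;
  char* out = calloc(1, sizeof(char)); /* room for the terminator alone */
  if (out == NULL) return NULL;

  while (*src != '\0' && *src != delim) {
    char* grown = realloc(out, len + 2); /* existing chars + new char + '\0' */
    if (grown == NULL) { free(out); return NULL; }
    out = grown;
    out[len++] = *src++;
    out[len] = '\0'; /* keep the buffer terminated */
  }

  return out;
}

int main(void) {
  char* str = collect_until("hello' tail", '\'');
  printf("collected: %s\n", str); /* prints: collected: hello */
  free(str);
  return 0;
}

Called this way, the sketch returns the heap string "hello", mirroring how lexer_collect gathers characters up to, but not including, end_char.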