diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/include/lexer.h | 8 | ||||
-rw-r--r-- | src/include/token.h | 28 | ||||
-rw-r--r-- | src/lexer.c | 117 | ||||
-rw-r--r-- | src/parser.c | 6 |
4 files changed, 95 insertions, 64 deletions
diff --git a/src/include/lexer.h b/src/include/lexer.h index eb80646..bff0a80 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -20,9 +20,15 @@ extern token_t* lexer_get_next_token(lexer_t* lexer); extern token_t* lexer_get_string(lexer_t* lexer); +extern token_t* lexer_get_def_const(lexer_t* lexer); + +extern token_t* lexer_get_def_mut(lexer_t* lexer); + extern token_t* lexer_get_comment(lexer_t* lexer); -extern token_t* lexer_get_id(lexer_t* lexer); +extern token_t* lexer_get_directive(lexer_t* lexer); + +extern token_t* lexer_get_keyword(lexer_t* lexer); extern token_t* lexer_next_token(lexer_t* lexer, token_t* token); diff --git a/src/include/token.h b/src/include/token.h index 147862c..4ca8356 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -4,20 +4,20 @@ typedef struct TOKEN_STRUC { enum { - TOKEN_ID, // keyword - TOKEN_EQ, // '=' - TOKEN_STR, // "string" - TOKEN_SEMI, // ';' - TOKEN_LORD, // '/' - TOKEN_RORD, // '\' - TOKEN_AMP, // '&' - TOKEN_LBRAK, // '[' - TOKEN_RBRAK, // ']' - TOKEN_POUND, // '#'' - TOKEN_TILDE, // '~' - TOKEN_QUOTE, // ''' - TOKEN_COMM, // '[comment]' - TOKEN_EOF // '\0' + TOKEN_KEYWORD, // keyword + TOKEN_STR_DELIM, // '"' + TOKEN_STR, // "string" + TOKEN_COMM_DELIM_START, // '[' + TOKEN_COMM, // '[comment]' + TOKEN_COMM_DELIM_END, // ']' + TOKEN_DEFINE_CONST, // '=>' + TOKEN_DEFINE_MUT, // '->' + TOKEN_END, // ';' + TOKEN_LORD, // '(' + TOKEN_RORD, // ')' + TOKEN_DIRECTIVE_DELIM, // '#' + TOKEN_DIRECTIVE, // #DIRECTIVE; + TOKEN_EOF, // '\0' } type; char* value; diff --git a/src/lexer.c b/src/lexer.c index e23b078..77de3e4 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -27,9 +27,10 @@ void lexer_next(lexer_t* lexer) { } void lexer_pass(lexer_t* lexer) { - while (lexer->c == ' ' || - lexer->c == '\t' || - lexer->c == '\n') { + while ( + lexer->c == ' ' || + lexer->c == '\t' || + lexer->c == '\n') { lexer_next(lexer); } } @@ -43,62 +44,39 @@ token_t* lexer_get_next_token(lexer_t* lexer) { } if (isalnum(lexer->c)) - return lexer_get_id(lexer); + return lexer_get_keyword(lexer); switch (lexer->c) { - case '"': return lexer_get_string(lexer); break; - case '=': return lexer_next_token( + case '"': + return lexer_get_string(lexer); break; + case '[': + return lexer_get_comment(lexer); break; + case '=': + return lexer_get_def_const(lexer); break; + case '-': + return lexer_get_def_mut(lexer); break; + case ';': return lexer_next_token( lexer, token_init( - TOKEN_EQ, + TOKEN_END, lexer_get_c_as_string(lexer) ) ); break; - case '{': return lexer_next_token( + case '(': return lexer_next_token( lexer, token_init( TOKEN_LORD, lexer_get_c_as_string(lexer) ) ); break; - case '}': return lexer_next_token( + case ')': return lexer_next_token( lexer, token_init( TOKEN_RORD, lexer_get_c_as_string(lexer) ) ); break; - case '&': return lexer_next_token( - lexer, - token_init( - TOKEN_AMP, - lexer_get_c_as_string(lexer) - ) - ); break; - case '[': - return lexer_get_comment(lexer); - break; - case '#': return lexer_next_token( - lexer, - token_init( - TOKEN_POUND, - lexer_get_c_as_string(lexer) - ) - ); break; - case '~': return lexer_next_token( - lexer, - token_init( - TOKEN_TILDE, - lexer_get_c_as_string(lexer) - ) - ); break; - case ';': return lexer_next_token( - lexer, - token_init( - TOKEN_SEMI, - lexer_get_c_as_string(lexer) - ) - ); break; + case '#': default: exit(1); } @@ -114,9 +92,12 @@ token_t* lexer_get_string(lexer_t* lexer) { str_so_far[0] = '\0'; while (lexer->c != '"') { - // until reaching the closing ", add each character to str_so_far and adjust size to match. char* current = lexer_get_c_as_string(lexer); - str_so_far = realloc(str_so_far, (strlen(str_so_far) + strlen(current) * sizeof(char))); + str_so_far = realloc( + str_so_far, + (strlen(str_so_far) + strlen(current) * sizeof(char)) + ); + strcat(str_so_far, current); lexer_next(lexer); @@ -124,12 +105,14 @@ token_t* lexer_get_string(lexer_t* lexer) { lexer_next(lexer); // skip over closing " - return token_init(TOKEN_QUOTE, str_so_far); + return token_init(TOKEN_STR, str_so_far); } token_t* lexer_get_comment(lexer_t* lexer) { + lexer_next(lexer); while (lexer->c != ']') { - lexer_next(lexer); + lexer_next(lexer); // don't need to keep track of comments + // for now. might change this later. } lexer_next(lexer); // skip over closing ] @@ -137,7 +120,48 @@ token_t* lexer_get_comment(lexer_t* lexer) { return token_init(TOKEN_COMM, lexer_get_c_as_string(lexer)); } -token_t* lexer_get_id(lexer_t* lexer) { +token_t* lexer_get_def_const(lexer_t* lexer) { + lexer_pass(lexer); + + if (lexer_next(lexer), lexer->c == '>') { + lexer_next(lexer); + return token_init(TOKEN_DEFINE_CONST, "=>"); + } else { + exit(1); + } +} + +token_t* lexer_get_def_mut(lexer_t* lexer) { + lexer_pass(lexer); + + if (lexer_next(lexer), lexer->c == '>') { + lexer_next(lexer); + return token_init(TOKEN_DEFINE_MUT, "->"); + } else { + exit(1); + } +} + +token_t* lexer_get_directive(lexer_t* lexer) { + lexer_next(lexer); + + char* directive_so_far = calloc(1, sizeof(char)); + directive_so_far[0] = '\0'; + + while (lexer->c != ';') { + char* current = lexer_get_c_as_string(lexer); + directive_so_far = realloc(directive_so_far, (strlen(directive_so_far) + strlen(current) * sizeof(char))); + strcat(directive_so_far, current); + + lexer_next(lexer); + } + + lexer_next(lexer); + + return token_init(TOKEN_DIRECTIVE, directive_so_far); +} + +token_t* lexer_get_keyword(lexer_t* lexer) { char* str_so_far = calloc(1, sizeof(char)); str_so_far[0] = '\0'; @@ -149,7 +173,7 @@ token_t* lexer_get_id(lexer_t* lexer) { lexer_next(lexer); } - return token_init(TOKEN_ID, str_so_far); + return token_init(TOKEN_KEYWORD, str_so_far); } token_t* lexer_next_token(lexer_t* lexer, token_t* token) { @@ -162,6 +186,7 @@ char* lexer_get_c_as_string(lexer_t* lexer) { char* str = calloc(2, 1 * sizeof(char)); str[0] = lexer->c; str[1] = '\0'; + return str; } diff --git a/src/parser.c b/src/parser.c index 4468997..0a437a6 100644 --- a/src/parser.c +++ b/src/parser.c @@ -44,7 +44,7 @@ tree_t* parser_parse_token_id(parser_t* parser) { // parse a single chunk tree_t* parser_parse_chunk(parser_t* parser) { switch (parser->token->type) { - case TOKEN_ID: { + case TOKEN_KEYWORD: { return parser_parse_token_id(parser); } } @@ -62,9 +62,9 @@ tree_t* parser_parse_chunks(parser_t* parser) { subtree->data.subtree.val[0] = tree_chunk; - while (parser->token->type == TOKEN_SEMI) { + while (parser->token->type == TOKEN_END) { // expect semicolon - parser_check_expect(parser, TOKEN_SEMI); + parser_check_expect(parser, TOKEN_END); // make room for new subtree subtree->data.subtree.size ++; |