diff options
author | c+1 | 2023-05-22 15:58:13 -0400 |
---|---|---|
committer | c+1 | 2023-05-22 15:58:13 -0400 |
commit | d83d37ecd5682252f85af099c3353525f1cb1394 (patch) | |
tree | 150f6f0ca3a9c5c217a86cd50d84ea170026537e | |
parent | 45b152974bb6965209287945fd706d3b0c2df9ba (diff) |
yay :)
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | examples/hello.halk | 6 | ||||
-rw-r--r-- | src/include/token.h | 25 | ||||
-rw-r--r-- | src/lexer.c | 48 | ||||
-rw-r--r-- | src/main.c | 27 | ||||
-rw-r--r-- | src/token.c | 10 |
6 files changed, 79 insertions, 38 deletions
@@ -1,6 +1,7 @@ exec = halk.out sources := $(wildcard src/*.c) objects = $(sources:.c=.o) +sources := $(filter-out src/parser.c, $(sources)) # exclude the incomplete parser for now. flags = -g $(exec): $(objects) diff --git a/examples/hello.halk b/examples/hello.halk index 8b6b038..719ba93 100644 --- a/examples/hello.halk +++ b/examples/hello.halk @@ -1,9 +1,9 @@ ` comments in backticks ` ` preprocessor directives ` -#INCLUDE.'math', 'm'; ` bring the math library into scope, under the namespace 'm' ` -#INCLUDE.'io', ''; ` bring the io library into global scope (with no namespace) ` - +#INCLUDE##math# +#INCLUDE##io# +` source code begins here ` str:hello = 'hello, '; ` variables must be given a value at declaration ` diff --git a/src/include/token.h b/src/include/token.h index f7a166f..e52c3a0 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -1,19 +1,27 @@ #ifndef TOKEN_H #define TOKEN_H +#define TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" +#define TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS_LEN 53 +#define TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS "1234567890_-" +#define TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS_LEN 12 +#define TOKEN_CHAR_IGNORE " \t\n\r" +#define TOKEN_CHAR_IGNORE_LEN 4 +#define TOKEN_CHAR_FIRST_CHAR_INT "0123456789" typedef struct TOKEN_STRUC { enum { TOKEN_KEYWORD, // keyword - TOKEN_STR_DELIM, // ' - TOKEN_STR, // 'string' + TOKEN_PRIM_STR_DELIM, // ' + TOKEN_PRIM_STR, // 'string' + TOKEN_PRIM_INT, // 'string' TOKEN_COMM_DELIM, // ` TOKEN_COMM, // `comment` TOKEN_EXPR_END, // ; TOKEN_LGROUP, // ( TOKEN_RGROUP, // ) TOKEN_DIRECTIVE_DELIM, // # - TOKEN_DIRECTIVE, // #DIRECTIVE; + TOKEN_DIRECTIVE, // #DIRECTIVE# TOKEN_FN_APPLY, // . TOKEN_LIST_DELIM, // , TOKEN_DEF_TAGS_DELIM, // : @@ -23,6 +31,7 @@ typedef struct TOKEN_STRUC { TOKEN_NAMESPACE_DELIM, // / TOKEN_ARRAY_DELIM_START, // [ TOKEN_ARRAY_DELIM_END, // ] + TOKEN_DEF_SET, // = TOKEN_EOF, // \0 } type; @@ -31,17 +40,9 @@ typedef struct TOKEN_STRUC { token_t* token_init(int type, char* val); -char TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS[] = "abcdefghijklmnopqrstuvwxyz_"; // chars that can begin a var name -int TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS_LEN = 27; // maximum efficiency! -char TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS[] = "1234567890_-"; // chars that can be in the rest of the var name, - // not including the ones already defined to begin - // one. -int TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS_LEN = 12; -char TOKEN_CHAR_IGNORE[] = " \t\n\r"; // characters to ignore while parsing tokens -int TOKEN_CHAR_IGNORE_LEN = 4; - int char_could_start_keyword(char* character); int char_could_split_keyword(char* character); +int char_could_start_int(char* character); int char_can_ignore(char* character); diff --git a/src/lexer.c b/src/lexer.c index 484766f..5a04da0 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -6,7 +6,6 @@ #include "include/log.h" #include "include/lexer.h" -#include "include/token.h" lexer_t* lexer_init(char* content) { @@ -40,13 +39,17 @@ void lexer_pass(lexer_t* lexer) { token_t* lexer_get_next_token(lexer_t* lexer) { while (LEXER_VALID) { - if (char_can_ignore(&lexer->c)) { - lexer_pass(lexer); - } - - if (char_could_start_keyword(&lexer->c)) { - return lexer_get_keyword(lexer); + if (char_can_ignore(&lexer->c)) { lexer_pass(lexer); } + if (char_could_start_int(&lexer->c)) { + return lexer_next_token( + lexer, + token_init( + TOKEN_PRIM_INT, + lexer_get_c_as_string(lexer) + ) + ); } + if (char_could_start_keyword(&lexer->c)) { return lexer_get_keyword(lexer); } switch (lexer->c) { case '\'': @@ -65,9 +68,13 @@ token_t* lexer_get_next_token(lexer_t* lexer) { ); break; case '=': - return lexer_get_def_const(lexer); break; - case '-': - return lexer_get_def_mut(lexer); break; + return lexer_next_token( + lexer, + token_init( + TOKEN_DEF_SET, + lexer_get_c_as_string(lexer) + ) + ); break; case '(': return lexer_next_token( lexer, token_init( @@ -101,10 +108,17 @@ token_t* lexer_get_next_token(lexer_t* lexer) { case ':': return lexer_next_token( lexer, token_init( - TOKEN_VAR_DEF_ARGS_DELIM, + TOKEN_DEF_TAGS_DELIM, lexer_get_c_as_string(lexer) ) ); break; + case '/': return lexer_next_token( + lexer, + token_init( + TOKEN_NAMESPACE_DELIM, + lexer_get_c_as_string(lexer) + ) + ); break; case '{': return lexer_next_token( lexer, token_init( @@ -119,9 +133,11 @@ token_t* lexer_get_next_token(lexer_t* lexer) { lexer_get_c_as_string(lexer) ) ); break; + case '[': return lexer_get_array(lexer); break; case '\0': return token_init(TOKEN_EOF, lexer_get_c_as_string(lexer)); break; default: log_err("Unrecognized token"); + printf("%s", &lexer->c); exit(1); } } @@ -162,7 +178,7 @@ token_t* lexer_get_array(lexer_t* lexer) { lexer_next(lexer); // skip over closing ] - return token_init(TOKEN_STR, array_so_far); // return the collected array + return token_init(TOKEN_PRIM_STR, array_so_far); // return the collected array } token_t* lexer_get_string(lexer_t* lexer) { @@ -183,13 +199,14 @@ token_t* lexer_get_string(lexer_t* lexer) { lexer_next(lexer); // skip over closing ' - return token_init(TOKEN_STR, str_so_far); // return the collected string + return token_init(TOKEN_PRIM_STR, str_so_far); // return the collected string } token_t* lexer_get_comment(lexer_t* lexer) { lexer_next(lexer); char* comment_so_far = calloc(1, sizeof(char)); + comment_so_far[0] = '\0'; while (lexer->c != '`') { char* current = lexer_get_c_as_string(lexer); @@ -212,7 +229,7 @@ token_t* lexer_get_directive(lexer_t* lexer) { char* directive_so_far = calloc(1, sizeof(char)); directive_so_far[0] = '\0'; - while (lexer->c != ';') { + while (lexer->c != '#') { char* current = lexer_get_c_as_string(lexer); directive_so_far = realloc( directive_so_far, @@ -223,7 +240,7 @@ token_t* lexer_get_directive(lexer_t* lexer) { lexer_next(lexer); } - lexer_next(lexer); // skip over closing ; + lexer_next(lexer); return token_init(TOKEN_DIRECTIVE, directive_so_far); } @@ -242,6 +259,7 @@ token_t* lexer_get_keyword(lexer_t* lexer) { lexer_next(lexer); } + return token_init(TOKEN_KEYWORD, keyword_so_far); } @@ -1,10 +1,11 @@ #include <stdio.h> +#include <stdlib.h> #include "include/log.h" #include "include/lexer.h" -#include "include/tree.h" -#include "include/parser.h" +// #include "include/tree.h" +// #include "include/parser.h" int main(int argc, char* argv[]) { @@ -42,14 +43,24 @@ int main(int argc, char* argv[]) { lexer_t* lexer = lexer_init(source); log_inf("Lexer created"); - parser_t* parser = parser_init(lexer); - log_inf("Parser created"); + log_inf("== BEGIN INPUT =="); + log_inf(lexer->content); + log_inf("=== END INPUT ==="); - tree_t* tree = parser_parse(parser); - log_inf("Tree root created"); + token_t* token = NULL; - printf("TYPE: [%d]\n", tree->type); - printf("SIZE: [%d]\n", tree->data.subtree.size); + while ((token = lexer_get_next_token(lexer)) != NULL) { + printf("===\ntoken type: %d:\ntoken value: || %s ||\n===\n", token->type, token->value); + } + + //parser_t* parser = parser_init(lexer); + //log_inf("Parser created"); + + //tree_t* tree = parser_parse(parser); + //log_inf("Tree root created"); + + //printf("TYPE: [%d]\n", tree->type); + //printf("SIZE: [%d]\n", tree->data.subtree.size); fclose(fsource); diff --git a/src/token.c b/src/token.c index 9ea2ccf..432d44f 100644 --- a/src/token.c +++ b/src/token.c @@ -37,6 +37,16 @@ int char_could_split_keyword(char* character) { } } +int char_could_start_int(char* character) { + for (int i = 0; i < 10; ++ i) { + if (TOKEN_CHAR_FIRST_CHAR_INT[i] == *character) { + return 1; + } + } + + return 0; +} + int char_can_ignore(char* character) { for (int i = 0; i < TOKEN_CHAR_IGNORE_LEN; ++ i) { if (TOKEN_CHAR_IGNORE[i] == *character) { |