about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: s-over-4, 2023-06-13 21:19:09 -0400
committer: s-over-4, 2023-06-13 21:19:09 -0400
commit 4b0d75d3dbfb85e10ee70c16000c48cf0de95740 (patch)
tree 03b1a199f8658b0fde7283bdccf698455f2df44c
parent 11fbe0ec9b8bf51d237fefee9254e68d3b22259b (diff)
lexer is done. forever.
-rw-r--r-- examples/functional.halk 13
-rw-r--r-- src/include/lexer.h 2
-rw-r--r-- src/include/token.h 6
-rw-r--r-- src/lexer.c 37
-rw-r--r-- src/token.c 7
5 files changed, 35 insertions, 30 deletions
diff --git a/examples/functional.halk b/examples/functional.halk
deleted file mode 100644
index a167b69..0000000
--- a/examples/functional.halk
+++ /dev/null
@@ -1,13 +0,0 @@
-` the Y-Combinator in HALK `
-` an empty : when not proceeded by a type is shorthand for :any `
-
-
-:Y = {
- :λ.:f = {
- :λ.:x = {
- f.x.x
- }. :λ.:x = {
- f.x.x
- }
- }
-}
diff --git a/src/include/lexer.h b/src/include/lexer.h
index f9db17e..200be5d 100644
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@@ -28,7 +28,7 @@ extern char* lexer_get_c_as_string (lexer_t* lexer);
// collectors
extern token_t* lexer_get_arr (lexer_t* lexer);
-extern token_t* lexer_collect (lexer_t* lexer, char end_char, int fskip, int lskip, int type);
+extern token_t* lexer_collect (lexer_t* lexer, int (*end_char)(char), int fskip, int lskip, int type);
// special def collectors
extern token_t* lexer_get_directive (lexer_t* lexer);
diff --git a/src/include/token.h b/src/include/token.h
index f05e962..5fe9d35 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -45,5 +45,11 @@ int char_could_split_keyword(char* character);
int char_could_start_int(char* character);
int char_can_ignore(char* character);
+int token_char_quote(char c);
+int token_char_grave(char c);
+int token_char_pound(char c);
+int token_char_colon(char c);
+int token_char_kywrd(char c);
+
#endif
diff --git a/src/lexer.c b/src/lexer.c
index 6cde958..08105dc 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -40,14 +40,14 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
if (char_can_ignore(&lexer->c)) { lexer_pass(lexer); }
if (char_could_start_int(&lexer->c)) { return lexer_next_token(lexer, TOKEN_PRIM_INT); }
- if (char_could_start_keyword(&lexer->c)) { return lexer_get_keyword(lexer); }
+ if (char_could_start_keyword(&lexer->c)) { return lexer_collect(lexer, token_char_kywrd, 0, 1, TOKEN_KEYWORD); }
switch (lexer->c) {
case '\'':
- return lexer_collect(lexer, '\'', 1, 1, TOKEN_PRIM_STR);
+ return lexer_collect(lexer, token_char_quote, 1, 1, TOKEN_PRIM_STR);
break;
case '`':
- return lexer_collect(lexer, '`', 1, 1, TOKEN_COMM);
+ return lexer_collect(lexer, token_char_grave, 1, 1, TOKEN_COMM);
break;
case ';':
return lexer_next_token(lexer, TOKEN_EXPR_END);
@@ -62,7 +62,7 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
return lexer_next_token(lexer, TOKEN_RGROUP);
break;
case '#':
- return lexer_collect(lexer, '#', 1, 1, TOKEN_DIRECTIVE);
+ return lexer_collect(lexer, token_char_pound, 1, 1, TOKEN_DIRECTIVE);
break;
case '.':
return lexer_next_token(lexer, TOKEN_FN_APPLY);
@@ -71,7 +71,7 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
return lexer_next_token(lexer, TOKEN_LIST_DELIM);
break;
case ':':
- return lexer_collect(lexer, ':', 1, 1, TOKEN_DEF_TAG);
+ return lexer_collect(lexer, token_char_colon, 1, 1, TOKEN_DEF_TAG);
break;
case '/':
return lexer_next_token(lexer, TOKEN_NAMESPACE_DELIM);
@@ -83,7 +83,10 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
return lexer_next_token(lexer, TOKEN_BLOCK_END);
break;
case '[':
- return lexer_collect(lexer, ']', 1, 1, TOKEN_PRIM_STR);
+ return lexer_next_token(lexer, TOKEN_ARRAY_START);
+ break;
+ case ']':
+ return lexer_next_token(lexer, TOKEN_ARRAY_END);
break;
case '\0':
return token_init(TOKEN_EOF, lexer_get_c_as_string(lexer));
@@ -103,7 +106,7 @@ token_t* lexer_next_token(lexer_t* lexer, int token_type) {
}
char* lexer_get_c_as_string(lexer_t* lexer) {
- char* str = calloc(2, 1 * sizeof(char));
+ char* str = calloc(2, sizeof(char));
str[0] = lexer->c;
str[1] = '\0';
@@ -112,14 +115,14 @@ char* lexer_get_c_as_string(lexer_t* lexer) {
// fskip: skip first char?
// lskip: skip last char?
-token_t* lexer_collect(lexer_t* lexer, char end_char, int fskip, int lskip, int type) {
+token_t* lexer_collect(lexer_t* lexer, int (*end_char)(char), int fskip, int lskip, int type) {
if (fskip) { lexer_next(lexer); }
size_t len = 0; // length of collected token so far
char* token = calloc(len, sizeof(char));
token[0] = '\0';
- while (lexer->c != end_char) {
+ while (end_char(lexer->c)) {
char* current = lexer_get_c_as_string(lexer);
token = realloc(
token,
@@ -139,19 +142,21 @@ token_t* lexer_collect(lexer_t* lexer, char end_char, int fskip, int lskip, int
}
token_t* lexer_get_keyword(lexer_t* lexer) {
- char* keyword_so_far = calloc(1, sizeof(char));
- keyword_so_far[0] = '\0';
+ size_t len = 0;
+ char* keyword = calloc(len, sizeof(char));
+ keyword[0] = '\0';
while (char_could_split_keyword(&lexer->c)) {
char* current = lexer_get_c_as_string(lexer);
- keyword_so_far = realloc(
- keyword_so_far,
- (strlen(keyword_so_far) + strlen(current) * sizeof(char))
+ keyword = realloc(
+ keyword,
+ (len + strlen(current) * sizeof(char))
);
- strcat(keyword_so_far, current);
+ memcpy(keyword + len, current, strlen(current) * sizeof(char));
+ len += strlen(current) * sizeof(char);
free(current);
lexer_next(lexer);
}
- return token_init(TOKEN_KEYWORD, keyword_so_far);
+ return token_init(TOKEN_KEYWORD, keyword);
}
diff --git a/src/token.c b/src/token.c
index 079a59f..b89096a 100644
--- a/src/token.c
+++ b/src/token.c
@@ -119,3 +119,10 @@ int char_can_ignore(char* character) {
return 0;
}
+
+
+int token_char_quote(char c) { return (c != '\''); }
+int token_char_grave(char c) { return (c != '`'); }
+int token_char_pound(char c) { return (c != '#'); }
+int token_char_colon(char c) { return (c != ':'); }
+int token_char_kywrd(char c) { return (char_could_split_keyword(&c)); }