author     c+1  2023-05-22 15:58:13 -0400
committer  c+1  2023-05-22 15:58:13 -0400
commit     d83d37ecd5682252f85af099c3353525f1cb1394 (patch)
tree       150f6f0ca3a9c5c217a86cd50d84ea170026537e
parent     45b152974bb6965209287945fd706d3b0c2df9ba (diff)
yay :)
-rw-r--r--  Makefile              1
-rw-r--r--  examples/hello.halk   6
-rw-r--r--  src/include/token.h  25
-rw-r--r--  src/lexer.c          48
-rw-r--r--  src/main.c           27
-rw-r--r--  src/token.c          10
6 files changed, 79 insertions(+), 38 deletions(-)
diff --git a/Makefile b/Makefile
index b6e0252..b6866bc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
exec = halk.out
sources := $(wildcard src/*.c)
objects = $(sources:.c=.o)
+sources := $(filter-out src/parser.c, $(sources)) # exclude the incomplete parser for now.
flags = -g
$(exec): $(objects)
diff --git a/examples/hello.halk b/examples/hello.halk
index 8b6b038..719ba93 100644
--- a/examples/hello.halk
+++ b/examples/hello.halk
@@ -1,9 +1,9 @@
` comments in backticks `
` preprocessor directives `
-#INCLUDE.'math', 'm'; ` bring the math library into scope, under the namespace 'm' `
-#INCLUDE.'io', ''; ` bring the io library into global scope (with no namespace) `
-
+#INCLUDE##math#
+#INCLUDE##io#
+` source code begins here `
str:hello = 'hello, '; ` variables must be given a value at declaration `
diff --git a/src/include/token.h b/src/include/token.h
index f7a166f..e52c3a0 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -1,19 +1,27 @@
#ifndef TOKEN_H
#define TOKEN_H
+#define TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"
+#define TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS_LEN 53
+#define TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS "1234567890_-"
+#define TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS_LEN 12
+#define TOKEN_CHAR_IGNORE " \t\n\r"
+#define TOKEN_CHAR_IGNORE_LEN 4
+#define TOKEN_CHAR_FIRST_CHAR_INT "0123456789"
typedef struct TOKEN_STRUC {
enum {
TOKEN_KEYWORD, // keyword
- TOKEN_STR_DELIM, // '
- TOKEN_STR, // 'string'
+ TOKEN_PRIM_STR_DELIM, // '
+ TOKEN_PRIM_STR, // 'string'
+ TOKEN_PRIM_INT, // 123
TOKEN_COMM_DELIM, // `
TOKEN_COMM, // `comment`
TOKEN_EXPR_END, // ;
TOKEN_LGROUP, // (
TOKEN_RGROUP, // )
TOKEN_DIRECTIVE_DELIM, // #
- TOKEN_DIRECTIVE, // #DIRECTIVE;
+ TOKEN_DIRECTIVE, // #DIRECTIVE#
TOKEN_FN_APPLY, // .
TOKEN_LIST_DELIM, // ,
TOKEN_DEF_TAGS_DELIM, // :
@@ -23,6 +31,7 @@ typedef struct TOKEN_STRUC {
TOKEN_NAMESPACE_DELIM, // /
TOKEN_ARRAY_DELIM_START, // [
TOKEN_ARRAY_DELIM_END, // ]
+ TOKEN_DEF_SET, // =
TOKEN_EOF, // \0
} type;
@@ -31,17 +40,9 @@ typedef struct TOKEN_STRUC {
token_t* token_init(int type, char* val);
-char TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS[] = "abcdefghijklmnopqrstuvwxyz_"; // chars that can begin a var name
-int TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS_LEN = 27; // maximum efficiency!
-char TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS[] = "1234567890_-"; // chars that can be in the rest of the var name,
- // not including the ones already defined to begin
- // one.
-int TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS_LEN = 12;
-char TOKEN_CHAR_IGNORE[] = " \t\n\r"; // characters to ignore while parsing tokens
-int TOKEN_CHAR_IGNORE_LEN = 4;
-
int char_could_start_keyword(char* character);
int char_could_split_keyword(char* character);
+int char_could_start_int(char* character);
int char_can_ignore(char* character);
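Note: this hunk moves the character tables out of token.h's old global-variable definitions (which get redefined in every translation unit that includes the header) and into #defines. A minimal sketch of how those tables are presumably consumed by the predicates declared above, mirroring the char_could_start_int() added to src/token.c further down; the real definitions may differ:

#include "include/token.h"

// Sketch only: scan the allowed-first-character table for a match.
int char_could_start_keyword(char* character) {
    for (int i = 0; i < TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS_LEN; ++ i) {
        if (TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS[i] == *character) {
            return 1; // *character may begin a keyword/definition name
        }
    }

    return 0; // not a legal first character
}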
diff --git a/src/lexer.c b/src/lexer.c
index 484766f..5a04da0 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -6,7 +6,6 @@
#include "include/log.h"
#include "include/lexer.h"
-#include "include/token.h"
lexer_t* lexer_init(char* content) {
@@ -40,13 +39,17 @@ void lexer_pass(lexer_t* lexer) {
token_t* lexer_get_next_token(lexer_t* lexer) {
while (LEXER_VALID) {
- if (char_can_ignore(&lexer->c)) {
- lexer_pass(lexer);
- }
-
- if (char_could_start_keyword(&lexer->c)) {
- return lexer_get_keyword(lexer);
+ if (char_can_ignore(&lexer->c)) { lexer_pass(lexer); }
+ if (char_could_start_int(&lexer->c)) {
+ return lexer_next_token(
+ lexer,
+ token_init(
+ TOKEN_PRIM_INT,
+ lexer_get_c_as_string(lexer)
+ )
+ );
}
+ if (char_could_start_keyword(&lexer->c)) { return lexer_get_keyword(lexer); }
switch (lexer->c) {
case '\'':
@@ -65,9 +68,13 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
);
break;
case '=':
- return lexer_get_def_const(lexer); break;
- case '-':
- return lexer_get_def_mut(lexer); break;
+ return lexer_next_token(
+ lexer,
+ token_init(
+ TOKEN_DEF_SET,
+ lexer_get_c_as_string(lexer)
+ )
+ ); break;
case '(': return lexer_next_token(
lexer,
token_init(
@@ -101,10 +108,17 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
case ':': return lexer_next_token(
lexer,
token_init(
- TOKEN_VAR_DEF_ARGS_DELIM,
+ TOKEN_DEF_TAGS_DELIM,
lexer_get_c_as_string(lexer)
)
); break;
+ case '/': return lexer_next_token(
+ lexer,
+ token_init(
+ TOKEN_NAMESPACE_DELIM,
+ lexer_get_c_as_string(lexer)
+ )
+ ); break;
case '{': return lexer_next_token(
lexer,
token_init(
@@ -119,9 +133,11 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
lexer_get_c_as_string(lexer)
)
); break;
+ case '[': return lexer_get_array(lexer); break;
case '\0': return token_init(TOKEN_EOF, lexer_get_c_as_string(lexer)); break;
default:
log_err("Unrecognized token");
+ printf("%c\n", lexer->c);
exit(1);
}
}
@@ -162,7 +178,7 @@ token_t* lexer_get_array(lexer_t* lexer) {
lexer_next(lexer); // skip over closing ]
- return token_init(TOKEN_STR, array_so_far); // return the collected array
+ return token_init(TOKEN_PRIM_STR, array_so_far); // return the collected array
}
token_t* lexer_get_string(lexer_t* lexer) {
@@ -183,13 +199,14 @@ token_t* lexer_get_string(lexer_t* lexer) {
lexer_next(lexer); // skip over closing '
- return token_init(TOKEN_STR, str_so_far); // return the collected string
+ return token_init(TOKEN_PRIM_STR, str_so_far); // return the collected string
}
token_t* lexer_get_comment(lexer_t* lexer) {
lexer_next(lexer);
char* comment_so_far = calloc(1, sizeof(char));
+ comment_so_far[0] = '\0';
while (lexer->c != '`') {
char* current = lexer_get_c_as_string(lexer);
@@ -212,7 +229,7 @@ token_t* lexer_get_directive(lexer_t* lexer) {
char* directive_so_far = calloc(1, sizeof(char));
directive_so_far[0] = '\0';
- while (lexer->c != ';') {
+ while (lexer->c != '#') {
char* current = lexer_get_c_as_string(lexer);
directive_so_far = realloc(
directive_so_far,
@@ -223,7 +240,7 @@ token_t* lexer_get_directive(lexer_t* lexer) {
lexer_next(lexer);
}
- lexer_next(lexer); // skip over closing ;
+ lexer_next(lexer);
return token_init(TOKEN_DIRECTIVE, directive_so_far);
}
@@ -242,6 +259,7 @@ token_t* lexer_get_keyword(lexer_t* lexer) {
lexer_next(lexer);
}
+
return token_init(TOKEN_KEYWORD, keyword_so_far);
}
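lexer_get_array(), lexer_get_string(), lexer_get_comment() and lexer_get_directive() all appear to share the same accumulate-until-delimiter shape. A sketch of that pattern with an illustrative helper name (collect_until is not part of src/lexer.c), assuming <stdlib.h> and <string.h> are available:

#include <stdlib.h>
#include <string.h>
#include "include/lexer.h"

// Hypothetical helper: gather characters up to (and consume) `delim`.
static char* collect_until(lexer_t* lexer, char delim) {
    char* so_far = calloc(1, sizeof(char)); // starts as an empty string
    so_far[0] = '\0';

    while (lexer->c != delim) {
        char* current = lexer_get_c_as_string(lexer);
        so_far = realloc(so_far, (strlen(so_far) + strlen(current) + 1) * sizeof(char));
        strcat(so_far, current);
        lexer_next(lexer);
    }

    lexer_next(lexer); // skip over the closing delimiter
    return so_far;
}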
diff --git a/src/main.c b/src/main.c
index d9e617a..040ef73 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,10 +1,11 @@
#include <stdio.h>
+#include <stdlib.h>
#include "include/log.h"
#include "include/lexer.h"
-#include "include/tree.h"
-#include "include/parser.h"
+// #include "include/tree.h"
+// #include "include/parser.h"
int main(int argc, char* argv[]) {
@@ -42,14 +43,24 @@ int main(int argc, char* argv[]) {
lexer_t* lexer = lexer_init(source);
log_inf("Lexer created");
- parser_t* parser = parser_init(lexer);
- log_inf("Parser created");
+ log_inf("== BEGIN INPUT ==");
+ log_inf(lexer->content);
+ log_inf("=== END INPUT ===");
- tree_t* tree = parser_parse(parser);
- log_inf("Tree root created");
+ token_t* token = NULL;
- printf("TYPE: [%d]\n", tree->type);
- printf("SIZE: [%d]\n", tree->data.subtree.size);
+ while ((token = lexer_get_next_token(lexer)) != NULL) {
+ printf("===\ntoken type: %d:\ntoken value: || %s ||\n===\n", token->type, token->value);
+ }
+
+ //parser_t* parser = parser_init(lexer);
+ //log_inf("Parser created");
+
+ //tree_t* tree = parser_parse(parser);
+ //log_inf("Tree root created");
+
+ //printf("TYPE: [%d]\n", tree->type);
+ //printf("SIZE: [%d]\n", tree->data.subtree.size);
fclose(fsource);
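Worth noting: lexer_get_next_token() hands back a TOKEN_EOF token at '\0' rather than NULL, so depending on what it returns once input is exhausted, the loop above may never see NULL. A sketch of a variant that also stops on the EOF token, using the field names from the diff:

token_t* token = NULL;

while ((token = lexer_get_next_token(lexer)) != NULL) {
    printf("===\ntoken type: %d\ntoken value: || %s ||\n===\n", token->type, token->value);

    if (token->type == TOKEN_EOF) {
        break; // '\0' reached; stop asking the lexer for more tokens
    }
}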
diff --git a/src/token.c b/src/token.c
index 9ea2ccf..432d44f 100644
--- a/src/token.c
+++ b/src/token.c
@@ -37,6 +37,16 @@ int char_could_split_keyword(char* character) {
}
}
+int char_could_start_int(char* character) {
+ for (int i = 0; i < 10; ++ i) {
+ if (TOKEN_CHAR_FIRST_CHAR_INT[i] == *character) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
int char_can_ignore(char* character) {
for (int i = 0; i < TOKEN_CHAR_IGNORE_LEN; ++ i) {
if (TOKEN_CHAR_IGNORE[i] == *character) {