about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- examples/hello.halk 11
-rw-r--r-- src/include/lexer.h 3
-rw-r--r-- src/include/token.h 2
-rw-r--r-- src/lexer.c 102
-rw-r--r-- src/main.c 2
5 files changed, 29 insertions, 91 deletions
diff --git a/examples/hello.halk b/examples/hello.halk
index 5ff4a0b..0bd40fe 100644
--- a/examples/hello.halk
+++ b/examples/hello.halk
@@ -1,14 +1,5 @@
-` comments in backticks `
-` preprocessor directives `
-#!PI# #3.14159# ` define macros with #!<name># #<value># `
-#IFNDEF# #HELLO# ` predefined macros are: IF, AND, OR, NOT, ELIF, ELSE, FI, INCLUDE `
- #HELLO# ` HELLO defined `
-#ENDIF#
-
-#INCLUDE# #io# ` include the 'io' header `
-
-` source code begins here `
+:str:hello = 'qwertyuiopasdfghjklzxcvbnm1234567890lakjsdhfpqiuelljaksdfbvvvviu3o4448y5o23ilfn';
:str:hello = 'hello, ';
diff --git a/src/include/lexer.h b/src/include/lexer.h
index a86f5bb..853a35c 100644
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@@ -28,8 +28,7 @@ extern char* lexer_get_c_as_string (lexer_t* lexer);
// collectors
extern token_t* lexer_get_arr (lexer_t* lexer);
-extern token_t* lexer_get_str (lexer_t* lexer);
-extern token_t* lexer_get_com (lexer_t* lexer);
+extern token_t* lexer_collect (lexer_t* lexer, char end_char, int fskip, int lskip, int type);
// special def collectors
extern token_t* lexer_get_directive (lexer_t* lexer);
diff --git a/src/include/token.h b/src/include/token.h
index 8964ccd..b58233d 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -10,7 +10,7 @@
#define TOKEN_CHAR_FIRST_CHAR_INT "0123456789"
typedef struct TOKEN_STRUC {
- enum {
+ enum TOKEN_ENUM {
TOKEN_KEYWORD, // keyword
TOKEN_PRIM_STR_DELIM, // '
TOKEN_PRIM_STR, // 'string'
diff --git a/src/lexer.c b/src/lexer.c
index e5d3c24..d137a07 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -53,10 +53,10 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
switch (lexer->c) {
case '\'':
- return lexer_get_str(lexer);
+ return lexer_collect(lexer, '\'', 1, 1, TOKEN_PRIM_STR);
break;
case '`':
- return lexer_get_com(lexer);
+ return lexer_collect(lexer, '`', 1, 1, TOKEN_COMM);
break;
case ';':
return lexer_next_token(
@@ -90,7 +90,8 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
)
); break;
case '#':
- return lexer_get_directive(lexer); break;
+ return lexer_collect(lexer, '#', 1, 1, TOKEN_DIRECTIVE);
+ break;
case '.': return lexer_next_token(
lexer,
token_init(
@@ -133,7 +134,9 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
lexer_get_c_as_string(lexer)
)
); break;
- case '[': return lexer_get_arr(lexer); break;
+ case '[':
+ return lexer_collect(lexer, ']', 1, 1, TOKEN_PRIM_STR);
+ break;
case '\0': return token_init(TOKEN_EOF, lexer_get_c_as_string(lexer)); break;
default:
log_err("Unrecognized token");
@@ -159,90 +162,37 @@ char* lexer_get_c_as_string(lexer_t* lexer) {
return str;
}
-// TODO: abstract away this kind of thing
-token_t* lexer_get_arr(lexer_t* lexer) {
- lexer_next(lexer); // skip opening [
- char* array_so_far = calloc(1, sizeof(char));
- array_so_far[0] = '\0';
-
- while (lexer->c != ']') {
- char* current = lexer_get_c_as_string(lexer);
- array_so_far = realloc(
- array_so_far,
- (strlen(array_so_far) + strlen(current) * sizeof(char))
- );
-
- strcat(array_so_far, current);
- lexer_next(lexer);
- }
-
- lexer_next(lexer); // skip over closing ]
-
- return token_init(TOKEN_PRIM_STR, array_so_far); // return the collected array
-}
-
-token_t* lexer_get_str(lexer_t* lexer) {
- lexer_next(lexer);
- char* str_so_far = calloc(1, sizeof(char));
- str_so_far[0] = '\0';
-
- while (lexer->c != '\'') {
- char* current = lexer_get_c_as_string(lexer);
- str_so_far = realloc(
- str_so_far,
- (strlen(str_so_far) + strlen(current) * sizeof(char))
- );
-
- strcat(str_so_far, current);
- lexer_next(lexer);
+// Collects characters up to (not including) end_char into a NUL-terminated buffer passed to token_init(type, ...).
+// fskip: non-zero skips the current (opening) char first; lskip: non-zero skips end_char once it is reached.
+// Also stops at '\0', which the lexer treats as EOF, so a missing end_char still ends the token.
+token_t* lexer_collect(lexer_t* lexer, char end_char, int fskip, int lskip, int type) {
+ if (fskip) {
+ lexer_next(lexer); // skip over starting character, e.g. opening quote or grave
}
- lexer_next(lexer); // skip over closing '
-
- return token_init(TOKEN_PRIM_STR, str_so_far); // return the collected string
-}
-
-token_t* lexer_get_com(lexer_t* lexer) {
- lexer_next(lexer);
+ size_t len = 0; // length of collected token so far
+ char* token = calloc(len + 1, sizeof(char)); // one zeroed byte for the terminator; a zero-size block is not writable
- char* comment_so_far = calloc(1, sizeof(char));
- comment_so_far[0] = '\0';
-
- while (lexer->c != '`') {
+ while (lexer->c != end_char && lexer->c != '\0') {
char* current = lexer_get_c_as_string(lexer);
- comment_so_far = realloc(
- comment_so_far,
- (strlen(comment_so_far) + strlen(current) * sizeof(char))
+ token = realloc(
+ token,
+ (len + strlen(current) + 1) * sizeof(char) // +1 reserves the byte written by the final terminator
);
- strcat(comment_so_far, current);
+ memcpy(token + len, current, strlen(current) * sizeof(char));
+ len += strlen(current) * sizeof(char);
+ // NOTE(review): current is never released; if lexer_get_c_as_string heap-allocates, free it here — confirm
lexer_next(lexer);
}
- lexer_next(lexer); // skip over closing `
-
- return token_init(TOKEN_COMM, comment_so_far);
-}
-
-token_t* lexer_get_directive(lexer_t* lexer) {
- lexer_next(lexer);
- char* directive_so_far = calloc(1, sizeof(char));
- directive_so_far[0] = '\0';
-
- while (lexer->c != '#') {
- char* current = lexer_get_c_as_string(lexer);
- directive_so_far = realloc(
- directive_so_far,
- (strlen(directive_so_far) + strlen(current) * sizeof(char))
- );
-
- strcat(directive_so_far, current);
+ if (lskip && lexer->c == end_char) { // only consume the closing char if the loop actually stopped on it
lexer_next(lexer);
}
- lexer_next(lexer);
+ token[len] = '\0'; // null terminate
- return token_init(TOKEN_DIRECTIVE, directive_so_far);
+ return token_init(type, token);
}
token_t* lexer_get_keyword(lexer_t* lexer) {
@@ -259,7 +209,5 @@ token_t* lexer_get_keyword(lexer_t* lexer) {
lexer_next(lexer);
}
-
return token_init(TOKEN_KEYWORD, keyword_so_far);
}
-
diff --git a/src/main.c b/src/main.c
index 755a4e8..040ef73 100644
--- a/src/main.c
+++ b/src/main.c
@@ -13,7 +13,7 @@ int main(int argc, char* argv[]) {
long fsource_size;
char *source;
- fsource = fopen ("examples/hello.halk", "rb");
+ fsource = fopen ("examples/hello.halk", "rb");
if (!fsource) {
log_err("Source file not found");
exit(1);