author     c+1  2023-05-22 15:58:13 -0400
committer  c+1  2023-05-22 15:58:13 -0400
commit     d83d37ecd5682252f85af099c3353525f1cb1394 (patch)
tree       150f6f0ca3a9c5c217a86cd50d84ea170026537e
parent     45b152974bb6965209287945fd706d3b0c2df9ba (diff)
yay :)
-rw-r--r--  Makefile              1
-rw-r--r--  examples/hello.halk   6
-rw-r--r--  src/include/token.h  25
-rw-r--r--  src/lexer.c          48
-rw-r--r--  src/main.c           27
-rw-r--r--  src/token.c          10
6 files changed, 79 insertions(+), 38 deletions(-)
diff --git a/Makefile b/Makefile
index b6e0252..b6866bc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
exec = halk.out
sources := $(wildcard src/*.c)
objects = $(sources:.c=.o)
+sources := $(filter-out src/parser.c, $(sources)) # exclude the incomplete parser for now.
flags = -g
$(exec): $(objects)
diff --git a/examples/hello.halk b/examples/hello.halk
index 8b6b038..719ba93 100644
--- a/examples/hello.halk
+++ b/examples/hello.halk
@@ -1,9 +1,9 @@
` comments in backticks `
` preprocessor directives `
-#INCLUDE.'math', 'm'; ` bring the math library into scope, under the namespace 'm' `
-#INCLUDE.'io', ''; ` bring the io library into global scope (with no namespace) `
-
+#INCLUDE##math#
+#INCLUDE##io#
+` source code begins here `
str:hello = 'hello, '; ` variables must be given a value at declaration `
diff --git a/src/include/token.h b/src/include/token.h
index f7a166f..e52c3a0 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -1,19 +1,27 @@
#ifndef TOKEN_H
#define TOKEN_H
+#define TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"
+#define TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS_LEN 53
+#define TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS "1234567890_-"
+#define TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS_LEN 12
+#define TOKEN_CHAR_IGNORE " \t\n\r"
+#define TOKEN_CHAR_IGNORE_LEN 4
+#define TOKEN_CHAR_FIRST_CHAR_INT "0123456789"
typedef struct TOKEN_STRUC {
enum {
TOKEN_KEYWORD, // keyword
- TOKEN_STR_DELIM, // '
- TOKEN_STR, // 'string'
+ TOKEN_PRIM_STR_DELIM, // '
+ TOKEN_PRIM_STR, // 'string'
+ TOKEN_PRIM_INT, // 123
TOKEN_COMM_DELIM, // `
TOKEN_COMM, // `comment`
TOKEN_EXPR_END, // ;
TOKEN_LGROUP, // (
TOKEN_RGROUP, // )
TOKEN_DIRECTIVE_DELIM, // #
- TOKEN_DIRECTIVE, // #DIRECTIVE;
+ TOKEN_DIRECTIVE, // #DIRECTIVE#
TOKEN_FN_APPLY, // .
TOKEN_LIST_DELIM, // ,
TOKEN_DEF_TAGS_DELIM, // :
@@ -23,6 +31,7 @@ typedef struct TOKEN_STRUC {
TOKEN_NAMESPACE_DELIM, // /
TOKEN_ARRAY_DELIM_START, // [
TOKEN_ARRAY_DELIM_END, // ]
+ TOKEN_DEF_SET, // =
TOKEN_EOF, // \0
} type;
@@ -31,17 +40,9 @@ typedef struct TOKEN_STRUC {
token_t* token_init(int type, char* val);
-char TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS[] = "abcdefghijklmnopqrstuvwxyz_"; // chars that can begin a var name
-int TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS_LEN = 27; // maximum efficiency!
-char TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS[] = "1234567890_-"; // chars that can be in the rest of the var name,
- // not including the ones already defined to begin
- // one.
-int TOKEN_DEFNAME_SPLIT_CHAR_ALLOWED_CHARS_LEN = 12;
-char TOKEN_CHAR_IGNORE[] = " \t\n\r"; // characters to ignore while parsing tokens
-int TOKEN_CHAR_IGNORE_LEN = 4;
-
int char_could_start_keyword(char* character);
int char_could_split_keyword(char* character);
+int char_could_start_int(char* character);
int char_can_ignore(char* character);
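Note: this hunk moves the character tables out of token.h's old global-variable definitions (which get redefined in every translation unit that includes the header) and into #defines. A minimal sketch of how those tables are presumably consumed by the predicates declared above, mirroring the char_could_start_int() added to src/token.c further down; the real definitions may differ:

#include "include/token.h"

// Sketch only: scan the allowed-first-character table for a match.
int char_could_start_keyword(char* character) {
    for (int i = 0; i < TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS_LEN; ++ i) {
        if (TOKEN_DEFNAME_FIRST_CHAR_ALLOWED_CHARS[i] == *character) {
            return 1; // *character may begin a keyword/definition name
        }
    }

    return 0; // not a legal first character
}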
diff --git a/src/lexer.c b/src/lexer.c
index 484766f..5a04da0 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -6,7 +6,6 @@
#include "include/log.h"
#include "include/lexer.h"
-#include "include/token.h"
lexer_t* lexer_init(char* content) {
@@ -40,13 +39,17 @@ void lexer_pass(lexer_t* lexer) {
token_t* lexer_get_next_token(lexer_t* lexer) {
while (LEXER_VALID) {
- if (char_can_ignore(&lexer->c)) {
- lexer_pass(lexer);
- }
-
- if (char_could_start_keyword(&lexer->c)) {
- return lexer_get_keyword(lexer);
+ if (char_can_ignore(&lexer->c)) { lexer_pass(lexer); }
+ if (char_could_start_int(&lexer->c)) {
+ return lexer_next_token(
+ lexer,
+ token_init(
+ TOKEN_PRIM_INT,
+ lexer_get_c_as_string(lexer)
+ )
+ );
}
+ if (char_could_start_keyword(&lexer->c)) { return lexer_get_keyword(lexer); }
switch (lexer->c) {
case '\'':
@@ -65,9 +68,13 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
);
break;
case '=':
- return lexer_get_def_const(lexer); break;
- case '-':
- return lexer_get_def_mut(lexer); break;
+ return lexer_next_token(
+ lexer,
+ token_init(
+ TOKEN_DEF_SET,
+ lexer_get_c_as_string(lexer)
+ )
+ ); break;
case '(': return lexer_next_token(
lexer,
token_init(
@@ -101,10 +108,17 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
case ':': return lexer_next_token(
lexer,
token_init(
- TOKEN_VAR_DEF_ARGS_DELIM,
+ TOKEN_DEF_TAGS_DELIM,
lexer_get_c_as_string(lexer)
)
); break;
+ case '/': return lexer_next_token(
+ lexer,
+ token_init(
+ TOKEN_NAMESPACE_DELIM,
+ lexer_get_c_as_string(lexer)
+ )
+ ); break;
case '{': return lexer_next_token(
lexer,
token_init(
@@ -119,9 +133,11 @@ token_t* lexer_get_next_token(lexer_t* lexer) {
lexer_get_c_as_string(lexer)
)
); break;
+ case '[': return lexer_get_array(lexer); break;
case '\0': return token_init(TOKEN_EOF, lexer_get_c_as_string(lexer)); break;
default:
log_err("Unrecognized token");
+ printf("%c\n", lexer->c);
exit(1);
}
}
@@ -162,7 +178,7 @@ token_t* lexer_get_array(lexer_t* lexer) {
lexer_next(lexer); // skip over closing ]
- return token_init(TOKEN_STR, array_so_far); // return the collected array
+ return token_init(TOKEN_PRIM_STR, array_so_far); // return the collected array
}
token_t* lexer_get_string(lexer_t* lexer) {
@@ -183,13 +199,14 @@ token_t* lexer_get_string(lexer_t* lexer) {
lexer_next(lexer); // skip over closing '
- return token_init(TOKEN_STR, str_so_far); // return the collected string
+ return token_init(TOKEN_PRIM_STR, str_so_far); // return the collected string
}
token_t* lexer_get_comment(lexer_t* lexer) {
lexer_next(lexer);
char* comment_so_far = calloc(1, sizeof(char));
+ comment_so_far[0] = '\0';
while (lexer->c != '`') {
char* current = lexer_get_c_as_string(lexer);
@@ -212,7 +229,7 @@ token_t* lexer_get_directive(lexer_t* lexer) {
char* directive_so_far = calloc(1, sizeof(char));
directive_so_far[0] = '\0';
- while (lexer->c != ';') {
+ while (lexer->c != '#') {
char* current = lexer_get_c_as_string(lexer);
directive_so_far = realloc(
directive_so_far,
@@ -223,7 +240,7 @@ token_t* lexer_get_directive(lexer_t* lexer) {
lexer_next(lexer);
}
- lexer_next(lexer); // skip over closing ;
+ lexer_next(lexer);
return token_init(TOKEN_DIRECTIVE, directive_so_far);
}
@@ -242,6 +259,7 @@ token_t* lexer_get_keyword(lexer_t* lexer) {
lexer_next(lexer);
}
+
return token_init(TOKEN_KEYWORD, keyword_so_far);
}
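lexer_get_array(), lexer_get_string(), lexer_get_comment() and lexer_get_directive() all appear to share the same accumulate-until-delimiter shape. A sketch of that pattern with an illustrative helper name (collect_until is not part of src/lexer.c), assuming <stdlib.h> and <string.h> are available:

#include <stdlib.h>
#include <string.h>
#include "include/lexer.h"

// Hypothetical helper: gather characters up to (and consume) `delim`.
static char* collect_until(lexer_t* lexer, char delim) {
    char* so_far = calloc(1, sizeof(char)); // starts as an empty string
    so_far[0] = '\0';

    while (lexer->c != delim) {
        char* current = lexer_get_c_as_string(lexer);
        so_far = realloc(so_far, (strlen(so_far) + strlen(current) + 1) * sizeof(char));
        strcat(so_far, current);
        lexer_next(lexer);
    }

    lexer_next(lexer); // skip over the closing delimiter
    return so_far;
}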
diff --git a/src/main.c b/src/main.c
index d9e617a..040ef73 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,10 +1,11 @@
#include <stdio.h>
+#include <stdlib.h>
#include "include/log.h"
#include "include/lexer.h"
-#include "include/tree.h"
-#include "include/parser.h"
+// #include "include/tree.h"
+// #include "include/parser.h"
int main(int argc, char* argv[]) {
@@ -42,14 +43,24 @@ int main(int argc, char* argv[]) {
lexer_t* lexer = lexer_init(source);
log_inf("Lexer created");
- parser_t* parser = parser_init(lexer);
- log_inf("Parser created");
+ log_inf("== BEGIN INPUT ==");
+ log_inf(lexer->content);
+ log_inf("=== END INPUT ===");
- tree_t* tree = parser_parse(parser);
- log_inf("Tree root created");
+ token_t* token = NULL;
- printf("TYPE: [%d]\n", tree->type);
- printf("SIZE: [%d]\n", tree->data.subtree.size);
+ while ((token = lexer_get_next_token(lexer)) != NULL) {
+ printf("===\ntoken type: %d:\ntoken value: || %s ||\n===\n", token->type, token->value);
+ }
+
+ //parser_t* parser = parser_init(lexer);
+ //log_inf("Parser created");
+
+ //tree_t* tree = parser_parse(parser);
+ //log_inf("Tree root created");
+
+ //printf("TYPE: [%d]\n", tree->type);
+ //printf("SIZE: [%d]\n", tree->data.subtree.size);
fclose(fsource);
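Worth noting: lexer_get_next_token() hands back a TOKEN_EOF token at '\0' rather than NULL, so depending on what it returns once input is exhausted, the loop above may never see NULL. A sketch of a variant that also stops on the EOF token, using the field names from the diff:

token_t* token = NULL;

while ((token = lexer_get_next_token(lexer)) != NULL) {
    printf("===\ntoken type: %d\ntoken value: || %s ||\n===\n", token->type, token->value);

    if (token->type == TOKEN_EOF) {
        break; // '\0' reached; stop asking the lexer for more tokens
    }
}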
diff --git a/src/token.c b/src/token.c
index 9ea2ccf..432d44f 100644
--- a/src/token.c
+++ b/src/token.c
@@ -37,6 +37,16 @@ int char_could_split_keyword(char* character) {
}
}
+int char_could_start_int(char* character) {
+ for (int i = 0; i < 10; ++ i) {
+ if (TOKEN_CHAR_FIRST_CHAR_INT[i] == *character) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
int char_can_ignore(char* character) {
for (int i = 0; i < TOKEN_CHAR_IGNORE_LEN; ++ i) {
if (TOKEN_CHAR_IGNORE[i] == *character) {