From 6fc8f91e0d96ae4b4ee59ea562574cc04fdf8abf Mon Sep 17 00:00:00 2001
From: c+1
Date: Sat, 21 Oct 2023 09:10:58 -0400
Subject: ⬣

---
 src/include/lexer.h | 11 ++++++++--
 src/include/token.h |  7 ++++++-
 src/include/tree.h  | 46 +++++++++++++---------------------------
 src/lexer.c         | 60 ++++++++++++++++++++++++++++++++---------------------
 src/token.c         | 25 ++++++++++++++++++++--
 5 files changed, 88 insertions(+), 61 deletions(-)

(limited to 'src')

diff --git a/src/include/lexer.h b/src/include/lexer.h
index b2bf9eb..83ace59 100644
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@@ -16,8 +16,10 @@ typedef struct LEXER_STRUC {
    enum LEXER_STATE {
       /* normal 1-character token */
       LEXER_STATE_REG,
-      /* character */
-      LEXER_STATE_CHR,
+      /* definition tag */
+      LEXER_STATE_TAG,
+      /* escaped character in string */
+      LEXER_STATE_ESC,
       /* string */
       LEXER_STATE_STR,
       /* definition */
@@ -46,8 +48,13 @@ void lexer_add_token(lexer_t* lexer, token_t* token);
    lexer_do_reg() */
 void lexer_add_current_char(lexer_t* lexer, int type);
 
+/* append the current character of lexer's src to the value of the last token in tokenl if that token exists and has the given type. otherwise, create a new token of that type and add it */
+void lexer_add_current_char_to_last_token(lexer_t* lexer, int type);
+
 /* handle regular state */
 void lexer_do_reg(lexer_t*);
+/* handle definition tag state */
+void lexer_do_tag(lexer_t*);
 /* handle character state */
 void lexer_do_chr(lexer_t*);
 /* handle string state */
diff --git a/src/include/token.h b/src/include/token.h
index 5a3a36c..a186fa9 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -34,11 +34,16 @@ typedef struct TOKEN_STRUC {
 } token_t;
 
 /* creates a token */
-token_t* token_init(int type, char* val);
+token_t* token_init(int type, char val);
 /* destroys a token **and all tokens contained in nxt** **Make sure to set the nxt of any parent tokens to NULL** */
 void token_destroy(token_t* token);
 
 /* return pointer to the last token */
 token_t* token_last(token_t* token);
+/* add a character to the token value */
+void token_add_char(token_t*, char);
+
+/* print a token -- for debugging purposes */
+void token_print(token_t* token);
 
 #endif
diff --git a/src/include/tree.h b/src/include/tree.h
index a2b71da..88287a4 100644
--- a/src/include/tree.h
+++ b/src/include/tree.h
@@ -4,44 +4,26 @@
 #include <stdlib.h>
 
 typedef struct TREE_STRUC {
-   enum {
-      TREE_COMP,
-      TREE_DEF,
-      TREE_CALL,
-      TREE_TYPE_STR,
+   enum TREE_TYPE { /* kind of tree node; presumably selects the active member of data -- TODO confirm */
       TREE_TYPE_INT,
+      TREE_TYPE_STR,
+      TREE_TYPE_DEF,
+      TREE_TYPE_CAL, /* NOTE(review): data has no matching member for this kind yet */
+      TREE_TYPE_COND, /* NOTE(review): data has no matching member for this kind yet */
    } type;
 
    union {
-      struct {                               // === "COMPOUND" ===
-         struct TREE_STRUC**  value;
-         size_t               size;
-      } comp;
-
-      struct {                               // === DEFINITIONS ===
-         char*                type;          // the definition type
-         char**               tags;          // the definition tags
-         size_t               tags_size;     // the number of tags
-         char*                name;          // the definition name
-         struct TREE_STRUC**  args;          // the arguments the definition will accept
-         size_t               args_size;     // the number of arguments
-         struct TREE_STRUC*   value;         // value of definition
-      } def;
-
-      struct {                               // === CALLS ===
-         char*                target;        // name of definition being called
-         struct TREE_STRUC**  args;          // arguments passed to definition
-         size_t               args_size;     // the number of arguments
-      } call;
+      struct { /* integer payload */
+         int val;
+      } tree_int_t; /* a union member name, not a type, despite the _t suffix */
 
-                                             // === TYPES ===
-      struct {                               // strings
-         char*                value;
-      } type_str; 
+      struct { /* string payload */
+         char* val;
+      } tree_str_t; /* a union member name, not a type, despite the _t suffix */
 
-      struct {                               // integers
-         int                  value;
-      } type_int;
+      struct { /* definition payload; id is presumably the definition's name -- TODO confirm */
+         char* id;
+      } tree_def_t; /* a union member name, not a type, despite the _t suffix */
    } data;
 } tree_t;
 
diff --git a/src/lexer.c b/src/lexer.c
index ba0e8e1..7f36b98 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -36,31 +36,33 @@ void lexer_add_token(lexer_t* lexer, token_t* token) {
       lexer->tokenl_last = token;
    }
 
-   log_inf("token/v:%s\t/t:%d", token->val, token->type);
 
    lexer->tokenc ++;
 }
 
 void lexer_add_current_char(lexer_t* lexer, int type) {
-   char* c;    /* get the current character as a string */
    token_t* t; /* the token to be added */
 
-   c = ecalloc(2, sizeof(char));
-   c[0] = *lexer->src;
-   c[1] = '\0';
-
-   t = token_init(type, c);
+   t = token_init(type, *lexer->src); /* token_init now takes the character itself rather than a string */
 
    lexer_add_token(lexer, t);
 }
 
+void lexer_add_current_char_to_last_token(lexer_t* lexer, int type) {
+   token_t* last = lexer->tokenl_last; /* most recently added token, if any */
+
+   /* extend the last token only when it exists and has the requested type */
+   if (!last || last->type != type) { lexer_add_current_char(lexer, type); return; }
+   token_add_char(last, *lexer->src);
+}
+
 void lexer_do_reg(lexer_t* lexer) {
    switch (*lexer->src) {
       case SYNTAX_APPLY:
          lexer_add_current_char(lexer, TOKEN_APPLY);
          break;
       case SYNTAX_TAG_DELIM:
-         lexer_add_current_char(lexer, TOKEN_TAG_DELIM);
+         lexer->state = LEXER_STATE_TAG;
          break;
       case SYNTAX_NAMESPACE_DELIM:
          lexer_add_current_char(lexer, TOKEN_NAMESPACE_DELIM);
@@ -83,39 +85,49 @@ void lexer_do_reg(lexer_t* lexer) {
       case SYNTAX_EXPR_END:
          lexer_add_current_char(lexer, TOKEN_EXPR_END);
          break;
-      case SYNTAX_STR_DELIM:
-         lexer_add_current_char(lexer, TOKEN_STR_DELIM);
-         break;
-      case SYNTAX_CHAR_DELIM:
-         lexer_add_current_char(lexer, TOKEN_CHAR_DELIM);
-         break;
       case SYNTAX_LIST_DELIM:
          lexer_add_current_char(lexer, TOKEN_LIST_DELIM);
          break;
+      case SYNTAX_STR_DELIM:
+         lexer->state = LEXER_STATE_STR;
+         break;
       default:
          lexer_add_current_char(lexer, TOKEN_UNKNOWN);
    }
 }
 
-void lexer_do_chr(lexer_t* lexer) {
-   if (*lexer->src == '\'') {
-      lexer->state = LEXER_STATE_REG;
-   } else {
-      token_t* t;
-
-      t = token_init(TOKEN_CHAR, *lexer->src);
-
-      lexer_add_token(lexer, t);
+void lexer_do_tag(lexer_t* lexer) { /* handle the current character while in LEXER_STATE_TAG */
+   switch (*lexer->src) {
+      case SYNTAX_SET: /* emit a SET token and go back to the regular state */
+         lexer_add_current_char(lexer, TOKEN_SET);
+         lexer->state = LEXER_STATE_REG;
+         break;
+      case SYNTAX_APPLY: /* emit an APPLY token and go back to the regular state */
+         lexer_add_current_char(lexer, TOKEN_APPLY);
+         lexer->state = LEXER_STATE_REG;
+         break;
+      case SYNTAX_TAG_DELIM: /* add a new, empty TAG token for the following characters to extend */
+         lexer_add_token(lexer, token_init(TOKEN_TAG, '\0'));
+         break;
+      default: lexer_add_current_char_to_last_token(lexer, TOKEN_TAG); /* any other character extends the last TAG token or starts one */
    }
 }
 
 void lexer_do_str(lexer_t* lexer) {
-
+   if (*lexer->src != SYNTAX_STR_DELIM) {
+      lexer_add_current_char_to_last_token(lexer, TOKEN_STR); /* string content */
+      return;
+   }
+   lexer->state = LEXER_STATE_REG; /* closing delimiter: leave the string state */
 }
 
 void lexer_run(lexer_t* lexer) {
    while (*lexer->src) {
       if (lexer->state == LEXER_STATE_REG) { lexer_do_reg(lexer); }
+      else if (lexer->state == LEXER_STATE_TAG) { lexer_do_tag(lexer); } /* inside a definition tag */
+      else if (lexer->state == LEXER_STATE_STR) { lexer_do_str(lexer); } /* inside a string */
       lexer->src ++;
    }
+
+   token_print(lexer->tokenl); /* debug dump of the token list; NOTE(review): tokenl is presumably NULL when src was empty -- confirm token_print handles that */
 }
diff --git a/src/token.c b/src/token.c
index ece32f4..935f23e 100644
--- a/src/token.c
+++ b/src/token.c
@@ -2,12 +2,14 @@
 
 #include "include/token.h"
 
-token_t* token_init(int type, char* val) {
+token_t* token_init(int type, char val) {
    token_t* token;
 
    token = emalloc(sizeof(struct TOKEN_STRUC));
    token->type = type;
-   token->val = val;
+   token->val = emalloc(2);
+   *token->val = val;
+   token->val[1] = '\0';
    token->nxt = NULL;
 
    return token;
@@ -32,3 +34,22 @@ token_t* token_last(token_t* token) {
 
    return t;
 }
+
+/* append c to the end of token->val, keeping it NUL-terminated */
+void token_add_char(token_t* token, char c) {
+   size_t len = strlen(token->val); /* current length, terminator excluded */
+
+   /* grow by one byte: old text + new character + terminator */
+   token->val = erealloc(token->val, len + 2);
+   token->val[len + 1] = '\0';
+   token->val[len] = c;
+}
+
+/* print the token list starting at token -- for debugging purposes */
+void token_print(token_t* token) {
+   /* iterate instead of recursing: a NULL (empty) list prints nothing and
+      a long list cannot exhaust the call stack */
+   for (; token; token = token->nxt) {
+      log_dbg("token/t=%d\t/v=%s", token->type, token->val);
+   }
+}
-- 
cgit v1.2.3