aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author c+1 2023-10-21 09:10:58 -0400
committer c+1 2023-10-21 09:10:58 -0400
commit 6fc8f91e0d96ae4b4ee59ea562574cc04fdf8abf (patch)
tree f7e1a8041f2808eecc60cae54f5dda3f85850c32 /src
parent cc7bb40ae5e7f8f345195547b2c5044efc4d61ba (diff)
Diffstat (limited to 'src')
-rw-r--r-- src/include/lexer.h 11
-rw-r--r-- src/include/token.h 7
-rw-r--r-- src/include/tree.h 46
-rw-r--r-- src/lexer.c 60
-rw-r--r-- src/token.c 25
5 files changed, 88 insertions, 61 deletions
diff --git a/src/include/lexer.h b/src/include/lexer.h
index b2bf9eb..83ace59 100644
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@@ -16,8 +16,10 @@ typedef struct LEXER_STRUC {
enum LEXER_STATE {
/* normal 1-character token */
LEXER_STATE_REG,
- /* character */
- LEXER_STATE_CHR,
+ /* definition tag */
+ LEXER_STATE_TAG,
+ /* escaped character in string */
+ LEXER_STATE_ESC,
/* string */
LEXER_STATE_STR,
/* definition */
@@ -46,8 +48,13 @@ void lexer_add_token(lexer_t* lexer, token_t* token);
lexer_do_reg() */
void lexer_add_current_char(lexer_t* lexer, int type);
+/* append the first character of lexer's src to the value of the last token in tokenl, if that token exists and has the given type. otherwise, create a new token of that type and add it */
+void lexer_add_current_char_to_last_token(lexer_t* lexer, int type);
+
/* handle regular state */
void lexer_do_reg(lexer_t*);
+/* handle definition tag state */
+void lexer_do_tag(lexer_t*);
/* handle character state */
void lexer_do_chr(lexer_t*);
/* handle string state */
diff --git a/src/include/token.h b/src/include/token.h
index 5a3a36c..a186fa9 100644
--- a/src/include/token.h
+++ b/src/include/token.h
@@ -34,11 +34,16 @@ typedef struct TOKEN_STRUC {
} token_t;
/* creates a token */
-token_t* token_init(int type, char* val);
+token_t* token_init(int type, char val);
/* destroys a token **and all tokens contained in nxt** **Make sure to set the nxt of any parent tokens to NULL** */
void token_destroy(token_t* token);
/* return pointer to the last token */
token_t* token_last(token_t* token);
+/* add a character to the token value */
+void token_add_char(token_t*, char);
+
+/* print a token -- for debugging purposes */
+void token_print(token_t* token);
#endif
diff --git a/src/include/tree.h b/src/include/tree.h
index a2b71da..88287a4 100644
--- a/src/include/tree.h
+++ b/src/include/tree.h
@@ -4,44 +4,26 @@
#include <stdlib.h>
typedef struct TREE_STRUC {
- enum {
- TREE_COMP,
- TREE_DEF,
- TREE_CALL,
- TREE_TYPE_STR,
+ enum TREE_TYPE { /* the enum now carries the tag TREE_TYPE; its values are held in the `type` member */
TREE_TYPE_INT,
+ TREE_TYPE_STR,
+ TREE_TYPE_DEF,
+ TREE_TYPE_CAL, /* NOTE(review): no matching member in `data` in this hunk */
+ TREE_TYPE_COND, /* NOTE(review): no matching member in `data` in this hunk */
} type;
union {
- struct { // === "COMPOUND" ===
- struct TREE_STRUC** value;
- size_t size;
- } comp;
-
- struct { // === DEFINITIONS ===
- char* type; // the definition type
- char** tags; // the definition tags
- size_t tags_size; // the number of tags
- char* name; // the definition name
- struct TREE_STRUC** args; // the arguments the definition will accept
- size_t args_size; // the number of arguments
- struct TREE_STRUC* value; // value of definition
- } def;
-
- struct { // === CALLS ===
- char* target; // name of definition being called
- struct TREE_STRUC** args; // arguments passed to definition
- size_t args_size; // the number of arguments
- } call;
+ struct { /* presumably the payload for TREE_TYPE_INT -- TODO confirm */
+ int val;
+ } tree_int_t; /* NOTE(review): this is a union member name, not a type, despite the _t suffix */
- // === TYPES ===
- struct { // strings
- char* value;
- } type_str;
+ struct { /* presumably the payload for TREE_TYPE_STR -- TODO confirm */
+ char* val;
+ } tree_str_t; /* member name, not a type */
- struct { // integers
- int value;
- } type_int;
+ struct { /* presumably the payload for TREE_TYPE_DEF -- TODO confirm */
+ char* id;
+ } tree_def_t; /* member name, not a type */
} data;
} tree_t;
diff --git a/src/lexer.c b/src/lexer.c
index ba0e8e1..7f36b98 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -36,31 +36,33 @@ void lexer_add_token(lexer_t* lexer, token_t* token) {
lexer->tokenl_last = token;
}
- log_inf("token/v:%s\t/t:%d", token->val, token->type);
lexer->tokenc ++;
}
void lexer_add_current_char(lexer_t* lexer, int type) {
- char* c; /* get the current character as a string */
token_t* t; /* the token to be added */
- c = ecalloc(2, sizeof(char));
- c[0] = *lexer->src;
- c[1] = '\0';
-
- t = token_init(type, c);
+ t = token_init(type, *lexer->src); /* pass the character itself; token_init now allocates the 2-byte string for it */
lexer_add_token(lexer, t);
}
+void lexer_add_current_char_to_last_token(lexer_t* lexer, int type) {
+    token_t* tail = lexer->tokenl_last; /* may be NULL, hence the guard */
+
+    /* no last token, or one of another type: start a new token instead */
+    if (!tail || tail->type != type) { lexer_add_current_char(lexer, type); return; }
+    token_add_char(tail, *lexer->src); /* same type: grow the last token */
+}
+
void lexer_do_reg(lexer_t* lexer) {
switch (*lexer->src) {
case SYNTAX_APPLY:
lexer_add_current_char(lexer, TOKEN_APPLY);
break;
case SYNTAX_TAG_DELIM:
- lexer_add_current_char(lexer, TOKEN_TAG_DELIM);
+ lexer->state = LEXER_STATE_TAG;
break;
case SYNTAX_NAMESPACE_DELIM:
lexer_add_current_char(lexer, TOKEN_NAMESPACE_DELIM);
@@ -83,39 +85,49 @@ void lexer_do_reg(lexer_t* lexer) {
case SYNTAX_EXPR_END:
lexer_add_current_char(lexer, TOKEN_EXPR_END);
break;
- case SYNTAX_STR_DELIM:
- lexer_add_current_char(lexer, TOKEN_STR_DELIM);
- break;
- case SYNTAX_CHAR_DELIM:
- lexer_add_current_char(lexer, TOKEN_CHAR_DELIM);
- break;
case SYNTAX_LIST_DELIM:
lexer_add_current_char(lexer, TOKEN_LIST_DELIM);
break;
+ case SYNTAX_STR_DELIM:
+ lexer->state = LEXER_STATE_STR;
+ break;
default:
lexer_add_current_char(lexer, TOKEN_UNKNOWN);
}
}
-void lexer_do_chr(lexer_t* lexer) {
- if (*lexer->src == '\'') {
- lexer->state = LEXER_STATE_REG;
- } else {
- token_t* t;
-
- t = token_init(TOKEN_CHAR, *lexer->src);
-
- lexer_add_token(lexer, t);
+void lexer_do_tag(lexer_t* lexer) { /* handle one character while the lexer is in LEXER_STATE_TAG */
+ switch (*lexer->src) {
+ case SYNTAX_SET: /* emit TOKEN_SET and return to the regular state */
+ lexer_add_current_char(lexer, TOKEN_SET);
+ lexer->state = LEXER_STATE_REG;
+ break;
+ case SYNTAX_APPLY: /* emit TOKEN_APPLY and return to the regular state */
+ lexer_add_current_char(lexer, TOKEN_APPLY);
+ lexer->state = LEXER_STATE_REG;
+ break;
+ case SYNTAX_TAG_DELIM: /* start a fresh TOKEN_TAG with an empty value; later characters are appended to it */
+ lexer_add_token(lexer, token_init(TOKEN_TAG, '\0'));
+ break;
+ default: lexer_add_current_char_to_last_token(lexer, TOKEN_TAG); /* grow the last TOKEN_TAG, or start one if the last token is of another type */
}
}
void lexer_do_str(lexer_t* lexer) {
-
+ if (*lexer->src == SYNTAX_STR_DELIM) { /* closing delimiter: leave the string state without emitting a token */
+ lexer->state = LEXER_STATE_REG;
+ } else { /* NOTE(review): appends whenever the last token is a TOKEN_STR, so back-to-back strings share one token and an empty string emits none */
+ lexer_add_current_char_to_last_token(lexer, TOKEN_STR);
+ }
}
void lexer_run(lexer_t* lexer) {
while (*lexer->src) {
if (lexer->state == LEXER_STATE_REG) { lexer_do_reg(lexer); }
+ else if (lexer->state == LEXER_STATE_TAG) { lexer_do_tag(lexer); } /* inside a definition tag */
+ else if (lexer->state == LEXER_STATE_STR) { lexer_do_str(lexer); } /* inside a string; NOTE(review): LEXER_STATE_ESC has no branch here */
lexer->src ++;
}
+
+ token_print(lexer->tokenl); /* debug dump of the token list, starting from tokenl */
}
diff --git a/src/token.c b/src/token.c
index ece32f4..935f23e 100644
--- a/src/token.c
+++ b/src/token.c
@@ -2,12 +2,14 @@
#include "include/token.h"
-token_t* token_init(int type, char* val) {
+token_t* token_init(int type, char val) {
token_t* token;
token = emalloc(sizeof(struct TOKEN_STRUC));
token->type = type;
- token->val = val;
+ token->val = emalloc(2);
+ *token->val = val;
+ token->val[1] = '\0';
token->nxt = NULL;
return token;
@@ -32,3 +34,22 @@ token_t* token_last(token_t* token) {
return t;
}
+
+void token_add_char(token_t* token, char c) {
+    /* append c to token->val, resizing the buffer to hold it plus the terminator */
+    size_t len = strlen(token->val);
+    char* grown = erealloc(token->val, len + sizeof c + 1);
+
+    grown[len] = c;
+    grown[len + 1] = '\0';
+    token->val = grown;
+}
+
+void token_print(token_t* token) {
+    /* print every token from `token` to the end of the nxt chain -- for debugging.
+       iterative walk: a NULL token prints nothing, and no recursion per token */
+    token_t* t;
+    for (t = token; t; t = t->nxt) {
+        log_dbg("token/t=%d\t/v=%s", t->type, t->val);
+    }
+}